from urllib.request import urlopen
s = urlopen('http://igm.univ-mlv.fr/~jyt/M1_python').read()


print(s)

b'<html>\n\t<body>\n\t\t<H1> M1 Informatique : Python </H1>\n\t\t<ul>\n\t\t\t<li> Commencer ici : <a href="http://www.python.org/">python.org</a>\n\t\t\t<li> La <a href="http://docs.python.org/3.8">documentation</a> officielle\n\t\t\t<li> Le <a href="http://www.pythonchallenge.com/"> Python Challenge</a>\n\t\t\t<li> <a href="https://python.developpez.com/cours/DiveIntoPython/"><i>Dive into Python</i></a>  \n\t\t\tpar <a href="http://en.wikipedia.org/wiki/Mark_Pilgrim_%28software_developer%29">Mark Pilgrim</a> \n\t\t\t(un livre recommand&eacute; mais ancien);\n\t\t\t<a href="http://diveintopython3.problemsolving.io/">version Python 3</a>. \n\t\t\t<li> La <a href="http://fr.wikipedia.org/wiki/Python_%28langage%29">page</a> de Wikip&eacute;dia contient un bon r&eacute;sum&eacute;\n\t\t\t<li> Introduction au <i>notebook</i> ipython ou jupyter :)\n\t\t\t<ul> \n\t\t\t\t<li> <a href="intro_jupyter.html">Exemple</a>  de ce qu\'on peut obtenir (converti en html)\n\t\t\t\t<li> Le <a href="intro_jupyter.ipynb">code source</a> (fichier <tt>.ipynb</tt>), &agrave; t&eacute;l&eacute;charger\n\t\t\t\t<li> Lancer un terminal, aller dans le dossier contenant le fichier <tt>.ipynb</tt>, et entrer la commande <tt>jupyter notebook</tt>\n\t\t\t</ul>\n\t\t\t<li> Cours 1 (22/9/21) <a href="python3_M1_2021-1.html">html</a> - <a href="python3_M1_2021-1.ipynb">notebook</a>\n\t\t\t<li> <a href="td1_python3_2020.html"> TD 1</a> (22/9/21)\n\t\t\t<li> <a href="script3.py">mod&egrave;le de script</a> avec options et documentation\n\t\t\t<li> <a href="td1_sol_3_2021.html">Corrig&eacute</a> du TD 1\n\t\t\t<li> Cours 2 (29/09/2021) <a href="Python3_M1_2021-2.html">html</a> - <a href="Python3_M1_2021-2.ipynb">notebook</a>\n\t\t\t<li> <a href="td2_2021_texte.html">TD 2</a> (29/09/2021)\n\t\t\t<li> <a href="td2_solpy3_2021.html">Corrig&eacute</a> du TD 2\n\t\t\t<li> Cours 3 (6/10/2021) <a href="python3_M1_2021-3.html">html</a> - <a href="python3_M1_2021-3.ipynb">notebook</a>\n\t\t\t<li> <a 
href="td3_2021_texte.html">TD 3</a> (6/10/2021)\n\n\n\n\t\t\n\n\n\t\t\t\n\n\t\t</ul>\n\t</body>\n</html>\n\n\t\t\n'


print(s.decode('ascii'))

<html>
	<body>
		<H1> M1 Informatique : Python </H1>
		<ul>
			<li> Commencer ici : <a href="http://www.python.org/">python.org</a>
			<li> La <a href="http://docs.python.org/3.8">documentation</a> officielle
			<li> Le <a href="http://www.pythonchallenge.com/"> Python Challenge</a>
			<li> <a href="https://python.developpez.com/cours/DiveIntoPython/"><i>Dive into Python</i></a>  
			par <a href="http://en.wikipedia.org/wiki/Mark_Pilgrim_%28software_developer%29">Mark Pilgrim</a> 
			(un livre recommand&eacute; mais ancien);
			<a href="http://diveintopython3.problemsolving.io/">version Python 3</a>. 
			<li> La <a href="http://fr.wikipedia.org/wiki/Python_%28langage%29">page</a> de Wikip&eacute;dia contient un bon r&eacute;sum&eacute;
			<li> Introduction au <i>notebook</i> ipython ou jupyter :)
			<ul> 
				<li> <a href="intro_jupyter.html">Exemple</a>  de ce qu'on peut obtenir (converti en html)
				<li> Le <a href="intro_jupyter.ipynb">code source</a> (fichier <tt>.ipynb</tt>), &agrave; t&eacute;l&eacute;charger
				<li> Lancer un terminal, aller dans le dossier contenant le fichier <tt>.ipynb</tt>, et entrer la commande <tt>jupyter notebook</tt>
			</ul>
			<li> Cours 1 (22/9/21) <a href="python3_M1_2021-1.html">html</a> - <a href="python3_M1_2021-1.ipynb">notebook</a>
			<li> <a href="td1_python3_2020.html"> TD 1</a> (22/9/21)
			<li> <a href="script3.py">mod&egrave;le de script</a> avec options et documentation
			<li> <a href="td1_sol_3_2021.html">Corrig&eacute</a> du TD 1
			<li> Cours 2 (29/09/2021) <a href="Python3_M1_2021-2.html">html</a> - <a href="Python3_M1_2021-2.ipynb">notebook</a>
			<li> <a href="td2_2021_texte.html">TD 2</a> (29/09/2021)
			<li> <a href="td2_solpy3_2021.html">Corrig&eacute</a> du TD 2
			<li> Cours 3 (6/10/2021) <a href="python3_M1_2021-3.html">html</a> - <a href="python3_M1_2021-3.ipynb">notebook</a>
			<li> <a href="td3_2021_texte.html">TD 3</a> (6/10/2021)


		</ul>
	</body>
</html>


f = urlopen('http://igm.univ-mlv.fr/~jyt/M1_python')


print(dir(f))

['__abstractmethods__', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '_abc_impl', '_checkClosed', '_checkReadable', '_checkSeekable', '_checkWritable', '_check_close', '_close_conn', '_get_chunk_left', '_method', '_peek_chunked', '_read1_chunked', '_read_and_discard_trailer', '_read_next_chunk_size', '_read_status', '_readall_chunked', '_readinto_chunked', '_safe_read', '_safe_readinto', 'begin', 'chunk_left', 'chunked', 'close', 'closed', 'code', 'debuglevel', 'detach', 'fileno', 'flush', 'fp', 'getcode', 'getheader', 'getheaders', 'geturl', 'headers', 'info', 'isatty', 'isclosed', 'length', 'msg', 'peek', 'read', 'read1', 'readable', 'readinto', 'readinto1', 'readline', 'readlines', 'reason', 'seek', 'seekable', 'status', 'tell', 'truncate', 'url', 'version', 'will_close', 'writable', 'write', 'writelines']


f.code

200


f.getheaders()

[('Date', 'Sun, 10 Oct 2021 09:05:33 GMT'),
 ('Server', 'Apache'),
 ('Last-Modified', 'Wed, 06 Oct 2021 06:00:47 GMT'),
 ('ETag', '"1760011-7b3-5cda8de0ac1c0"'),
 ('Accept-Ranges', 'bytes'),
 ('Content-Length', '1971'),
 ('Connection', 'close'),
 ('Content-Type', 'text/html')]


f.geturl()

'http://igm.univ-mlv.fr/~jyt/M1_python/'


# Detect a change of the course page by comparing the ETag header sent by
# the server with the one saved in the local file 'etag_python'.
url = 'http://igm.univ-mlv.fr/~jyt/M1_python'

# ETag recorded by the previous run; on the very first run the file does
# not exist yet, which is treated as "no ETag known".
try:
    with open('etag_python') as etag_file:
        t = etag_file.read()
except FileNotFoundError:
    t = ''

# Only the headers are needed; the with-block closes the connection.
with urlopen(url) as response:
    d = response.info()
s = d['etag']
print(d['last-modified'])

if s != t:
    print("La page du cours de Python a été modifiée")
    # Remember the new ETag for the next comparison.
    with open('etag_python', 'w') as etag_file:
        etag_file.write(s)
else:
    print("Aucune modification")

Wed, 06 Oct 2021 06:00:47 GMT
La page du cours de Python a été modifiée


from urllib.parse import urlencode
url='http://oeis.org'
query={'q':'1,1,3,11,49,257','language':'english', 'go':'Search'}
data = urlencode(query)

data

'q=1%2C1%2C3%2C11%2C49%2C257&language=english&go=Search'


s = urlopen(url,data.encode('ascii')).read()


s[:300]

b'\n<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n<html>\n  \n  <head>\n  <style>\n  tt { font-family: monospace; font-size: 100%; }\n  p.editing { font-family: monospace; margin: 10px; text-indent: -10px; word-wrap:break-word;}\n  p { word-wrap: break-word; }\n  </style>\n  <meta http-equiv="content'


from urllib.parse import unquote, urlsplit, urlunsplit


unquote(data)

'q=1,1,3,11,49,257&language=english&go=Search'


x='http://www.google.fr/search?as_q=python&hl=fr&num=10&btnG=Recherche+Google&as_epq=&as_oq=&as_eq=&lr=&cr=&as_ft=i&as_filetype=pdf&as_qdr=all&as_occt=any&as_dt=i&as_sitesearch=univ-mlv.fr&as_rights=&safe=images'

y = urlsplit(x)
y

SplitResult(scheme='http', netloc='www.google.fr', path='/search', query='as_q=python&hl=fr&num=10&btnG=Recherche+Google&as_epq=&as_oq=&as_eq=&lr=&cr=&as_ft=i&as_filetype=pdf&as_qdr=all&as_occt=any&as_dt=i&as_sitesearch=univ-mlv.fr&as_rights=&safe=images', fragment='')


y.netloc

'www.google.fr'


y.path

'/search'


y.query

'as_q=python&hl=fr&num=10&btnG=Recherche+Google&as_epq=&as_oq=&as_eq=&lr=&cr=&as_ft=i&as_filetype=pdf&as_qdr=all&as_occt=any&as_dt=i&as_sitesearch=univ-mlv.fr&as_rights=&safe=images'


urlunsplit(y)

'http://www.google.fr/search?as_q=python&hl=fr&num=10&btnG=Recherche+Google&as_epq=&as_oq=&as_eq=&lr=&cr=&as_ft=i&as_filetype=pdf&as_qdr=all&as_occt=any&as_dt=i&as_sitesearch=univ-mlv.fr&as_rights=&safe=images'


from urllib.request import Request 
url='http://oeis.org'
req = Request(url)
print(dir(req))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_data', '_full_url', '_parse', '_tunnel_host', 'add_header', 'add_unredirected_header', 'data', 'fragment', 'full_url', 'get_full_url', 'get_header', 'get_method', 'has_header', 'has_proxy', 'header_items', 'headers', 'host', 'origin_req_host', 'remove_header', 'selector', 'set_proxy', 'type', 'unredirected_hdrs', 'unverifiable']


req.add_header('User-agent','Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
req.headers

{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}


req.add_header('Cookie', 'info=En-veux-tu%3F%20En%20voil%E0%21')
req.headers

{'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
 'Cookie': 'info=En-veux-tu%3F%20En%20voil%E0%21'}

import urllib.request
# Create an OpenerDirector with support for Basic HTTP Authentication...
auth_handler = urllib.request.HTTPBasicAuthHandler()
# Example credentials, hard-coded for the demonstration only.
# NOTE(review): they are registered for https://mahler:8092/site-updates.py,
# while the request at the end of this cell targets a different URL —
# confirm which URL is intended.
auth_handler.add_password(realm='PDQ Application',
                          uri='https://mahler:8092/site-updates.py',
                          user='klem',
                          passwd='kadidd!ehopper')
opener = urllib.request.build_opener(auth_handler)
# ...and install it globally so it can be used with urlopen.
urllib.request.install_opener(opener)
# The response object is neither stored nor closed here.
urllib.request.urlopen('http://www.example.com/login.html')


import requests


url = "http://igm.univ-mlv.fr/~jyt/secret/"


f = urlopen(url).read()

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-29-3ec16db46158> in <module>
----> 1 f = urlopen(url).read()

/usr/lib/python3.8/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):

/usr/lib/python3.8/urllib/request.py in open(self, fullurl, data, timeout)
    529         for processor in self.process_response.get(protocol, []):
    530             meth = getattr(processor, meth_name)
--> 531             response = meth(req, response)
    532 
    533         return response

/usr/lib/python3.8/urllib/request.py in http_response(self, request, response)
    638         # request was successfully received, understood, and accepted.
    639         if not (200 <= code < 300):
--> 640             response = self.parent.error(
    641                 'http', request, response, code, msg, hdrs)
    642 

/usr/lib/python3.8/urllib/request.py in error(self, proto, *args)
    567         if http_err:
    568             args = (dict, 'default', 'http_error_default') + orig_args
--> 569             return self._call_chain(*args)
    570 
    571 # XXX probably also want an abstract factory that knows when it makes

/usr/lib/python3.8/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    500         for handler in handlers:
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
    504                 return result

/usr/lib/python3.8/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
    647 class HTTPDefaultErrorHandler(BaseHandler):
    648     def http_error_default(self, req, fp, code, msg, hdrs):
--> 649         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    650 
    651 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 401: Authorization Required


r = requests.get(url,auth=('jyt','toto'))


r.status_code

200


r.text

"<html>\n\nIl n'y a rien a voir ici ...\n</html>\n"


r.headers

{'Date': 'Sun, 10 Oct 2021 09:15:29 GMT', 'Server': 'Apache', 'Last-Modified': 'Sun, 14 Oct 2012 16:46:23 GMT', 'ETag': '"1b2e2c6-2d-4cc07a93c95c0"', 'Accept-Ranges': 'bytes', 'Content-Length': '45', 'Keep-Alive': 'timeout=15, max=100', 'Connection': 'Keep-Alive', 'Content-Type': 'text/html'}

import http.server
import socketserver

# Minimal static file server: serves the current directory over HTTP.
PORT = 8000

Handler = http.server.SimpleHTTPRequestHandler

# The with-block closes the listening socket when it is left, so
# serve_forever() must only be called inside it (it blocks until the
# server is shut down or the process is interrupted).
with socketserver.TCPServer(("", PORT), Handler) as httpd:
    print("serving at port", PORT)
    httpd.serve_forever()

#!/usr/bin/env python

import http.server

# Minimal CGI-capable HTTP server.
PORT = 8888
# Empty host string — presumably binds on all local interfaces; confirm.
server_address = ("", PORT)

server = http.server.HTTPServer
# NOTE(review): CGIHTTPRequestHandler is deprecated in recent Python
# releases — check the http.server documentation before reusing this.
handler = http.server.CGIHTTPRequestHandler
# `handler` is the class itself, not an instance: this assignment changes
# cgi_directories on http.server.CGIHTTPRequestHandler for the whole process.
# ["/"] designates the server root as the directory holding CGI scripts.
handler.cgi_directories = ["/"]
print("Serving on port:", PORT)

httpd = server(server_address, handler)
# Blocks while the server handles requests.
httpd.serve_forever()

# module touille.py
import random, re
# Word of at least 4 letters: first letter, middle (2+ letters), last letter.
# Raw string so that \w reaches the regex engine instead of being an
# invalid string escape sequence.
p = re.compile(r'(\w)(\w\w+)(\w)', re.M)


def touille(m):
    """Return the matched word with its inner letters shuffled.

    *m* is a match object of ``p``: group(1) and group(3) are the first and
    last letters, which stay in place; group(2) is the middle.
    """
    milieu = list(m.group(2))
    random.shuffle(milieu)
    return m.group(1) + ''.join(milieu) + m.group(3)


def blurr(s):
    """Return *s* with every word matched by ``p`` scrambled by ``touille``."""
    return p.sub(touille, s)


from touille import blurr
blurr('Il est plus facile de se laver les dents dans un verre à pied que de se laver les pieds dans un verre à dents.')

'Il est plus flacie de se laevr les dnets dans un verre à pied que de se lvear les pdies dans un vrree à dnets.'

class FuddDialectizer(Dialectizer):
    """Convert HTML to Elmer Fudd-speak.

    ``subs`` is a tuple of (regex pattern, replacement) pairs.
    NOTE(review): how and in which order the pairs are applied is decided by
    the ``Dialectizer`` base class, which is not visible here — presumably
    sequentially, which would explain why ``th\\b`` is listed before ``th``.
    """
    subs = ((r'[rl]', r'w'),    # every 'r' or 'l' becomes 'w'
            (r'qu', r'qw'),
            (r'th\b', r'f'),    # 'th' at the end of a word
            (r'th', r'd'),      # any other 'th'
            (r'n[.]', r'n, uh-hah-hah-hah.'))  # 'n' followed by a literal dot


from html.parser import HTMLParser


class MyHTMLParser(HTMLParser):
    """Parser that echoes every parsing event on standard output."""

    def _echo(self, label, *details):
        # Single printing point shared by the three handlers below.
        print(label, *details)

    def handle_starttag(self, tag, attrs):
        # attrs is the list of (name, value) pairs of the opening tag.
        self._echo("Balise ouvrante :", tag, attrs)

    def handle_endtag(self, tag):
        self._echo("Balise fermante :", tag)

    def handle_data(self, data):
        # repr() keeps whitespace-only text visible in the trace.
        self._echo("Texte  :", repr(data))

# On crée une instance
parser = MyHTMLParser()

# Et on lui donne à manger

from urllib.request import urlopen
s = urlopen('http://igm.univ-mlv.fr/~jyt/M1_python/').read().decode('utf8')
parser.feed(s)

Balise ouvrante : html []
Texte  : '\n\t'
Balise ouvrante : body []
Texte  : '\n\t\t'
Balise ouvrante : h1 []
Texte  : ' M1 Informatique : Python '
Balise fermante : h1
Texte  : '\n\t\t'
Balise ouvrante : ul []
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' Commencer ici : '
Balise ouvrante : a [('href', 'http://www.python.org/')]
Texte  : 'python.org'
Balise fermante : a
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' La '
Balise ouvrante : a [('href', 'http://docs.python.org/3.8')]
Texte  : 'documentation'
Balise fermante : a
Texte  : ' officielle\n\t\t\t'
Balise ouvrante : li []
Texte  : ' Le '
Balise ouvrante : a [('href', 'http://www.pythonchallenge.com/')]
Texte  : ' Python Challenge'
Balise fermante : a
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'https://python.developpez.com/cours/DiveIntoPython/')]
Balise ouvrante : i []
Texte  : 'Dive into Python'
Balise fermante : i
Balise fermante : a
Texte  : '  \n\t\t\tpar '
Balise ouvrante : a [('href', 'http://en.wikipedia.org/wiki/Mark_Pilgrim_%28software_developer%29')]
Texte  : 'Mark Pilgrim'
Balise fermante : a
Texte  : ' \n\t\t\t(un livre recommandé mais ancien);\n\t\t\t'
Balise ouvrante : a [('href', 'http://diveintopython3.problemsolving.io/')]
Texte  : 'version Python 3'
Balise fermante : a
Texte  : '. \n\t\t\t'
Balise ouvrante : li []
Texte  : ' La '
Balise ouvrante : a [('href', 'http://fr.wikipedia.org/wiki/Python_%28langage%29')]
Texte  : 'page'
Balise fermante : a
Texte  : ' de Wikipédia contient un bon résumé\n\t\t\t'
Balise ouvrante : li []
Texte  : ' Introduction au '
Balise ouvrante : i []
Texte  : 'notebook'
Balise fermante : i
Texte  : ' ipython ou jupyter :)\n\t\t\t'
Balise ouvrante : ul []
Texte  : ' \n\t\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'intro_jupyter.html')]
Texte  : 'Exemple'
Balise fermante : a
Texte  : "  de ce qu'on peut obtenir (converti en html)\n\t\t\t\t"
Balise ouvrante : li []
Texte  : ' Le '
Balise ouvrante : a [('href', 'intro_jupyter.ipynb')]
Texte  : 'code source'
Balise fermante : a
Texte  : ' (fichier '
Balise ouvrante : tt []
Texte  : '.ipynb'
Balise fermante : tt
Texte  : '), à télécharger\n\t\t\t\t'
Balise ouvrante : li []
Texte  : ' Lancer un terminal, aller dans le dossier contenant le fichier '
Balise ouvrante : tt []
Texte  : '.ipynb'
Balise fermante : tt
Texte  : ', et entrer la commande '
Balise ouvrante : tt []
Texte  : 'jupyter notebook'
Balise fermante : tt
Texte  : '\n\t\t\t'
Balise fermante : ul
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' Cours 1 (22/9/21) '
Balise ouvrante : a [('href', 'python3_M1_2021-1.html')]
Texte  : 'html'
Balise fermante : a
Texte  : ' - '
Balise ouvrante : a [('href', 'python3_M1_2021-1.ipynb')]
Texte  : 'notebook'
Balise fermante : a
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'td1_python3_2020.html')]
Texte  : ' TD 1'
Balise fermante : a
Texte  : ' (22/9/21)\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'script3.py')]
Texte  : 'modèle de script'
Balise fermante : a
Texte  : ' avec options et documentation\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'td1_sol_3_2021.html')]
Texte  : 'Corrigé'
Balise fermante : a
Texte  : ' du TD 1\n\t\t\t'
Balise ouvrante : li []
Texte  : ' Cours 2 (29/09/2021) '
Balise ouvrante : a [('href', 'Python3_M1_2021-2.html')]
Texte  : 'html'
Balise fermante : a
Texte  : ' - '
Balise ouvrante : a [('href', 'Python3_M1_2021-2.ipynb')]
Texte  : 'notebook'
Balise fermante : a
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'td2_2021_texte.html')]
Texte  : 'TD 2'
Balise fermante : a
Texte  : ' (29/09/2021)\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'td2_solpy3_2021.html')]
Texte  : 'Corrigé'
Balise fermante : a
Texte  : ' du TD 2\n\t\t\t'
Balise ouvrante : li []
Texte  : ' Cours 3 (6/10/2021) '
Balise ouvrante : a [('href', 'python3_M1_2021-3.html')]
Texte  : 'html'
Balise fermante : a
Texte  : ' - '
Balise ouvrante : a [('href', 'python3_M1_2021-3.ipynb')]
Texte  : 'notebook'
Balise fermante : a
Texte  : '\n\t\t\t'
Balise ouvrante : li []
Texte  : ' '
Balise ouvrante : a [('href', 'td3_2021_texte.html')]
Texte  : 'TD 3'
Balise fermante : a
Texte  : ' (6/10/2021)\n\n\n\n\t\t\n\n\n\t\t\t\n\n\t\t'
Balise fermante : ul
Texte  : '\n\t'
Balise fermante : body
Texte  : '\n'
Balise fermante : html
Texte  : '\n\n\t\t\n'


from html.parser import HTMLParser

class URLLister(HTMLParser):
    """Collect the value of every ``href`` attribute met while parsing.

    The values are accumulated, in document order, in ``self.urls``.
    """

    def reset(self):
        # reset() is called implicitly at instantiation, so ``urls`` exists
        # before the first feed().
        super().reset()
        self.urls = []

    def handle_starttag(self, tag, attrs):
        # attrs is a list of (name, value) pairs.
        for name, value in attrs:
            if name == 'href':
                self.urls.append(value)


s = urlopen('http://igm.univ-mlv.fr/~jyt/M1_python').read().decode('utf8')
p = URLLister()
p.feed(s)
p.close()

for u in p.urls: print (u)

http://www.python.org/
http://docs.python.org/3.8
http://www.pythonchallenge.com/
https://python.developpez.com/cours/DiveIntoPython/
http://en.wikipedia.org/wiki/Mark_Pilgrim_%28software_developer%29
http://diveintopython3.problemsolving.io/
http://fr.wikipedia.org/wiki/Python_%28langage%29
intro_jupyter.html
intro_jupyter.ipynb
python3_M1_2021-1.html
python3_M1_2021-1.ipynb
td1_python3_2020.html
script3.py
td1_sol_3_2021.html
Python3_M1_2021-2.html
Python3_M1_2021-2.ipynb
td2_2021_texte.html
td2_solpy3_2021.html
python3_M1_2021-3.html
python3_M1_2021-3.ipynb
td3_2021_texte.html


from html.entities import entitydefs

class BaseHTMLProcessor(HTMLParser):
    """Rebuild an HTML document piece by piece while parsing it.

    Each handler appends the textual form of what it received to
    ``self.pieces``; ``output()`` joins the pieces into one string.
    Subclasses are meant to override ``handle_data`` to transform the text
    found between tags.

    NOTE: no ``__init__`` is defined, so HTMLParser's default
    ``convert_charrefs=True`` applies: references in text are already
    decoded when they reach ``handle_data``, and the parser itself does not
    call ``handle_charref`` / ``handle_entityref``. Decoded text and
    attribute values are written back without being re-escaped.
    """

    def reset(self):
        # Called implicitly at instantiation: start with an empty output.
        self.pieces = []
        HTMLParser.reset(self)

    def handle_starttag(self, tag, attrs):
        # attrs is a list of (name, value) pairs; value is None for an
        # attribute written without a value (e.g. <input disabled>), which
        # must be emitted bare rather than as name="None".
        strattrs = "".join([' %s' % key if value is None
                            else ' %s="%s"' % (key, value)
                            for key, value in attrs])
        self.pieces.append("<%(tag)s%(strattrs)s>" % locals())

    def handle_endtag(self, tag):
        self.pieces.append("</%(tag)s>" % locals())

    def handle_charref(self, ref):
        self.pieces.append("&#%(ref)s;" % locals())

    def handle_entityref(self, ref):
        self.pieces.append("&%(ref)s" % locals())
        # Only known entity names get their terminating semicolon back.
        if ref in entitydefs:
            self.pieces.append(";")

    def handle_data(self, text):  # meant to be overridden
        self.pieces.append(text)

    def handle_comment(self, text):
        self.pieces.append("<!--%(text)s-->" % locals())

    def handle_pi(self, text):
        self.pieces.append("<?%(text)s>" % locals())

    def handle_decl(self, text):
        self.pieces.append("<!%(text)s>" % locals())

    def output(self):
        """Return processed HTML as a single string"""
        return "".join(self.pieces)


b = BaseHTMLProcessor()
b.feed(s)


b.close()


print (b.pieces)

['<html>', '\n\t', '<body>', '\n\t\t', '<h1>', ' M1 Informatique : Python ', '</h1>', '\n\t\t', '<ul>', '\n\t\t\t', '<li>', ' Commencer ici : ', '<a href="http://www.python.org/">', 'python.org', '</a>', '\n\t\t\t', '<li>', ' La ', '<a href="http://docs.python.org/3.8">', 'documentation', '</a>', ' officielle\n\t\t\t', '<li>', ' Le ', '<a href="http://www.pythonchallenge.com/">', ' Python Challenge', '</a>', '\n\t\t\t', '<li>', ' ', '<a href="https://python.developpez.com/cours/DiveIntoPython/">', '<i>', 'Dive into Python', '</i>', '</a>', '  \n\t\t\tpar ', '<a href="http://en.wikipedia.org/wiki/Mark_Pilgrim_%28software_developer%29">', 'Mark Pilgrim', '</a>', ' \n\t\t\t(un livre recommandé mais ancien);\n\t\t\t', '<a href="http://diveintopython3.problemsolving.io/">', 'version Python 3', '</a>', '. \n\t\t\t', '<li>', ' La ', '<a href="http://fr.wikipedia.org/wiki/Python_%28langage%29">', 'page', '</a>', ' de Wikipédia contient un bon résumé\n\t\t\t', '<li>', ' Introduction au ', '<i>', 'notebook', '</i>', ' ipython ou jupyter :)\n\t\t\t', '<ul>', ' \n\t\t\t\t', '<li>', ' ', '<a href="intro_jupyter.html">', 'Exemple', '</a>', "  de ce qu'on peut obtenir (converti en html)\n\t\t\t\t", '<li>', ' Le ', '<a href="intro_jupyter.ipynb">', 'code source', '</a>', ' (fichier ', '<tt>', '.ipynb', '</tt>', '), à télécharger\n\t\t\t\t', '<li>', ' Lancer un terminal, aller dans le dossier contenant le fichier ', '<tt>', '.ipynb', '</tt>', ', et entrer la commande ', '<tt>', 'jupyter notebook', '</tt>', '\n\t\t\t', '</ul>', '\n\t\t\t', '<li>', ' Cours 1 (22/9/21) ', '<a href="python3_M1_2021-1.html">', 'html', '</a>', ' - ', '<a href="python3_M1_2021-1.ipynb">', 'notebook', '</a>', '\n\t\t\t', '<li>', ' ', '<a href="td1_python3_2020.html">', ' TD 1', '</a>', ' (22/9/21)\n\t\t\t', '<li>', ' ', '<a href="script3.py">', 'modèle de script', '</a>', ' avec options et documentation\n\t\t\t', '<li>', ' ', '<a href="td1_sol_3_2021.html">', 'Corrigé', '</a>', ' du TD 1\n\t\t\t', '<li>', 
' Cours 2 (29/09/2021) ', '<a href="Python3_M1_2021-2.html">', 'html', '</a>', ' - ', '<a href="Python3_M1_2021-2.ipynb">', 'notebook', '</a>', '\n\t\t\t', '<li>', ' ', '<a href="td2_2021_texte.html">', 'TD 2', '</a>', ' (29/09/2021)\n\t\t\t', '<li>', ' ', '<a href="td2_solpy3_2021.html">', 'Corrigé', '</a>', ' du TD 2\n\t\t\t', '<li>', ' Cours 3 (6/10/2021) ', '<a href="python3_M1_2021-3.html">', 'html', '</a>', ' - ', '<a href="python3_M1_2021-3.ipynb">', 'notebook', '</a>', '\n\t\t\t', '<li>', ' ', '<a href="td3_2021_texte.html">', 'TD 3', '</a>', ' (6/10/2021)\n\n\n\n\t\t\n\n\n\t\t\t\n\n\t\t', '</ul>', '\n\t', '</body>', '\n', '</html>', '\n\n\t\t\n']


b.output()

'<html>\n\t<body>\n\t\t<h1> M1 Informatique : Python </h1>\n\t\t<ul>\n\t\t\t<li> Commencer ici : <a href="http://www.python.org/">python.org</a>\n\t\t\t<li> La <a href="http://docs.python.org/3.8">documentation</a> officielle\n\t\t\t<li> Le <a href="http://www.pythonchallenge.com/"> Python Challenge</a>\n\t\t\t<li> <a href="https://python.developpez.com/cours/DiveIntoPython/"><i>Dive into Python</i></a>  \n\t\t\tpar <a href="http://en.wikipedia.org/wiki/Mark_Pilgrim_%28software_developer%29">Mark Pilgrim</a> \n\t\t\t(un livre recommandé mais ancien);\n\t\t\t<a href="http://diveintopython3.problemsolving.io/">version Python 3</a>. \n\t\t\t<li> La <a href="http://fr.wikipedia.org/wiki/Python_%28langage%29">page</a> de Wikipédia contient un bon résumé\n\t\t\t<li> Introduction au <i>notebook</i> ipython ou jupyter :)\n\t\t\t<ul> \n\t\t\t\t<li> <a href="intro_jupyter.html">Exemple</a>  de ce qu\'on peut obtenir (converti en html)\n\t\t\t\t<li> Le <a href="intro_jupyter.ipynb">code source</a> (fichier <tt>.ipynb</tt>), à télécharger\n\t\t\t\t<li> Lancer un terminal, aller dans le dossier contenant le fichier <tt>.ipynb</tt>, et entrer la commande <tt>jupyter notebook</tt>\n\t\t\t</ul>\n\t\t\t<li> Cours 1 (22/9/21) <a href="python3_M1_2021-1.html">html</a> - <a href="python3_M1_2021-1.ipynb">notebook</a>\n\t\t\t<li> <a href="td1_python3_2020.html"> TD 1</a> (22/9/21)\n\t\t\t<li> <a href="script3.py">modèle de script</a> avec options et documentation\n\t\t\t<li> <a href="td1_sol_3_2021.html">Corrigé</a> du TD 1\n\t\t\t<li> Cours 2 (29/09/2021) <a href="Python3_M1_2021-2.html">html</a> - <a href="Python3_M1_2021-2.ipynb">notebook</a>\n\t\t\t<li> <a href="td2_2021_texte.html">TD 2</a> (29/09/2021)\n\t\t\t<li> <a href="td2_solpy3_2021.html">Corrigé</a> du TD 2\n\t\t\t<li> Cours 3 (6/10/2021) <a href="python3_M1_2021-3.html">html</a> - <a href="python3_M1_2021-3.ipynb">notebook</a>\n\t\t\t<li> <a href="td3_2021_texte.html">TD 3</a> 
(6/10/2021)\n\n\n\n\t\t\n\n\n\t\t\t\n\n\t\t</ul>\n\t</body>\n</html>\n\n\t\t\n'


def f(x):
    """Print the local namespace of this call: the argument ``x`` and ``y``."""
    y = 'toto'
    # locals() returns a dict mapping local variable names to their values.
    print (locals())


f(42)

{'x': 42, 'y': 'toto'}

>>> print (globals())
{'f': <function f at 0x402d35a4>, '__builtins__': 
<module '__builtin__' (built-in)>, '__name__': '__main__', 
'__doc__': None}
>>> dir()
['__builtins__', '__doc__', '__name__', 'f']


d = {'animal':'cheval', 'parent':'cousin', 'aliment':'foin', 'jour':'dimanche'}
s = 'Le %(animal)s de mon %(parent)s ne mange du %(aliment)s que le %(jour)s'

s % d

'Le cheval de mon cousin ne mange du foin que le dimanche'

def handle_starttag(self, tag, attrs):
        """Append the textual form of an opening tag to ``self.pieces``.

        Excerpt of a parser method: ``attrs`` is iterated as (key, value)
        pairs, each rendered as `` key="value"``.
        """
        strattrs = "".join([' %s="%s"' % (key, value) 
                             for key, value in attrs]) 
        # "%(name)s" placeholders are filled from the dict of local
        # variables, so the names `tag` and `strattrs` must not be changed.
        self.pieces.append("<%(tag)s%(strattrs)s>" % locals())


htmlSource = """        
    <html>
     <head>
     <title>Test page</title>
     </head>
     <body>
     <ul>
     <li><a href=index.html>Home</a></li>
     <li><a href=toc.html>Table of contents</a></li>
     <li><a href=history.html>Revision history</a></li>
     </body>
     </html>"""


parser = BaseHTMLProcessor()
parser.feed(htmlSource) 
parser.close()
print (parser.output())

        
    <html>
     <head>
     <title>Test page</title>
     </head>
     <body>
     <ul>
     <li><a href="index.html">Home</a></li>
     <li><a href="toc.html">Table of contents</a></li>
     <li><a href="history.html">Revision history</a></li>
     </body>
     </html>

def handle_data(self, text):
    """Append *text* to ``self.pieces``, processed unless in verbatim mode.

    When ``self.verbatim`` is true the text is kept unchanged; otherwise
    the result of ``self.process(text)`` is stored.
    """
    # A conditional expression rather than ``verbatim and text or
    # process(text)``: the and/or form falls through to process() whenever
    # text is empty, even in verbatim mode.
    self.pieces.append(text if self.verbatim else self.process(text))

# module touille.py
import random, re

# Word of at least 3 letters: first letter, middle (1+ letters), last letter.
# Raw string so that \w reaches the regex engine instead of being an
# invalid string escape sequence.
p = re.compile(r'(\w)(\w+)(\w)', re.M)


def touille(m):
    """Return the matched word with its inner letters shuffled.

    *m* is a match object of ``p``: group(1) and group(3) are the first and
    last letters, which stay in place; group(2) is the middle.
    """
    milieu = list(m.group(2))
    random.shuffle(milieu)
    return m.group(1) + ''.join(milieu) + m.group(3)


def blurr(s):
    """Return *s* with every word matched by ``p`` scrambled by ``touille``."""
    return p.sub(touille, s)

Python et le web¶

Pour le protocole `http` :¶

Pour le traitement du HTML :¶

Pour la construction de sites :¶

Le module urllib¶

Exemple¶

Le module urllib.parse¶

Fonctionnalités avancées d'urllib.request¶

Exemple : authentification basique.¶

Le module requests (à installer)¶

Côté serveur¶

Traitement du HTML¶

La classe `HTMLParser`¶

Exemple : URLLister¶

La traduction d'une page HTML¶

La classe `BaseHTMLProcessor`¶

Commentaires sur la syntaxe¶

Le dialectiseur¶

Brouilleur de page web : le code complet¶

Il faut un module de traduction, par exemple celui-ci :¶

La classe `BaseHTMLProcessor`¶

Le programme principal `blurweb3.py`¶

Python et le web¶

Pour le protocole http :¶

Pour le traitement du HTML :¶

Pour la construction de sites :¶

Le module urllib¶

Exemple¶

Le module urllib.parse¶

Fonctionnalités avancées d'urllib.request¶

Exemple : authentification basique.¶

Le module requests (à installer)¶

Côté serveur¶

Traitement du HTML¶

La classe HTMLParser¶

Exemple : URLLister¶

La traduction d'une page HTML¶

La classe BaseHTMLProcessor¶

Commentaires sur la syntaxe¶

Le dialectiseur¶

Brouilleur de page web : le code complet¶

Il faut un module de traduction, par exemple celui-ci :¶

La classe BaseHTMLProcessor¶

Le programme principal blurweb3.py¶

Pour le protocole `http` :¶

La classe `HTMLParser`¶

La classe `BaseHTMLProcessor`¶

La classe `BaseHTMLProcessor`¶

Le programme principal `blurweb3.py`¶