source: py-scraping/mechanize/_gzip.py@ 149

Last change on this file since 149 was 106, checked in by Rick van der Zwet, 15 years ago

Initial commit...

File size: 3.2 KB
RevLine 
[106]1import urllib2
2from cStringIO import StringIO
3import _response
4
5# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
class GzipConsumer:
    """Incremental gzip decoder that forwards inflated data to a consumer.

    Compressed data is pushed in arbitrary chunks through feed().  The gzip
    member header is skipped first; everything after it is treated as a raw
    deflate stream, inflated with zlib, and each non-empty piece of output is
    handed to ``consumer.feed()``.  close() flushes the decoder and closes
    the consumer.  Attributes not defined here are looked up on the wrapped
    consumer.
    """

    def __init__(self, consumer):
        """Wrap *consumer*, an object providing feed(data) and close()."""
        self.__consumer = consumer
        self.__decoder = None  # created once the gzip header has been skipped
        self.__data = b""  # header bytes buffered while the header is partial

    def __getattr__(self, key):
        """Delegate attributes that are not found here to the consumer."""
        # __getattr__ only runs when normal lookup fails.  If one of our own
        # name-mangled attributes is the one missing (e.g. __init__ has not
        # run yet), fail cleanly instead of recursing forever through the
        # self.__consumer lookup below.
        if key.startswith("_GzipConsumer__"):
            raise AttributeError(key)
        return getattr(self.__consumer, key)

    def __body_offset(self, data):
        """Return the offset of the deflate payload inside *data*.

        Returns None while the gzip header is still incomplete.  Raises
        IOError("invalid gzip data") as soon as the bytes seen so far cannot
        be the start of a gzip/deflate member.
        """
        magic = b"\x1f\x8b\x08"  # ID1, ID2, CM=8 (deflate)
        seen = data[:3]
        # Validate whatever part of the magic has arrived so far, so that
        # non-gzip input is rejected immediately rather than being buffered
        # while bogus flag bits make the header look unfinished.
        if seen != magic[:len(seen)]:
            raise IOError("invalid gzip data")

        # bytearray yields integers on every Python version, whereas
        # indexing the raw string yields a 1-char string on Python 2.
        flag_byte = bytearray(data[3:4])
        if not flag_byte:
            return None
        flag = flag_byte[0]

        pos = 10  # fixed-size part of the header
        if flag & 4:  # extra field: 2-byte little-endian length + payload
            xlen = bytearray(data[pos:pos + 2])
            if len(xlen) < 2:
                return None
            pos += 2 + xlen[0] + 256 * xlen[1]
        if flag & 8:  # NUL-terminated original file name
            end = data.find(b"\x00", pos)
            if end < 0:
                return None
            pos = end + 1
        if flag & 16:  # NUL-terminated comment
            end = data.find(b"\x00", pos)
            if end < 0:
                return None
            pos = end + 1
        if flag & 2:  # 16-bit header CRC
            pos += 2
        if len(data) < pos:
            return None
        return pos

    def feed(self, data):
        """Consume one chunk of gzip data.

        Until the header is complete the chunk is only buffered.  Afterwards
        the chunk is inflated and any output is passed to the consumer.

        Raises IOError if the input does not start with a gzip header.
        """
        if self.__decoder is None:
            # Still inside the header: retry the parse on everything so far.
            data = self.__data + data
            offset = self.__body_offset(data)
            if offset is None:
                self.__data = data
                return  # need more data
            import zlib
            self.__data = b""
            # Negative wbits selects a raw deflate stream (no zlib header).
            self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
            data = data[offset:]
        data = self.__decoder.decompress(data)
        if data:
            self.__consumer.feed(data)

    def close(self):
        """Flush pending output to the consumer, then close the consumer."""
        if self.__decoder is not None:
            data = self.__decoder.flush()
            if data:
                self.__consumer.feed(data)
        self.__consumer.close()
57
58
59# --------------------------------------------------------------------
60
61# the rest of this module is John Lee's stupid code, not
62# Fredrik's nice code :-)
63
class stupid_gzip_consumer:
    """Minimal consumer that collects every chunk fed to it in a list."""

    def __init__(self):
        self.data = []  # decompressed chunks, in arrival order

    def feed(self, data):
        """Record one decompressed chunk."""
        self.data.append(data)

    def close(self):
        """Do nothing; exists because GzipConsumer.close() calls it."""


class stupid_gzip_wrapper(_response.closeable_response):
    """Response wrapper that serves the gunzipped body of *response*.

    The whole body is read and decompressed eagerly in the constructor and
    kept in memory; read(), readline() and readlines() then operate on that
    buffer.  Every other attribute is delegated to the wrapped response.
    """

    def __init__(self, response):
        """Read and decompress the complete body of *response*."""
        # NOTE(review): the base-class __init__ is not called here, exactly
        # as in the original code -- confirm that this is intentional.
        self._response = response

        c = stupid_gzip_consumer()
        gzc = GzipConsumer(c)
        gzc.feed(response.read())
        # Flush the decoder so output still pending inside zlib is not lost.
        gzc.close()
        self.__data = StringIO("".join(c.data))

    def read(self, size= -1):
        """Read up to *size* bytes of decompressed data (all if negative)."""
        return self.__data.read(size)

    def readline(self, size= -1):
        """Read one line of decompressed data."""
        return self.__data.readline(size)

    def readlines(self, sizehint= -1):
        """Read the remaining decompressed data as a list of lines."""
        return self.__data.readlines(sizehint)

    def __getattr__(self, name):
        # delegate unknown methods/attributes
        # If _response itself is missing, the lookup below would re-enter
        # __getattr__ without end, so fail explicitly instead.
        if name == "_response":
            raise AttributeError(name)
        return getattr(self._response, name)
87
class HTTPGzipProcessor(urllib2.BaseHandler):
    """urllib2 processor that asks for gzip and decodes gzip responses."""

    handler_order = 200  # response processing before HTTPEquivProcessor

    def http_request(self, request):
        """Advertise gzip support on the outgoing request."""
        # NOTE(review): there is no https_request alias, so HTTPS requests
        # do not advertise gzip although https_response decodes it --
        # confirm whether that asymmetry is intended.
        request.add_header("Accept-Encoding", "gzip")
        return request

    def http_response(self, request, response):
        """Return *response*, wrapped in a gzip decoder when it is gzipped.

        Only the "gzip" and "x-gzip" content codings are decoded.  The
        "compress" coding is a different (LZW) format that the gzip decoder
        cannot read, so such responses are passed through untouched.
        """
        # post-process response
        for enc_hdr in response.info().getheaders("Content-encoding"):
            # A header may list several codings; coding names are
            # case-insensitive.
            for coding in enc_hdr.split(","):
                if coding.strip().lower() in ("gzip", "x-gzip"):
                    return stupid_gzip_wrapper(response)
        return response

    https_response = http_response
Note: See TracBrowser for help on using the repository browser.