Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: py-scraping/mechanize/_gzip.py@ 203

Last change on this file since 203 was 106, checked in by Rick van der Zwet, 15 years ago
Initial commit...
File size: 3.2 KB

Line
1	import urllib2
2	from cStringIO import StringIO
3	import _response
4
5	# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
class GzipConsumer:
    """Push-style gzip decoder that sits in front of another consumer.

    Raw gzip bytes are handed to feed() in pieces of any size.  The gzip
    header is buffered until it is complete and then stripped; everything
    after it is inflated with zlib and each non-empty chunk of decompressed
    output is forwarded to ``consumer.feed()``.  Attributes that are not
    defined on this class are looked up on the wrapped consumer.

    The trailing gzip checksum/length fields are not verified here.
    """

    def __init__(self, consumer):
        """Wrap *consumer*, an object providing feed(data) and close()."""
        self.__consumer = consumer
        # zlib decompress object; stays None until the header has been parsed
        self.__decoder = None
        # header bytes received so far while the header is still incomplete
        self.__data = ""

    def __getattr__(self, key):
        """Delegate unknown attribute lookups to the wrapped consumer."""
        return getattr(self.__consumer, key)

    def feed(self, data):
        """Accept the next piece of the compressed stream.

        Returns None.  While the gzip header is incomplete the input is
        only buffered.  Raises IOError("invalid gzip data") as soon as the
        bytes seen so far cannot be the start of a gzip/deflate stream.
        """
        if self.__decoder is None:
            # check if we have a full gzip header
            data = self.__data + data

            # Validate the magic bytes up front, on however many of them
            # have arrived.  Doing this only after the variable-length
            # header fields are parsed lets non-gzip input masquerade as
            # an "incomplete header" and be buffered silently forever.
            head = data[:3]
            if head != "\x1f\x8b\x08"[:len(head)]:
                raise IOError("invalid gzip data")

            try:
                i = 10  # size of the fixed part of the header
                flag = ord(data[3])
                if flag & 4:  # extra: 2-byte little-endian length + payload
                    x = ord(data[i]) + 256 * ord(data[i + 1])
                    i = i + 2 + x
                for bit in (8, 16):  # filename, then comment: NUL-terminated
                    if flag & bit:
                        end = data.find("\x00", i)
                        if end < 0:
                            raise IndexError("not enough data")
                        i = end + 1
                if flag & 2:  # crc
                    i = i + 2
                if len(data) < i:
                    raise IndexError("not enough data")
                data = data[i:]
            except IndexError:
                self.__data = data
                return  # need more data
            import zlib
            self.__data = ""
            # negative wbits: the payload is a raw deflate stream, the
            # gzip header having been removed above
            self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
        data = self.__decoder.decompress(data)
        if data:
            self.__consumer.feed(data)

    def close(self):
        """Flush any pending decompressed output, then close the consumer.

        If no complete header was ever received there is no decoder to
        flush and the consumer is simply closed.
        """
        if self.__decoder:
            data = self.__decoder.flush()
            if data:
                self.__consumer.feed(data)
        self.__consumer.close()
57
58
59	# --------------------------------------------------------------------
60
61	# the rest of this module is John Lee's stupid code, not
62	# Fredrik's nice code :-)
63
class stupid_gzip_consumer:
    """Minimal consumer that just remembers every chunk it is fed."""

    def __init__(self):
        # chunks in arrival order; callers join them to get the full payload
        self.data = []

    def feed(self, data):
        """Append *data* to the list of collected chunks."""
        self.data.append(data)
67
class stupid_gzip_wrapper(_response.closeable_response):
    """Response stand-in whose body is the gunzipped body of another response.

    The wrapped response is read completely and decompressed in the
    constructor; read(), readline() and readlines() then serve the
    decompressed text from memory.  Anything else is delegated to the
    wrapped response.
    """

    def __init__(self, response):
        """Read all of *response* and keep its decompressed body in memory."""
        self._response = response

        sink = stupid_gzip_consumer()
        decoder = GzipConsumer(sink)
        decoder.feed(response.read())
        decompressed = "".join(sink.data)
        self.__data = StringIO(decompressed)

    def read(self, size=-1):
        """Read up to *size* characters of the decompressed body."""
        body = self.__data
        return body.read(size)

    def readline(self, size=-1):
        """Read one line of the decompressed body."""
        body = self.__data
        return body.readline(size)

    def readlines(self, sizehint=-1):
        """Read the remaining lines of the decompressed body as a list."""
        body = self.__data
        return body.readlines(sizehint)

    def __getattr__(self, name):
        # delegate unknown methods/attributes to the original response
        wrapped = self._response
        return getattr(wrapped, name)
87
class HTTPGzipProcessor(urllib2.BaseHandler):
    """urllib2 handler that asks for gzip and decompresses such responses.

    Requests get an ``Accept-Encoding: gzip`` header.  Responses whose
    Content-encoding header mentions "gzip" or "compress" are replaced by a
    stupid_gzip_wrapper around them; all other responses pass through
    untouched.
    """
    handler_order = 200  # response processing before HTTPEquivProcessor

    def http_request(self, request):
        """Advertise gzip support on *request* and return it."""
        request.add_header("Accept-Encoding", "gzip")
        return request

    def http_response(self, request, response):
        """Return *response*, wrapped for decompression when it is encoded."""
        # post-process response
        enc_hdrs = response.info().getheaders("Content-encoding")
        for enc_hdr in enc_hdrs:
            # content-coding tokens are case-insensitive, so "GZIP" or
            # "X-Gzip" must be recognised as well
            coding = enc_hdr.lower()
            # NOTE(review): "compress" is routed through the gzip decoder
            # too, as in the original code -- confirm that is intended.
            if ("gzip" in coding) or ("compress" in coding):
                return stupid_gzip_wrapper(response)
        return response

    https_response = http_response

Note: See TracBrowser for help on using the repository browser.

Download in other formats: