Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: py-scraping/mechanize/_gzip.py@ 128

Last change on this file since 128 was 106, checked in by Rick van der Zwet, 15 years ago
Initial commit...
File size: 3.2 KB

Rev	Line
[106]	1	import urllib2
	2	from cStringIO import StringIO
	3	import _response
	4
	5	# GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
	6	class GzipConsumer:
	7
	8	def __init__(self, consumer):
	9	self.__consumer = consumer
	10	self.__decoder = None
	11	self.__data = ""
	12
	13	def __getattr__(self, key):
	14	return getattr(self.__consumer, key)
	15
	16	def feed(self, data):
	17	if self.__decoder is None:
	18	# check if we have a full gzip header
	19	data = self.__data + data
	20	try:
	21	i = 10
	22	flag = ord(data[3])
	23	if flag & 4: # extra
	24	x = ord(data[i]) + 256 * ord(data[i + 1])
	25	i = i + 2 + x
	26	if flag & 8: # filename
	27	while ord(data[i]):
	28	i = i + 1
	29	i = i + 1
	30	if flag & 16: # comment
	31	while ord(data[i]):
	32	i = i + 1
	33	i = i + 1
	34	if flag & 2: # crc
	35	i = i + 2
	36	if len(data) < i:
	37	raise IndexError("not enough data")
	38	if data[:3] != "\x1f\x8b\x08":
	39	raise IOError("invalid gzip data")
	40	data = data[i:]
	41	except IndexError:
	42	self.__data = data
	43	return # need more data
	44	import zlib
	45	self.__data = ""
	46	self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
	47	data = self.__decoder.decompress(data)
	48	if data:
	49	self.__consumer.feed(data)
	50
	51	def close(self):
	52	if self.__decoder:
	53	data = self.__decoder.flush()
	54	if data:
	55	self.__consumer.feed(data)
	56	self.__consumer.close()
	57
	58
	59	# --------------------------------------------------------------------
	60
	61	# the rest of this module is John Lee's stupid code, not
	62	# Fredrik's nice code :-)
	63
	64	class stupid_gzip_consumer:
	65	def __init__(self): self.data = []
	66	def feed(self, data): self.data.append(data)
	67
	68	class stupid_gzip_wrapper(_response.closeable_response):
	69	def __init__(self, response):
	70	self._response = response
	71
	72	c = stupid_gzip_consumer()
	73	gzc = GzipConsumer(c)
	74	gzc.feed(response.read())
	75	self.__data = StringIO("".join(c.data))
	76
	77	def read(self, size= -1):
	78	return self.__data.read(size)
	79	def readline(self, size= -1):
	80	return self.__data.readline(size)
	81	def readlines(self, sizehint= -1):
	82	return self.__data.readlines(sizehint)
	83
	84	def __getattr__(self, name):
	85	# delegate unknown methods/attributes
	86	return getattr(self._response, name)
	87
	88	class HTTPGzipProcessor(urllib2.BaseHandler):
	89	handler_order = 200 # response processing before HTTPEquivProcessor
	90
	91	def http_request(self, request):
	92	request.add_header("Accept-Encoding", "gzip")
	93	return request
	94
	95	def http_response(self, request, response):
	96	# post-process response
	97	enc_hdrs = response.info().getheaders("Content-encoding")
	98	for enc_hdr in enc_hdrs:
	99	if ("gzip" in enc_hdr) or ("compress" in enc_hdr):
	100	return stupid_gzip_wrapper(response)
	101	return response
	102
	103	https_response = http_response

Note: See TracBrowser for help on using the repository browser.

Download in other formats: