[106] | 1 | import urllib2
|
---|
| 2 | from cStringIO import StringIO
|
---|
| 3 | import _response
|
---|
| 4 |
|
---|
| 5 | # GzipConsumer was taken from Fredrik Lundh's effbot.org-0.1-20041009 library
|
---|
class GzipConsumer:
    """Streaming gzip decoder that forwards inflated data to a consumer.

    Compressed data is pushed in with feed().  The gzip header is stripped
    first (buffering input until it is complete); everything after it is
    inflated as a raw deflate stream and handed to the wrapped consumer's
    feed().  Attributes not defined here are looked up on the consumer.
    """

    def __init__(self, consumer):
        """Wrap *consumer*, an object providing feed(data) and close()."""
        self.__consumer = consumer
        # zlib decompressor; created once the gzip header has been consumed
        self.__decoder = None
        # input held back while the gzip header is still incomplete
        self.__data = ""

    def __getattr__(self, key):
        """Delegate unknown attribute lookups to the wrapped consumer."""
        return getattr(self.__consumer, key)

    def feed(self, data):
        """Accept the next chunk of the gzip stream.

        While the header is incomplete the input is only buffered and
        nothing is passed on.  Afterwards each chunk is inflated and any
        resulting output is passed to the consumer's feed().

        Raises IOError("invalid gzip data") if the input does not start
        with the gzip signature.
        """
        if self.__decoder is None:
            # check if we have a full gzip header
            data = self.__data + data
            # Validate whatever part of the 3-byte signature (two magic
            # bytes plus the deflate method byte) has arrived so far.
            # Doing this before walking the header keeps garbage input
            # from being mistaken for "header not complete yet" and
            # buffered forever.
            magic = "\x1f\x8b\x08"
            if data[:3] != magic[:len(data)]:
                raise IOError("invalid gzip data")
            try:
                # Indexing past the end raises IndexError, which is the
                # signal that more input is needed.
                i = 10  # size of the fixed part of the header
                flag = ord(data[3])
                if flag & 4:  # extra
                    x = ord(data[i]) + 256 * ord(data[i + 1])
                    i = i + 2 + x
                for bit in (8, 16):  # filename, then comment
                    if flag & bit:
                        # zero-terminated string: skip it and its terminator
                        while ord(data[i]):
                            i = i + 1
                        i = i + 1
                if flag & 2:  # crc
                    i = i + 2
                if len(data) < i:
                    raise IndexError("not enough data")
                data = data[i:]
            except IndexError:
                self.__data = data
                return  # need more data
            import zlib
            self.__data = ""
            # negative wbits: raw deflate stream, the header is already gone
            self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
        data = self.__decoder.decompress(data)
        if data:
            self.__consumer.feed(data)

    def close(self):
        """Flush the decoder, if one was started, and close the consumer."""
        if self.__decoder:
            data = self.__decoder.flush()
            if data:
                self.__consumer.feed(data)
        self.__consumer.close()
|
---|
| 57 |
|
---|
| 58 |
|
---|
| 59 | # --------------------------------------------------------------------
|
---|
| 60 |
|
---|
| 61 | # the rest of this module is John Lee's stupid code, not
|
---|
| 62 | # Fredrik's nice code :-)
|
---|
| 63 |
|
---|
class stupid_gzip_consumer:
    """Minimal consumer that collects every chunk it is fed in a list."""

    def __init__(self):
        # chunks in the order they were fed
        self.data = []

    def feed(self, data):
        """Append *data* to the collected chunks."""
        self.data.append(data)

    def close(self):
        """Do nothing.

        Present so that a producer which closes its consumer (as
        GzipConsumer.close() does) can be used with this class without
        raising AttributeError.
        """
|
---|
| 67 |
|
---|
class stupid_gzip_wrapper(_response.closeable_response):
    """Response wrapper serving the gunzipped body of another response.

    The wrapped response's whole body is read and decoded when the wrapper
    is constructed; read(), readline() and readlines() are then answered
    from that in-memory copy.  Any other attribute is looked up on the
    wrapped response.
    """

    def __init__(self, response):
        """Read and decode the complete body of *response*."""
        self._response = response

        # NOTE(review): the decoder is fed once and never close()d.
        collector = stupid_gzip_consumer()
        GzipConsumer(collector).feed(response.read())
        decoded = "".join(collector.data)
        self.__data = StringIO(decoded)

    def read(self, size=-1):
        """Read from the decoded body; see the file-object read()."""
        return self.__data.read(size)

    def readline(self, size=-1):
        """Read one line from the decoded body."""
        return self.__data.readline(size)

    def readlines(self, sizehint=-1):
        """Read the remaining lines of the decoded body."""
        return self.__data.readlines(sizehint)

    def __getattr__(self, name):
        # Reached only for names not found by normal lookup: hand those
        # methods/attributes over to the wrapped response.
        return getattr(self._response, name)
|
---|
| 87 |
|
---|
class HTTPGzipProcessor(urllib2.BaseHandler):
    """urllib2 handler that asks for gzip and decodes gzipped responses."""

    handler_order = 200  # response processing before HTTPEquivProcessor

    def http_request(self, request):
        """Add an "Accept-Encoding: gzip" header and return the request."""
        request.add_header("Accept-Encoding", "gzip")
        return request

    def http_response(self, request, response):
        """Return *response*, wrapped for decoding if it is compressed.

        The response is wrapped in stupid_gzip_wrapper when any of its
        Content-Encoding headers mentions "gzip" or "compress" (matched
        case-insensitively); otherwise it is returned unchanged.
        """
        # post-process response
        for enc_hdr in response.info().getheaders("Content-encoding"):
            # Content-coding names are case-insensitive, so "GZIP" or
            # "X-Gzip" must be recognised as well.
            coding = enc_hdr.lower()
            # NOTE(review): "compress" is matched for backward
            # compatibility, but the wrapper only understands the gzip
            # format -- confirm whether this branch is still wanted.
            if ("gzip" in coding) or ("compress" in coding):
                return stupid_gzip_wrapper(response)
        return response

    https_response = http_response
|
---|