"""Response classes.

The seek_wrapper code is not used if you're using UserAgent with
.set_seekable_responses(False), or if you're using the urllib2-level interface
without SeekableProcessor or HTTPEquivProcessor.  Class closeable_response is
instantiated by some handlers (AbstractHTTPHandler), but the closeable_response
interface is only depended upon by Browser-level code.  Function
upgrade_response is only used if you're using Browser or
ResponseUpgradeProcessor.


Copyright 2006 John J. Lee <jjl@pobox.com>

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).

"""
19 |
|
---|
20 | import copy, mimetools
|
---|
21 | from cStringIO import StringIO
|
---|
22 | import urllib2
|
---|
23 |
|
---|
24 |
|
---|
def len_of_seekable(file_):
    """Return the total length of seekable file_, preserving its position.

    The length is found by seeking to the end and asking for the offset.
    This function exists because evaluating len(file_.getvalue()) on every
    .read() from seek_wrapper would be O(N**2) in the number of .read()s.
    """
    saved_pos = file_.tell()
    file_.seek(0, 2)  # whence=2: jump to the end
    try:
        length = file_.tell()
        return length
    finally:
        # put the file position back where the caller left it
        file_.seek(saved_pos)
34 |
|
---|
35 |
|
---|
36 | # XXX Andrew Dalke kindly sent me a similar class in response to my request on
|
---|
37 | # comp.lang.python, which I then proceeded to lose. I wrote this class
|
---|
38 | # instead, but I think he's released his code publicly since, could pinch the
|
---|
39 | # tests from it, at least...
|
---|
40 |
|
---|
# For testing seek_wrapper invariant (note that
# test_urllib2.HandlerTest.test_seekable is expected to fail when this
# invariant checking is turned on).  The invariant checking is done by module
# ipdbc, which is available here:
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
46 | ## from ipdbc import ContractBase
|
---|
47 | ## class seek_wrapper(ContractBase):
|
---|
class seek_wrapper:
    """Adds a seek method to a file object.

    This is only designed for seeking on readonly file-like objects.

    Wrapped file-like object must have a read method.  The readline method is
    only supported if that method is present on the wrapped object.  The
    readlines method is always supported.  xreadlines and iteration are
    supported only for Python 2.2 and above.

    Public attributes:

    wrapped: the wrapped file object
    is_closed: true iff .close() has been called
    read_complete: set to true once the wrapped object has been read to the
     end (or returned no data); assignments are ignored after .close()

    WARNING: All other attributes of the wrapped object (ie. those that are not
    one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
    are passed through unaltered, which may or may not make sense for your
    particular file object.

    """
    # General strategy is to check that cache is full enough, then delegate to
    # the cache (self.__cache, which is a cStringIO.StringIO instance).  A seek
    # position (self.__pos) is maintained independently of the cache, in order
    # that a single cache may be shared between multiple seek_wrapper objects.
    # Copying using module copy shares the cache in this way.

    def __init__(self, wrapped):
        self.wrapped = wrapped
        # One-element lists, so that copies made by __copy__ can share (and
        # mutate) the very same flags.
        self.__read_complete_state = [False]
        self.__is_closed_state = [False]
        self.__have_readline = hasattr(self.wrapped, "readline")
        self.__cache = StringIO()
        self.__pos = 0  # seek position

    def invariant(self):
        """Return true if the cache end coincides with the wrapped position."""
        # The end of the cache is always at the same place as the end of the
        # wrapped file (though the .tell() method is not required to be present
        # on wrapped file).
        return self.wrapped.tell() == len(self.__cache.getvalue())

    def close(self):
        """Close the wrapped object and record that fact in is_closed."""
        self.wrapped.close()
        self.is_closed = True

    def __getattr__(self, name):
        # Only called when normal lookup fails.  is_closed / read_complete are
        # virtual attributes backed by the shared one-element lists.
        if name == "is_closed":
            return self.__is_closed_state[0]
        elif name == "read_complete":
            return self.__read_complete_state[0]

        # Everything else is delegated to the wrapped object.  Test identity
        # rather than truthiness: a wrapped object that happens to be falsy
        # (e.g. defines __len__ returning 0) must still be delegated to.
        wrapped = self.__dict__.get("wrapped")
        if wrapped is not None:
            return getattr(wrapped, name)

        # no wrapped object yet: raises AttributeError for unknown names
        return getattr(self.__class__, name)

    def __setattr__(self, name, value):
        if name == "is_closed":
            self.__is_closed_state[0] = bool(value)
        elif name == "read_complete":
            # read_complete is frozen once the wrapper has been closed
            if not self.is_closed:
                self.__read_complete_state[0] = bool(value)
        else:
            self.__dict__[name] = value

    def seek(self, offset, whence=0):
        """Move the seek position, reading from the wrapped object if needed.

        whence == 0: offset is an absolute position (must be >= 0).
        whence == 1: offset is relative to the current position; the
         resulting position must be >= 0.
        whence == 2: offset is a non-negative distance back from the end of
         the wrapped file; the whole wrapped file is read to find its end.

        Raises ValueError for a negative offset (whence 0 or 2) or for a
        relative seek that would land before the start of the file.
        """
        assert whence in [0, 1, 2]

        # how much data, if any, do we need to read?
        if whence == 2:  # 2: relative to end of *wrapped* file
            if offset < 0:
                raise ValueError("negative seek offset")
            # since we don't know yet where the end of that file is, we must
            # read everything
            to_read = None
        else:
            if whence == 0:  # 0: absolute
                if offset < 0:
                    raise ValueError("negative seek offset")
                dest = offset
            else:  # 1: relative to current position
                dest = self.__pos + offset
                # Only a destination before the start of the file is an
                # error; forward seeks of any size are legal.
                if dest < 0:
                    raise ValueError("seek to before start of file")
            end = len_of_seekable(self.__cache)
            to_read = max(dest - end, 0)

        if to_read != 0:
            self.__cache.seek(0, 2)
            if to_read is None:
                assert whence == 2
                self.__cache.write(self.wrapped.read())
                self.read_complete = True
                self.__pos = self.__cache.tell() - offset
            else:
                data = self.wrapped.read(to_read)
                if not data:
                    self.read_complete = True
                else:
                    self.__cache.write(data)
                # Don't raise an exception even if we've seek()ed past the end
                # of .wrapped, since fseek() doesn't complain in that case.
                # Also like fseek(), pretend we have seek()ed past the end,
                # i.e. not:
                #self.__pos = self.__cache.tell()
                # but rather:
                self.__pos = dest
        else:
            self.__pos = dest

    def tell(self):
        """Return the current seek position of this wrapper."""
        return self.__pos

    def __copy__(self):
        """Return a new wrapper sharing this one's cache and state flags.

        The copy starts with its own seek position (0).
        """
        cpy = self.__class__(self.wrapped)
        cpy.__cache = self.__cache
        cpy.__read_complete_state = self.__read_complete_state
        cpy.__is_closed_state = self.__is_closed_state
        return cpy

    def get_data(self):
        """Return all data from the start, leaving the seek position alone."""
        pos = self.__pos
        try:
            self.seek(0)
            return self.read(-1)
        finally:
            self.__pos = pos

    def read(self, size=-1):
        """Read up to size bytes from the current position.

        A size of None or any negative number means "read to the end".
        """
        # Normalise: previously only exactly -1 meant "everything", and other
        # negative sizes moved the seek position backwards.
        if size is None or size < 0:
            size = -1

        pos = self.__pos
        end = len_of_seekable(self.__cache)
        available = end - pos

        # enough data already cached?
        if size <= available and size != -1:
            self.__cache.seek(pos)
            self.__pos = pos + size
            return self.__cache.read(size)

        # no, so read sufficient data from wrapped file and cache it
        self.__cache.seek(0, 2)
        if size == -1:
            self.__cache.write(self.wrapped.read())
            self.read_complete = True
        else:
            to_read = size - available
            assert to_read > 0
            data = self.wrapped.read(to_read)
            if not data:
                self.read_complete = True
            else:
                self.__cache.write(data)
        self.__cache.seek(pos)

        data = self.__cache.read(size)
        self.__pos = self.__cache.tell()
        assert self.__pos == pos + len(data)
        return data

    def readline(self, size=-1):
        """Read one line (at most size bytes of it, if size is >= 0).

        Raises NotImplementedError if the wrapped object has no readline.
        """
        if not self.__have_readline:
            raise NotImplementedError("no readline method on wrapped object")

        # line we're about to read might not be complete in the cache, so
        # read another line first
        pos = self.__pos
        self.__cache.seek(0, 2)
        data = self.wrapped.readline()
        if not data:
            self.read_complete = True
        else:
            self.__cache.write(data)
        self.__cache.seek(pos)

        line = self.__cache.readline()
        if size is not None and size >= 0:
            line = line[:size]
        # Advance by what was actually returned: the line may be shorter than
        # the requested size.
        self.__pos = pos + len(line)
        return line

    def readlines(self, sizehint=-1):
        """Read the rest of the wrapped file and return lines from the cache."""
        pos = self.__pos
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.read())
        self.read_complete = True
        self.__cache.seek(pos)
        data = self.__cache.readlines(sizehint)
        self.__pos = self.__cache.tell()
        return data

    def __iter__(self):
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of file."""
        line = self.readline()
        if line == "":
            raise StopIteration
        return line

    xreadlines = __iter__

    def __repr__(self):
        return ("<%s at %s whose wrapped object = %r>" %
                (self.__class__.__name__, hex(abs(id(self))), self.wrapped))
254 |
|
---|
255 |
|
---|
class response_seek_wrapper(seek_wrapper):

    """
    Supports copying response objects and setting response body data.

    """

    def __init__(self, wrapped):
        seek_wrapper.__init__(self, wrapped)
        # headers are held on the wrapper itself, so that a copy can carry
        # its own header object
        self._headers = self.wrapped.info()

    def __copy__(self):
        """Copy as seek_wrapper does, but give the copy its own headers."""
        duplicate = seek_wrapper.__copy__(self)
        # copy headers from delegate
        headers = self.info()
        duplicate._headers = copy.copy(headers)
        return duplicate

    # Note that .info() and .geturl() (the only two urllib2 response methods
    # that are not implemented by seek_wrapper) must be here explicitly rather
    # than by seek_wrapper's __getattr__ delegation) so that the nasty
    # dynamically-created HTTPError classes in get_seek_wrapper_class() get the
    # wrapped object's implementation, and not HTTPError's.

    def info(self):
        """Return the headers object held by this wrapper."""
        return self._headers

    def geturl(self):
        """Return the URL reported by the wrapped response."""
        return self.wrapped.geturl()

    def set_data(self, data):
        """Replace the response body with data and rewind to the start.

        The existing body is first read to the end and the response closed.
        """
        self.seek(0)
        self.read()
        self.close()
        # swap in a brand-new cache holding only the replacement data
        fresh_cache = StringIO()
        self._seek_wrapper__cache = fresh_cache
        fresh_cache.write(data)
        self.seek(0)
292 |
|
---|
293 |
|
---|
class eoffile:
    """File-like object that always claims to be at end-of-file."""

    def read(self, size=-1):
        """Return the empty string: there is never any data left."""
        return ""

    def readline(self, size=-1):
        """Return the empty string: there is never a line left."""
        return ""

    def __iter__(self):
        return self

    def next(self):
        """Signal exhaustion immediately.

        For an iterator, end-of-file means StopIteration; returning "" here
        would make ``for line in eoffile()`` loop forever.
        """
        raise StopIteration

    def close(self):
        """Do nothing; there is nothing to close."""
        pass
301 |
|
---|
class eofresponse(eoffile):
    """An eoffile that also answers .geturl() and .info().

    code and msg are stored as plain attributes.
    """

    def __init__(self, url, headers, code, msg):
        self._url, self._headers = url, headers
        self.code, self.msg = code, msg

    def geturl(self):
        """Return the URL given at construction."""
        return self._url

    def info(self):
        """Return the headers object given at construction."""
        return self._headers
310 |
|
---|
311 |
|
---|
class closeable_response:
    """Avoids unnecessarily clobbering urllib.addinfourl methods on .close().

    Only supports responses returned by mechanize.HTTPHandler.

    After .close(), the following methods are supported:

    .read()
    .readline()
    .info()
    .geturl()
    .__iter__()
    .next()
    .close()

    and the following attributes are supported:

    .code
    .msg

    Also supports pickling (but the stdlib currently does something to prevent
    it: http://python.org/sf/1144636).

    """
    # presence of this attr indicates is useable after .close()
    closeable_response = None

    # instance attributes that _set_fp binds to the current fp; they must not
    # end up in pickled state
    _fp_bound_names = (
        "fp", "read", "readline", "readlines", "fileno", "__iter__", "next")

    def __init__(self, fp, headers, url, code, msg):
        self._set_fp(fp)
        self._headers = headers
        self._url = url
        self.code = code
        self.msg = msg

    def _set_fp(self, fp):
        """Make fp the underlying file and rebind the I/O methods to it."""
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        self.__iter__ = self.fp.__iter__
        self.next = self.fp.next

    def __repr__(self):
        return '<%s at %s whose fp = %r>' % (
            self.__class__.__name__, hex(abs(id(self))), self.fp)

    def info(self):
        """Return the headers object given at construction."""
        return self._headers

    def geturl(self):
        """Return the URL given at construction."""
        return self._url

    def close(self):
        """Close the underlying file, then switch to an always-at-EOF file.

        The replacement keeps .read() etc. callable after closing.
        """
        wrapped = self.fp
        wrapped.close()
        new_wrapped = eofresponse(
            self._url, self._headers, self.code, self.msg)
        self._set_fp(new_wrapped)

    def __getstate__(self):
        """Return picklable state with the body truncated.

        There are three obvious options here:
         1. truncate
         2. read to end
         3. close socket, pickle state including read position, then open
            again on unpickle and use Range header
        XXXX um, 4. refuse to pickle unless .close()d.  This is better,
        actually ("errors should never pass silently").

        2 breaks pickle protocol, because one expects the original object
        to be left unscathed by pickling.  3 is too complicated and
        surprising (and too much work ;-) to happen in a sane __getstate__.
        So we do 1.
        """
        state = self.__dict__.copy()
        # Drop the live fp and everything bound to it (including the
        # unpicklable fileno lambda).  __setstate__ rebinds these to an
        # at-EOF replacement, which is what "truncate" means here.
        for name in self._fp_bound_names:
            state.pop(name, None)
        return state

    def __setstate__(self, state):
        """Restore attributes and attach an always-at-EOF underlying file."""
        self.__dict__.update(state)
        self._set_fp(eofresponse(
            self._url, self._headers, self.code, self.msg))
396 |
|
---|
def test_response(data='test data', headers=[],
                  url="http://example.com/", code=200, msg="OK"):
    """Build a canned response for tests; see make_response for arguments."""
    # headers is only passed along here, so the shared default list is not
    # modified by this function
    response = make_response(data, headers, url, code, msg)
    return response
400 |
|
---|
def test_html_response(data='test data', headers=[],
                       url="http://example.com/", code=200, msg="OK"):
    """Build a canned response whose headers end with a text/html Content-type.

    headers: sequence of (name, value) pairs; it is not modified.
    Other arguments are as for make_response.
    """
    # Build a new list rather than using "+=": in-place extension would
    # mutate the caller's list, and the shared default list, so that every
    # call would accumulate one more Content-type header.
    html_headers = list(headers) + [("Content-type", "text/html")]
    return make_response(data, html_headers, url, code, msg)
405 |
|
---|
def make_response(data, headers, url, code, msg):
    """Convenient factory for objects implementing response interface.

    data: string containing response body data
    headers: sequence of (name, value) pairs
    url: URL of response
    code: integer response code (e.g. 200)
    msg: string response code message (e.g. "OK")

    Returns a response_seek_wrapper around a closeable_response.

    """
    mime_headers = make_headers(headers)
    body = StringIO(data)
    closeable = closeable_response(body, mime_headers, url, code, msg)
    return response_seek_wrapper(closeable)
419 |
|
---|
420 |
|
---|
def make_headers(headers):
    """Return a mimetools.Message built from headers.

    headers: sequence of (name, value) pairs
    """
    # one "Name: value" line per pair, newline-separated
    lines = ["%s: %s" % name_value for name_value in headers]
    header_block = "\n".join(lines)
    return mimetools.Message(StringIO(header_block))
429 |
|
---|
430 |
|
---|
# Rest of this module is especially horrible, but needed, at least until fork
# urllib2.  Even then, may want to preserve urllib2 compatibility.
433 |
|
---|
def get_seek_wrapper_class(response):
    """Return the class that should be used to seek-wrap response.

    For a urllib2.HTTPError that has no seek attribute, this is a freshly
    created subclass of both response_seek_wrapper and the response's own
    class; for anything else it is response_seek_wrapper itself.
    """
    is_unwrapped_http_error = (
        isinstance(response, urllib2.HTTPError) and
        not hasattr(response, "seek"))
    if not is_unwrapped_http_error:
        return response_seek_wrapper

    # in order to wrap response objects that are also exceptions, we must
    # dynamically subclass the exception :-(((
    response_class = response.__class__
    module_name = response_class.__module__
    if module_name == "__builtin__":
        exc_class_name = response_class.__name__
    else:
        exc_class_name = "%s.%s" % (module_name, response_class.__name__)

    class httperror_seek_wrapper(response_seek_wrapper, response_class):
        # this only derives from HTTPError in order to be a subclass --
        # the HTTPError behaviour comes from delegation

        _exc_class_name = exc_class_name

        def __init__(self, wrapped):
            response_seek_wrapper.__init__(self, wrapped)
            # be compatible with undocumented HTTPError attributes :-(
            self.hdrs = wrapped.info()
            self.filename = wrapped.geturl()

        def __repr__(self):
            template = ("<%s (%s instance) at %s "
                        "whose wrapped object = %r>")
            return template % (
                self.__class__.__name__, self._exc_class_name,
                hex(abs(id(self))), self.wrapped)

    return httperror_seek_wrapper
468 |
|
---|
def seek_wrapped_response(response):
    """Return a copy of response that supports seekable response interface.

    Accepts responses from both mechanize and urllib2 handlers.

    Copes with both ordinary response instances and HTTPError instances (which
    can't be simply wrapped due to the requirement of preserving the exception
    base class).

    A response that already has a seek attribute is returned as is.
    """
    if hasattr(response, "seek"):
        seekable = response
    else:
        wrapper_class = get_seek_wrapper_class(response)
        seekable = wrapper_class(response)
    assert hasattr(seekable, "get_data")
    return seekable
483 |
|
---|
def upgrade_response(response):
    """Return a copy of response that supports Browser response interface.

    Browser response interface is that of "seekable responses"
    (response_seek_wrapper), plus the requirement that responses must be
    useable after .close() (closeable_response).

    Accepts responses from both mechanize and urllib2 handlers.

    Copes with both ordinary response instances and HTTPError instances (which
    can't be simply wrapped due to the requirement of preserving the exception
    base class).
    """
    wrapper_class = get_seek_wrapper_class(response)

    if hasattr(response, "closeable_response"):
        # already useable after .close(): just make sure it is seekable and
        # hand back a copy
        seekable = response
        if not hasattr(seekable, "seek"):
            seekable = wrapper_class(seekable)
        assert hasattr(seekable, "get_data")
        return copy.copy(seekable)

    # a urllib2 handler constructed the response, i.e. the response is an
    # urllib.addinfourl or a urllib2.HTTPError, instead of a
    # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
    code = getattr(response, "code", None)
    msg = getattr(response, "msg", None)

    # may have already-.read() data from .seek() cache
    cached_data = None
    get_data = getattr(response, "get_data", None)
    if get_data:
        cached_data = get_data()

    closeable = closeable_response(
        response.fp, response.info(), response.geturl(), code, msg)
    upgraded = wrapper_class(closeable)
    if cached_data:
        upgraded.set_data(cached_data)
    return upgraded