[106] | 1 | """Integration with Python standard library module urllib2: Request class.
|
---|
| 2 |
|
---|
| 3 | Copyright 2004-2006 John J Lee <jjl@pobox.com>
|
---|
| 4 |
|
---|
| 5 | This code is free software; you can redistribute it and/or modify it
|
---|
| 6 | under the terms of the BSD or ZPL 2.1 licenses (see the file
|
---|
| 7 | COPYING.txt included with the distribution).
|
---|
| 8 |
|
---|
| 9 | """
|
---|
| 10 |
|
---|
| 11 | import urllib2, urllib, logging
|
---|
| 12 |
|
---|
| 13 | from _clientcookie import request_host_lc
|
---|
| 14 | import _rfc3986
|
---|
| 15 | import _sockettimeout
|
---|
| 16 |
|
---|
# Module-level shortcut: warn(msg, *args) logs a WARNING record on the
# "mechanize" logger.
_mechanize_logger = logging.getLogger("mechanize")
warn = _mechanize_logger.warning
| 18 |
|
---|
| 19 |
|
---|
class Request(urllib2.Request):
    """urllib2.Request subclass carrying extra per-request state.

    On top of the base class, an instance records the RFC 2965 cookie
    attributes (origin request-host and the "unverifiable" flag) plus the
    ``visit`` and ``timeout`` values given to the constructor.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False, visit=None,
                 timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Initialise the request.

        url: unicode or bytestring URL.  If it is not a clean URI a
            warning is logged; no exception is raised for that reason.
        data: passed unchanged to urllib2.Request.__init__.
        headers: mapping of header names to values, passed to
            urllib2.Request.__init__.  None (the default) means "no
            headers".
        origin_req_host: request-host of the origin transaction (RFC 2965
            terminology).  When None it is computed from this request with
            request_host_lc().
        unverifiable: RFC 2965 "unverifiable" flag, stored as given.
        visit: stored as given in self.visit.
        timeout: stored as given in self.timeout.
        """
        # In mechanize 0.2, the interpretation of a unicode url argument will
        # change: A unicode url argument will be interpreted as an IRI, and a
        # bytestring as a URI. For now, we accept unicode or bytestring.  We
        # don't insist that the value is always a URI (specifically, must only
        # contain characters which are legal), because that might break working
        # code (who knows what bytes some servers want to see, especially with
        # browser plugins for internationalised URIs).
        if not _rfc3986.is_clean_uri(url):
            # Hand url to the logger as an argument rather than formatting
            # eagerly: the text is only built if the record is emitted, and
            # a tuple-valued url cannot break the "%" operator.
            warn("url argument is not a URI "
                 "(contains illegal characters) %r", url)
        if headers is None:
            # Fresh dict per call; avoids a shared mutable default argument.
            headers = {}
        urllib2.Request.__init__(self, url, data, headers)
        self.selector = None
        self.unredirected_hdrs = {}
        self.visit = visit
        self.timeout = timeout

        # All the terminology below comes from RFC 2965.
        self.unverifiable = unverifiable
        # Set request-host of origin transaction.
        # The origin request-host is needed in order to decide whether
        # unverifiable sub-requests (automatic redirects, images embedded
        # in HTML, etc.) are to third-party hosts.  If they are, the
        # resulting transactions might need to be conducted with cookies
        # turned off.
        if origin_req_host is None:
            origin_req_host = request_host_lc(self)
        self.origin_req_host = origin_req_host

    def get_selector(self):
        """Return the request's selector with any "#fragment" part removed."""
        # NOTE: inside this class "self.__r_host" is name-mangled to
        # "self._Request__r_host".  That only reaches the base class's
        # private attribute because both classes are named "Request";
        # presumably urllib2.Request is what populates it -- renaming this
        # class would break the lookup.
        return urllib.splittag(self.__r_host)[0]

    def get_origin_req_host(self):
        """Return the request-host of the origin transaction (RFC 2965)."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the RFC 2965 "unverifiable" flag given to the constructor."""
        return self.unverifiable

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request."""
        # Stored under key.capitalize(); lookups elsewhere in this class are
        # exact-match, so callers must query with that capitalisation.
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True iff request has named header (regular or unredirected)."""
        # Exact-match lookup: header_name is not case-normalised here.
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the value of the named header, or default if absent.

        A regular header takes precedence over an unredirected header of
        the same name.
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs for all headers.

        Where a name appears both as a regular and as an unredirected
        header, the regular header's value is the one returned.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()

    def __str__(self):
        return "<Request for %s>" % self.get_full_url()

    def get_method(self):
        """Return "POST" if the request has data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        return "GET"