1 | """Integration with Python standard library module urllib2: Request class.
|
---|
2 |
|
---|
3 | Copyright 2004-2006 John J Lee <jjl@pobox.com>
|
---|
4 |
|
---|
5 | This code is free software; you can redistribute it and/or modify it
|
---|
6 | under the terms of the BSD or ZPL 2.1 licenses (see the file
|
---|
7 | COPYING.txt included with the distribution).
|
---|
8 |
|
---|
9 | """
|
---|
10 |
|
---|
11 | import urllib2, urllib, logging
|
---|
12 |
|
---|
13 | from _clientcookie import request_host_lc
|
---|
14 | import _rfc3986
|
---|
15 | import _sockettimeout
|
---|
16 |
|
---|
17 | warn = logging.getLogger("mechanize").warning
|
---|
18 |
|
---|
19 |
|
---|
class Request(urllib2.Request):
    """urllib2.Request subclass carrying extra per-request state.

    On top of the standard library class, instances record:

    * ``visit`` and ``timeout`` exactly as passed to the constructor,
    * ``unverifiable`` and ``origin_req_host`` (RFC 2965 terminology),
    * ``unredirected_hdrs``, headers that must not be copied onto a
      redirected request.

    NOTE: get_selector() reads ``self.__r_host``.  Inside this class that
    name is mangled to ``_Request__r_host``; it is expected to resolve to
    the base class's private attribute only because both classes are
    named ``Request``.  Renaming this class would break that lookup.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False, visit=None,
                 timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Initialise the request.

        url -- unicode or bytestring URL; a warning is logged (but no
            error raised) if _rfc3986.is_clean_uri() rejects it
        data -- passed through to urllib2.Request.__init__
        headers -- mapping of header names to values, passed through to
            urllib2.Request.__init__; None (the default) means no headers
        origin_req_host -- request-host of the origin transaction; when
            None it is computed with request_host_lc(self)
        unverifiable -- stored as self.unverifiable (RFC 2965)
        visit -- stored as self.visit
        timeout -- stored as self.timeout
        """
        # Use a None sentinel rather than a shared mutable {} default.
        if headers is None:
            headers = {}

        # In mechanize 0.2, the interpretation of a unicode url argument will
        # change: A unicode url argument will be interpreted as an IRI, and a
        # bytestring as a URI. For now, we accept unicode or bytestring.  We
        # don't insist that the value is always a URI (specifically, must only
        # contain characters which are legal), because that might break working
        # code (who knows what bytes some servers want to see, especially with
        # browser plugins for internationalised URIs).
        if not _rfc3986.is_clean_uri(url):
            # Pass url as a logging argument instead of %-formatting it
            # here: a tuple-valued url would otherwise be unpacked as the
            # format arguments, and the message is only built if the
            # warning is actually emitted.
            warn("url argument is not a URI "
                 "(contains illegal characters) %r", url)
        urllib2.Request.__init__(self, url, data, headers)
        self.selector = None
        self.unredirected_hdrs = {}
        self.visit = visit
        self.timeout = timeout

        # All the terminology below comes from RFC 2965.
        self.unverifiable = unverifiable
        # Set request-host of origin transaction.
        # The origin request-host is needed in order to decide whether
        # unverifiable sub-requests (automatic redirects, images embedded
        # in HTML, etc.) are to third-party hosts.  If they are, the
        # resulting transactions might need to be conducted with cookies
        # turned off.
        if origin_req_host is None:
            origin_req_host = request_host_lc(self)
        self.origin_req_host = origin_req_host

    def get_selector(self):
        """Return the selector with any '#fragment' part split off."""
        # self.__r_host is mangled to self._Request__r_host (see the class
        # docstring for why that matters).
        return urllib.splittag(self.__r_host)[0]

    def get_origin_req_host(self):
        """Return the origin request-host set at construction time."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the unverifiable flag set at construction time."""
        return self.unverifiable

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request.

        The key is stored in str.capitalize() form.
        """
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True iff request has named header (regular or unredirected).

        header_name is looked up exactly as given; it is not capitalized
        here, so pass it in the form under which it was stored.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the value of the named header, or default if absent.

        Regular headers take precedence over unredirected ones.  As with
        has_header(), header_name is looked up exactly as given.
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs for all headers.

        Unredirected and regular headers are merged; where both define
        the same key, the regular header's value wins.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()

    def __str__(self):
        return "<Request for %s>" % self.get_full_url()

    def get_method(self):
        """Return "POST" if the request has data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"