source: py-scraping/mechanize/_request.py@ 201

Last change on this file since 201 was 106, checked in by Rick van der Zwet, 15 years ago

Initial commit...

File size: 3.2 KB
RevLine 
[106]1"""Integration with Python standard library module urllib2: Request class.
2
3Copyright 2004-2006 John J Lee <jjl@pobox.com>
4
5This code is free software; you can redistribute it and/or modify it
6under the terms of the BSD or ZPL 2.1 licenses (see the file
7COPYING.txt included with the distribution).
8
9"""
10
11import urllib2, urllib, logging
12
13from _clientcookie import request_host_lc
14import _rfc3986
15import _sockettimeout
16
17warn = logging.getLogger("mechanize").warning
18
19
class Request(urllib2.Request):
    """urllib2.Request subclass carrying extra per-request state.

    In addition to what the base class stores, an instance records:

    * ``visit`` and ``timeout``, exactly as passed to the constructor;
    * ``unverifiable`` and ``origin_req_host`` (RFC 2965 terminology);
    * ``unredirected_hdrs``, headers that will not be added to a
      redirected request.

    add_unredirected_header() stores header names capitalized, while
    has_header() and get_header() use the name they are given unchanged,
    so those look-ups are case-sensitive.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False, visit=None,
                 timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Create a request for url.

        url -- unicode or bytestring.  A warning is logged, but the value
            is still accepted, when _rfc3986.is_clean_uri() rejects it.
        data -- passed through to urllib2.Request.__init__.
        headers -- mapping passed through to urllib2.Request.__init__;
            None (the default) means no headers.
        origin_req_host -- request-host of the origin transaction; when
            None it is computed with request_host_lc(self).
        unverifiable -- stored as-is; returned by is_unverifiable().
        visit -- stored on the instance; not interpreted here.
        timeout -- stored on the instance; not interpreted here.
        """
        if headers is None:
            # Use a fresh dict per call rather than one mutable default
            # object shared by every Request ever constructed.
            headers = {}

        # In mechanize 0.2, the interpretation of a unicode url argument will
        # change: A unicode url argument will be interpreted as an IRI, and a
        # bytestring as a URI. For now, we accept unicode or bytestring.  We
        # don't insist that the value is always a URI (specifically, must only
        # contain characters which are legal), because that might break working
        # code (who knows what bytes some servers want to see, especially with
        # browser plugins for internationalised URIs).
        if not _rfc3986.is_clean_uri(url):
            # Let the logging machinery do the %-interpolation; the emitted
            # text is the same as formatting the message up front.
            warn("url argument is not a URI "
                 "(contains illegal characters) %r", url)
        urllib2.Request.__init__(self, url, data, headers)
        self.selector = None
        self.unredirected_hdrs = {}
        self.visit = visit
        self.timeout = timeout

        # All the terminology below comes from RFC 2965.
        self.unverifiable = unverifiable
        # Set request-host of origin transaction.
        # The origin request-host is needed in order to decide whether
        # unverifiable sub-requests (automatic redirects, images embedded
        # in HTML, etc.) are to third-party hosts.  If they are, the
        # resulting transactions might need to be conducted with cookies
        # turned off.
        if origin_req_host is None:
            origin_req_host = request_host_lc(self)
        self.origin_req_host = origin_req_host

    def get_selector(self):
        """Return the first element of urllib.splittag() of the selector."""
        # Inside a class named "Request", self.__r_host is name-mangled to
        # self._Request__r_host.
        # NOTE(review): this presumably reaches the private attribute of
        # urllib2.Request only because both classes are called "Request";
        # renaming this class would break the lookup -- confirm against the
        # urllib2 version in use.
        return urllib.splittag(self.__r_host)[0]

    def get_origin_req_host(self):
        """Return the origin request-host recorded by the constructor."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the unverifiable flag recorded by the constructor."""
        return self.unverifiable

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request."""
        # The name is stored via str.capitalize(), e.g. "Content-type".
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True iff request has named header (regular or unredirected)."""
        # header_name is used as given (no case normalisation).
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the named header's value, or default if it is not set.

        Regular headers take precedence over unredirected ones.
        header_name is used as given (no case normalisation).
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs for all headers.

        When a name is present in both mappings, the regular header's
        value is the one returned.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()

    def __str__(self):
        return "<Request for %s>" % self.get_full_url()

    def get_method(self):
        """Return "POST" if the request has data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"
Note: See TracBrowser for help on using the repository browser.