1 | """HTTP cookie handling for web clients.
|
---|
2 |
|
---|
3 | This module originally developed from my port of Gisle Aas' Perl module
|
---|
4 | HTTP::Cookies, from the libwww-perl library.
|
---|
5 |
|
---|
6 | Docstrings, comments and debug strings in this code refer to the
|
---|
7 | attributes of the HTTP cookie system as cookie-attributes, to distinguish
|
---|
8 | them clearly from Python attributes.
|
---|
9 |
|
---|
10 | CookieJar____
|
---|
11 | / \ \
|
---|
12 | FileCookieJar \ \
|
---|
13 | / | \ \ \
|
---|
14 | MozillaCookieJar | LWPCookieJar \ \
|
---|
15 | | | \
|
---|
16 | | ---MSIEBase | \
|
---|
17 | | / | | \
|
---|
18 | | / MSIEDBCookieJar BSDDBCookieJar
|
---|
19 | |/
|
---|
20 | MSIECookieJar
|
---|
21 |
|
---|
22 | Comments to John J Lee <jjl@pobox.com>.
|
---|
23 |
|
---|
24 |
|
---|
25 | Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
---|
26 | Copyright 1997-1999 Gisle Aas (original libwww-perl code)
|
---|
27 | Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
|
---|
28 |
|
---|
29 | This code is free software; you can redistribute it and/or modify it
|
---|
30 | under the terms of the BSD or ZPL 2.1 licenses (see the file
|
---|
31 | COPYING.txt included with the distribution).
|
---|
32 |
|
---|
33 | """
|
---|
34 |
|
---|
35 | import sys, re, copy, time, urllib, types, logging
|
---|
36 | try:
|
---|
37 | import threading
|
---|
38 | _threading = threading; del threading
|
---|
39 | except ImportError:
|
---|
40 | import dummy_threading
|
---|
41 | _threading = dummy_threading; del dummy_threading
|
---|
42 |
|
---|
# Error text for the case where no filename is available: neither passed in
# by the caller nor given when the CookieJar instance was created.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
# Port (as a string) that request_port() returns when the request host
# carries no explicit ":port" suffix.
DEFAULT_HTTP_PORT = "80"
|
---|
46 |
|
---|
47 | from _headersutil import split_header_words, parse_ns_headers
|
---|
48 | from _util import isstringlike
|
---|
49 | import _rfc3986
|
---|
50 |
|
---|
# Module-wide shortcut: debug(...) logs at DEBUG level on the
# "mechanize.cookies" logger.
debug = logging.getLogger("mechanize.cookies").debug
|
---|
52 |
|
---|
53 |
|
---|
def reraise_unmasked_exceptions(unmasked=()):
    """Re-raise the exception being handled, unless it should be swallowed.

    This must be called while an exception is being handled (ie. from inside
    an except: block): the bare raise statements below re-raise that
    exception, and sys.exc_info() is used to inspect it.

    unmasked: tuple of extra exception classes that must always propagate;
     KeyboardInterrupt, SystemExit and MemoryError are always added to it

    If mechanize.USE_BARE_EXCEPT is false, the exception is always re-raised.
    Otherwise, an exception that is not an instance of one of the unmasked
    classes is swallowed, and a "mechanize bug!" warning carrying the
    formatted traceback is issued instead.

    """
    # There are a few catch-all except: statements in this module, for
    # catching input that's bad in unexpected ways.
    # This function re-raises some exceptions we don't want to trap.
    import mechanize, warnings
    if not mechanize.USE_BARE_EXCEPT:
        # catch-alls are disabled: let everything through
        raise
    unmasked = unmasked + (KeyboardInterrupt, SystemExit, MemoryError)
    etype = sys.exc_info()[0]
    if issubclass(etype, unmasked):
        raise
    # swallowed an exception
    # Capture the traceback text so the warning shows what was swallowed.
    import traceback, StringIO
    f = StringIO.StringIO()
    traceback.print_exc(None, f)
    msg = f.getvalue()
    # stacklevel=2 attributes the warning to our caller, not to this helper
    warnings.warn("mechanize bug!\n%s" % msg, stacklevel=2)
|
---|
71 |
|
---|
72 |
|
---|
# Matches text ending in a dot followed by digits, which is how this module
# recognises (the tail of) an IPv4 address.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name.

    Text is rejected if it is empty, starts or ends with a dot, or ends
    in ".<digits>" (ie. looks like an IPv4 address).

    """
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    return not (IPV4_RE.search(text) or
                text == "" or
                text[0] == "." or text[-1] == ".")

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    "Has the form NB" is enforced as "B is a proper suffix of A".  Merely
    containing B somewhere inside A is not enough: www.acme.com.evil.org
    does NOT domain-match .acme.com.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # A must equal N + B with N non-empty, ie. A ends with B and is longer.
    has_form_nb = len(A) > len(B) and A.endswith(B)
    return (
        has_form_nb and
        B.startswith(".") and
        is_HDN(B[1:])
        )
|
---|
121 |
|
---|
def liberal_is_HDN(text):
    """Return True if text looks roughly like a host domain name.

    Used when accepting/blocking domains.  The only thing rejected is text
    ending in ".<digits>", ie. text that looks like an IPv4 address.

    """
    looks_like_ipv4 = IPV4_RE.search(text)
    return not looks_like_ipv4
|
---|
129 |
|
---|
def user_domain_match(A, B):
    """Return True if domain A matches user-supplied block/allow entry B.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  If B starts with a dot, A matches when it ends with
    B; otherwise the two must be equal.  If either looks like an IP
    address, only an exact match counts.

    """
    host = A.lower()
    entry = B.lower()
    if liberal_is_HDN(host) and liberal_is_HDN(entry):
        if entry.startswith("."):
            # dotted entry: matched by any host that ends with it
            return host.endswith(entry)
        return host == entry
    # at least one side looks like an IP address: must be identical
    return host == entry
|
---|
149 |
|
---|
# Matches a trailing ":<digits>" port suffix on a host string.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host comes from element 1 of the split full URL of the request; if
    that is None, the request's Host header is used instead (default "").
    A trailing ":port" is removed.  The case of the host is left untouched:
    use request_host_lc for a lowercased value.

    """
    authority = _rfc3986.urlsplit(request.get_full_url())[1]
    if authority is None:
        authority = request.get_header("Host", "")
    # remove port, if present
    return cut_port_re.sub("", authority, 1)
|
---|
164 |
|
---|
def request_host_lc(request):
    """Return the request-host (see request_host), lowercased."""
    host = request_host(request)
    return host.lower()
|
---|
167 |
|
---|
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    The effective name is the request-host with ".local" appended when the
    request-host contains no dot (and does not look like an IPv4 address);
    otherwise it is the request-host itself.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
|
---|
174 |
|
---|
def eff_request_host_lc(request):
    """Return eff_request_host(request) with both strings lowercased."""
    host, effective_host = eff_request_host(request)
    host = host.lower()
    effective_host = effective_host.lower()
    return host, effective_host
|
---|
178 |
|
---|
def effective_request_host(request):
    """Return the effective request-host, as defined by RFC 2965."""
    _req_host, erhn = eff_request_host(request)
    return erhn
|
---|
182 |
|
---|
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, query and fragment of the request's full URL.  The
    path is passed through escape_path first, and a leading "/" is added if
    the result does not already start with one.

    """
    pieces = _rfc3986.urlsplit(request.get_full_url())
    path, query, frag = pieces[2:]
    escaped = escape_path(path)
    # no scheme or authority: only the path onwards is wanted
    req_path = _rfc3986.urlunsplit((None, None, escaped, query, frag))
    if req_path.startswith("/"):
        return req_path
    return "/" + req_path
|
---|
192 |
|
---|
def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ":" in request.get_host().  If
    there is no ":", DEFAULT_HTTP_PORT is returned.  If the text after the
    colon is not an integer, a debug message is logged and None is returned.

    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon + 1:]
    try:
        # only a validity check: the port is returned as a string
        int(port)
    except ValueError:
        debug("nonnumeric port: '%s'", port)
        return None
    return port
|
---|
206 |
|
---|
def request_is_unverifiable(request):
    """Return whether request is unverifiable.

    The result of request.is_unverifiable() is used if that call succeeds.
    If it raises AttributeError, the request.unverifiable attribute is
    returned instead when present; otherwise the AttributeError propagates.

    """
    try:
        return request.is_unverifiable()
    except AttributeError:
        if not hasattr(request, "unverifiable"):
            # neither spelling is available: let the caller see the error
            raise
        return request.unverifiable
|
---|
215 |
|
---|
216 | # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
|
---|
217 | # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
|
---|
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
# A %-escape: "%" followed by exactly two hex digits (captured as group 1).
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape matched by ESCAPED_CHAR_RE with uppercase hex."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
|
---|
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes.

    Unicode paths are encoded as UTF-8 before quoting.  Characters listed
    in HTTP_PATH_SAFE are left unquoted.

    """
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, types.UnicodeType):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    # normalise the case of every %xx escape, old and newly added alike
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
|
---|
237 |
|
---|
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot only: pieces[0] is A, pieces[1] (if any) is B.
    pieces = h.split(".", 1)
    if len(pieces) == 2:
        b = pieces[1]
        if is_HDN(h) and ("." in b or b == "local"):
            return "." + b
    return h
|
---|
272 |
|
---|
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    req_host = request_host_lc(request)
    # the origin request's request-host was stuffed into request by
    # _urllib2_support.AbstractHTTPHandler
    origin_reach = reach(request.origin_req_host)
    return not domain_match(req_host, origin_reach)
|
---|
287 |
|
---|
288 |
|
---|
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    version: integer;
    name: string;
    value: string (may be None);
    port: string; None indicates no attribute was supplied (eg. "Port", rather
     than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
     string (eg. "80,8080")
    port_specified: boolean; true if a value was supplied with the Port
     cookie-attribute
    domain: string;
    domain_specified: boolean; true if Domain was explicitly set
    domain_initial_dot: boolean; true if Domain as set in HTTP header by server
     started with a dot (yes, this really is necessary!)
    path: string;
    path_specified: boolean; true if Path was explicitly set
    secure:  boolean; true if should only be returned over secure connection
    expires: integer; seconds since epoch (RFC 2965 cookies should calculate
     this value from the Max-Age attribute)
    discard: boolean, true if this is a session cookie; (if no expires value,
     this should be true)
    comment: string;
    comment_url: string;
    rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
     Set-Cookie2:) header, but had a version cookie-attribute of 1
    rest: mapping of other cookie-attributes

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than "Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes; see the class docstring for each.

        version and expires are converted with int() unless None, domain
        is lowercased, and a shallow copy of rest is kept.

        Raises ValueError if port is None while port_specified is True.

        """
        if version is not None: version = int(version)
        if expires is not None: expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        # shallow copy, so later changes here don't touch the caller's mapping
        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return true if the non-standard cookie-attribute name is set."""
        # "in" rather than the deprecated (and removed in Python 3) has_key
        return name in self._rest
    def get_nonstandard_attr(self, name, default=None):
        """Return non-standard cookie-attribute name, or default if unset."""
        return self._rest.get(name, default)
    def set_nonstandard_attr(self, name, value):
        """Set non-standard cookie-attribute name to value."""
        self._rest[name] = value
    def nonstandard_attr_keys(self):
        """Return the names of the non-standard cookie-attributes."""
        return self._rest.keys()

    def is_expired(self, now=None):
        """Return true if the cookie has an expiry time that is <= now.

        now: seconds since epoch; defaults to the current time.  A cookie
         whose expires is None never counts as expired.

        """
        if now is None: now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        """Return "<Cookie name=value for domain[:port]path>"."""
        if self.port is None: p = ""
        else: p = ":" + self.port
        limit = self.domain + p + self.path
        if self.value is not None:
            namevalue = "%s=%s" % (self.name, self.value)
        else:
            namevalue = self.name
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        """Return a "Cookie(name=value, ...)" string listing every attribute."""
        args = []
        for name in ["version", "name", "value",
                     "port", "port_specified",
                     "domain", "domain_specified", "domain_initial_dot",
                     "path", "path_specified",
                     "secure", "expires", "discard", "comment", "comment_url",
                     ]:
            attr = getattr(self, name)
            args.append("%s=%s" % (name, repr(attr)))
        # rest is stored privately and rfc2109 comes last in the signature
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
|
---|
410 |
|
---|
411 |
|
---|
class CookiePolicy:
    """Interface deciding which cookies are accepted from and returned to
    the server.

    A policy may also modify cookies.

    DefaultCookiePolicy is the subclass implementing the standard rules for
    Netscape and RFC 2965 cookies; subclass that one if you want a customised
    policy.

    Besides set_ok and return_ok, implementations of this interface must
    supply the attributes listed below, which say which protocols are used,
    and how.  They may be read and set at any time, though whether that makes
    complete sense from the protocol point of view is doubtful.

    Public attributes:

    netscape: implement netscape protocol
    rfc2965: implement RFC 2965 protocol
    rfc2109_as_netscape:
       WARNING: This argument will change or go away if it is not accepted
                into the Python standard library in this form!
     If true, treat RFC 2109 cookies as though they were Netscape cookies.  The
     default is for this attribute to be None, which means treat 2109 cookies
     as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
     by default), and as Netscape cookies otherwise.
    hide_cookie2: don't add Cookie2 header to requests (the presence of
     this header indicates to the server that we understand RFC 2965
     cookies)

    """
    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Pre-expired cookies currently never reach this method: the CookieJar
        class deletes them itself.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.extract_cookies.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        cookie: mechanize.Cookie object
        request: object implementing the interface defined by
         CookieJar.add_cookie_header.__doc__

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This exists as an optimization: it removes the need to check every
        cookie with a particular domain (which may involve reading many
        files).  The default implementations of domain_return_ok and
        path_return_ok (return True) leave all the work to return_ok.

        If domain_return_ok returns true for the cookie domain, path_return_ok
        is called for the cookie path.  Otherwise, path_return_ok and return_ok
        are never called for that cookie domain.  If path_return_ok returns
        true, return_ok is called with the Cookie object itself for a full
        check.  Otherwise, return_ok is never called for that cookie path.

        Note that domain_return_ok is called for every *cookie* domain, not
        just for the *request* domain.  For example, the function might be
        called with both ".acme.com" and "www.acme.com" if the request domain
        is "www.acme.com".  The same goes for path_return_ok.

        For argument documentation, see the docstring for return_ok.

        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        See the docstring for domain_return_ok.  This default implementation
        always returns True.

        """
        return True
|
---|
496 |
|
---|
497 |
|
---|
498 | class DefaultCookiePolicy(CookiePolicy):
|
---|
499 | """Implements the standard rules for accepting and returning cookies.
|
---|
500 |
|
---|
501 | Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
|
---|
502 | switched off by default.
|
---|
503 |
|
---|
504 | The easiest way to provide your own policy is to override this class and
|
---|
505 | call its methods in your overriden implementations before adding your own
|
---|
506 | additional checks.
|
---|
507 |
|
---|
508 | import mechanize
|
---|
509 | class MyCookiePolicy(mechanize.DefaultCookiePolicy):
|
---|
510 | def set_ok(self, cookie, request):
|
---|
511 | if not mechanize.DefaultCookiePolicy.set_ok(
|
---|
512 | self, cookie, request):
|
---|
513 | return False
|
---|
514 | if i_dont_want_to_store_this_cookie():
|
---|
515 | return False
|
---|
516 | return True
|
---|
517 |
|
---|
518 | In addition to the features required to implement the CookiePolicy
|
---|
519 | interface, this class allows you to block and allow domains from setting
|
---|
520 | and receiving cookies. There are also some strictness switches that allow
|
---|
521 | you to tighten up the rather loose Netscape protocol rules a little bit (at
|
---|
522 | the cost of blocking some benign cookies).
|
---|
523 |
|
---|
524 | A domain blacklist and whitelist is provided (both off by default). Only
|
---|
525 | domains not in the blacklist and present in the whitelist (if the whitelist
|
---|
526 | is active) participate in cookie setting and returning. Use the
|
---|
527 | blocked_domains constructor argument, and blocked_domains and
|
---|
528 | set_blocked_domains methods (and the corresponding argument and methods for
|
---|
529 | allowed_domains). If you set a whitelist, you can turn it off again by
|
---|
530 | setting it to None.
|
---|
531 |
|
---|
532 | Domains in block or allow lists that do not start with a dot must
|
---|
533 | string-compare equal. For example, "acme.com" matches a blacklist entry of
|
---|
534 | "acme.com", but "www.acme.com" does not. Domains that do start with a dot
|
---|
535 | are matched by more specific domains too. For example, both "www.acme.com"
|
---|
536 | and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
|
---|
537 | not). IP addresses are an exception, and must match exactly. For example,
|
---|
538 | if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
|
---|
539 | blocked, but 193.168.1.2 is not.
|
---|
540 |
|
---|
541 | Additional Public Attributes:
|
---|
542 |
|
---|
543 | General strictness switches
|
---|
544 |
|
---|
545 | strict_domain: don't allow sites to set two-component domains with
|
---|
546 | country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
|
---|
547 | This is far from perfect and isn't guaranteed to work!
|
---|
548 |
|
---|
549 | RFC 2965 protocol strictness switches
|
---|
550 |
|
---|
551 | strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
|
---|
552 | transactions (usually, an unverifiable transaction is one resulting from
|
---|
553 | a redirect or an image hosted on another site); if this is false, cookies
|
---|
554 | are NEVER blocked on the basis of verifiability
|
---|
555 |
|
---|
556 | Netscape protocol strictness switches
|
---|
557 |
|
---|
558 | strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
|
---|
559 | even to Netscape cookies
|
---|
560 | strict_ns_domain: flags indicating how strict to be with domain-matching
|
---|
561 | rules for Netscape cookies:
|
---|
562 | DomainStrictNoDots: when setting cookies, host prefix must not contain a
|
---|
563 | dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
|
---|
564 | www.foo contains a dot)
|
---|
565 | DomainStrictNonDomain: cookies that did not explicitly specify a Domain
|
---|
566 | cookie-attribute can only be returned to a domain that string-compares
|
---|
567 | equal to the domain that set the cookie (eg. rockets.acme.com won't
|
---|
568 | be returned cookies from acme.com that had no Domain cookie-attribute)
|
---|
569 | DomainRFC2965Match: when setting cookies, require a full RFC 2965
|
---|
570 | domain-match
|
---|
571 | DomainLiberal and DomainStrict are the most useful combinations of the
|
---|
572 | above flags, for convenience
|
---|
573 | strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
|
---|
574 | have names starting with '$'
|
---|
575 | strict_ns_set_path: don't allow setting cookies whose path doesn't
|
---|
576 | path-match request URI
|
---|
577 |
|
---|
578 | """
|
---|
579 |
|
---|
580 | DomainStrictNoDots = 1
|
---|
581 | DomainStrictNonDomain = 2
|
---|
582 | DomainRFC2965Match = 4
|
---|
583 |
|
---|
584 | DomainLiberal = 0
|
---|
585 | DomainStrict = DomainStrictNoDots | DomainStrictNonDomain
|
---|
586 |
|
---|
587 | def __init__(self,
|
---|
588 | blocked_domains=None, allowed_domains=None,
|
---|
589 | netscape=True, rfc2965=False,
|
---|
590 | # WARNING: this argument will change or go away if is not
|
---|
591 | # accepted into the Python standard library in this form!
|
---|
592 | # default, ie. treat 2109 as netscape iff not rfc2965
|
---|
593 | rfc2109_as_netscape=None,
|
---|
594 | hide_cookie2=False,
|
---|
595 | strict_domain=False,
|
---|
596 | strict_rfc2965_unverifiable=True,
|
---|
597 | strict_ns_unverifiable=False,
|
---|
598 | strict_ns_domain=DomainLiberal,
|
---|
599 | strict_ns_set_initial_dollar=False,
|
---|
600 | strict_ns_set_path=False,
|
---|
601 | ):
|
---|
602 | """
|
---|
603 | Constructor arguments should be used as keyword arguments only.
|
---|
604 |
|
---|
605 | blocked_domains: sequence of domain names that we never accept cookies
|
---|
606 | from, nor return cookies to
|
---|
607 | allowed_domains: if not None, this is a sequence of the only domains
|
---|
608 | for which we accept and return cookies
|
---|
609 |
|
---|
610 | For other arguments, see CookiePolicy.__doc__ and
|
---|
611 | DefaultCookiePolicy.__doc__..
|
---|
612 |
|
---|
613 | """
|
---|
614 | self.netscape = netscape
|
---|
615 | self.rfc2965 = rfc2965
|
---|
616 | self.rfc2109_as_netscape = rfc2109_as_netscape
|
---|
617 | self.hide_cookie2 = hide_cookie2
|
---|
618 | self.strict_domain = strict_domain
|
---|
619 | self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
|
---|
620 | self.strict_ns_unverifiable = strict_ns_unverifiable
|
---|
621 | self.strict_ns_domain = strict_ns_domain
|
---|
622 | self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
|
---|
623 | self.strict_ns_set_path = strict_ns_set_path
|
---|
624 |
|
---|
625 | if blocked_domains is not None:
|
---|
626 | self._blocked_domains = tuple(blocked_domains)
|
---|
627 | else:
|
---|
628 | self._blocked_domains = ()
|
---|
629 |
|
---|
630 | if allowed_domains is not None:
|
---|
631 | allowed_domains = tuple(allowed_domains)
|
---|
632 | self._allowed_domains = allowed_domains
|
---|
633 |
|
---|
634 | def blocked_domains(self):
|
---|
635 | """Return the sequence of blocked domains (as a tuple)."""
|
---|
636 | return self._blocked_domains
|
---|
637 | def set_blocked_domains(self, blocked_domains):
|
---|
638 | """Set the sequence of blocked domains."""
|
---|
639 | self._blocked_domains = tuple(blocked_domains)
|
---|
640 |
|
---|
641 | def is_blocked(self, domain):
|
---|
642 | for blocked_domain in self._blocked_domains:
|
---|
643 | if user_domain_match(domain, blocked_domain):
|
---|
644 | return True
|
---|
645 | return False
|
---|
646 |
|
---|
647 | def allowed_domains(self):
|
---|
648 | """Return None, or the sequence of allowed domains (as a tuple)."""
|
---|
649 | return self._allowed_domains
|
---|
650 | def set_allowed_domains(self, allowed_domains):
|
---|
651 | """Set the sequence of allowed domains, or None."""
|
---|
652 | if allowed_domains is not None:
|
---|
653 | allowed_domains = tuple(allowed_domains)
|
---|
654 | self._allowed_domains = allowed_domains
|
---|
655 |
|
---|
656 | def is_not_allowed(self, domain):
|
---|
657 | if self._allowed_domains is None:
|
---|
658 | return False
|
---|
659 | for allowed_domain in self._allowed_domains:
|
---|
660 | if user_domain_match(domain, allowed_domain):
|
---|
661 | return False
|
---|
662 | return True
|
---|
663 |
|
---|
664 | def set_ok(self, cookie, request):
|
---|
665 | """
|
---|
666 | If you override set_ok, be sure to call this method. If it returns
|
---|
667 | false, so should your subclass (assuming your subclass wants to be more
|
---|
668 | strict about which cookies to accept).
|
---|
669 |
|
---|
670 | """
|
---|
671 | debug(" - checking cookie %s", cookie)
|
---|
672 |
|
---|
673 | assert cookie.name is not None
|
---|
674 |
|
---|
675 | for n in "version", "verifiability", "name", "path", "domain", "port":
|
---|
676 | fn_name = "set_ok_" + n
|
---|
677 | fn = getattr(self, fn_name)
|
---|
678 | if not fn(cookie, request):
|
---|
679 | return False
|
---|
680 |
|
---|
681 | return True
|
---|
682 |
|
---|
683 | def set_ok_version(self, cookie, request):
|
---|
684 | if cookie.version is None:
|
---|
685 | # Version is always set to 0 by parse_ns_headers if it's a Netscape
|
---|
686 | # cookie, so this must be an invalid RFC 2965 cookie.
|
---|
687 | debug(" Set-Cookie2 without version attribute (%s)", cookie)
|
---|
688 | return False
|
---|
689 | if cookie.version > 0 and not self.rfc2965:
|
---|
690 | debug(" RFC 2965 cookies are switched off")
|
---|
691 | return False
|
---|
692 | elif cookie.version == 0 and not self.netscape:
|
---|
693 | debug(" Netscape cookies are switched off")
|
---|
694 | return False
|
---|
695 | return True
|
---|
696 |
|
---|
697 | def set_ok_verifiability(self, cookie, request):
|
---|
698 | if request_is_unverifiable(request) and is_third_party(request):
|
---|
699 | if cookie.version > 0 and self.strict_rfc2965_unverifiable:
|
---|
700 | debug(" third-party RFC 2965 cookie during "
|
---|
701 | "unverifiable transaction")
|
---|
702 | return False
|
---|
703 | elif cookie.version == 0 and self.strict_ns_unverifiable:
|
---|
704 | debug(" third-party Netscape cookie during "
|
---|
705 | "unverifiable transaction")
|
---|
706 | return False
|
---|
707 | return True
|
---|
708 |
|
---|
709 | def set_ok_name(self, cookie, request):
|
---|
710 | # Try and stop servers setting V0 cookies designed to hack other
|
---|
711 | # servers that know both V0 and V1 protocols.
|
---|
712 | if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
|
---|
713 | cookie.name.startswith("$")):
|
---|
714 | debug(" illegal name (starts with '$'): '%s'", cookie.name)
|
---|
715 | return False
|
---|
716 | return True
|
---|
717 |
|
---|
def set_ok_path(self, cookie, request):
    """Return False if an explicit cookie path is not a prefix of the request path.

    Cookies without an explicit path are always accepted.  The prefix
    rule applies to RFC 2965 cookies, and to Netscape cookies only when
    self.strict_ns_set_path is true.

    """
    if not cookie.path_specified:
        return True
    req_path = request_path(request)
    must_match = (cookie.version > 0 or
                  (cookie.version == 0 and self.strict_ns_set_path))
    if must_match and not req_path.startswith(cookie.path):
        debug(" path attribute %s is not a prefix of request "
              "path %s", cookie.path, req_path)
        return False
    return True
728 |
|
---|
def set_ok_countrycode_domain(self, cookie, request):
    """Return False if explicit cookie domain is not acceptable.

    Called by set_ok_domain, for convenience of overriding by
    subclasses.

    The check only runs for an explicitly specified domain when
    self.strict_domain is true.  It rejects two-label domains such as
    .co.uk, i.e. a generic second-level label followed by a
    two-character top-level label.

    """
    if not (cookie.domain_specified and self.strict_domain):
        return True
    domain = cookie.domain
    # since domain was specified, we know that:
    assert domain.startswith(".")
    if domain.count(".") != 2:
        return True
    # domain like .foo.bar
    last_dot = domain.rfind(".")
    second_level = domain[1:last_dot].lower()
    top_level = domain[last_dot + 1:]
    generic_labels = (
        "co", "ac",
        "com", "edu", "org", "net", "gov", "mil", "int",
        "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
        "museum", "name", "pro", "travel",
        )
    if len(top_level) == 2 and second_level in generic_labels:
        # domain like .co.uk
        return False
    return True
755 |
|
---|
def set_ok_domain(self, cookie, request):
    """Return True if the cookie's domain is acceptable for storing.

    The user block-list and allow-list and the country-code check
    (set_ok_countrycode_domain) are always applied.  The remaining
    checks only apply when the cookie carried an explicit domain: it
    must contain an embedded dot (or be ".local"), and must match the
    effective request-host according to the rules selected by the
    cookie version and the self.strict_ns_domain flags.

    """
    if self.is_blocked(cookie.domain):
        debug(" domain %s is in user block-list", cookie.domain)
        return False
    if self.is_not_allowed(cookie.domain):
        debug(" domain %s is not in user allow-list", cookie.domain)
        return False
    if not self.set_ok_countrycode_domain(cookie, request):
        debug(" country-code second level domain %s", cookie.domain)
        return False
    if not cookie.domain_specified:
        # nothing more to check for a defaulted domain
        return True

    req_host, erhn = eff_request_host_lc(request)
    domain = cookie.domain
    if domain.startswith("."):
        undotted_domain = domain[1:]
    else:
        undotted_domain = domain
    embedded_dots = (undotted_domain.find(".") >= 0)
    if not embedded_dots and domain != ".local":
        debug(" non-local domain %s contains no embedded dot",
              domain)
        return False
    if cookie.version == 0:
        # Netscape: plain suffix match, also tried with a dot prepended
        # to the effective request-host
        if (not erhn.endswith(domain) and
            (not erhn.startswith(".") and
             not ("." + erhn).endswith(domain))):
            debug(" effective request-host %s (even with added "
                  "initial dot) does not end with %s",
                  erhn, domain)
            return False
    if (cookie.version > 0 or
        (self.strict_ns_domain & self.DomainRFC2965Match)):
        if not domain_match(erhn, domain):
            debug(" effective request-host %s does not domain-match "
                  "%s", erhn, domain)
            return False
    if (cookie.version > 0 or
        (self.strict_ns_domain & self.DomainStrictNoDots)):
        # the part of the request host in front of the cookie domain
        # must be a single label, unless the host matches IPV4_RE
        host_prefix = req_host[:-len(domain)]
        if (host_prefix.find(".") >= 0 and
            not IPV4_RE.search(req_host)):
            debug(" host prefix %s for domain %s contains a dot",
                  host_prefix, domain)
            return False
    return True
801 |
|
---|
def set_ok_port(self, cookie, request):
    """Return False if an explicit cookie port list excludes the request port.

    Cookies without an explicit port are always accepted.  Otherwise
    each entry of the comma-separated cookie.port is examined in order:
    a non-numeric entry rejects the cookie, and the first entry equal to
    the request port (default "80") accepts it.

    """
    if not cookie.port_specified:
        return True
    req_port = request_port(request)
    if req_port is not None:
        req_port = str(req_port)
    else:
        req_port = "80"
    matched = False
    for p in cookie.port.split(","):
        try:
            int(p)
        except ValueError:
            debug(" bad port %s (not numeric)", p)
            return False
        if p == req_port:
            matched = True
            break
    if not matched:
        debug(" request port (%s) not found in %s",
              req_port, cookie.port)
        return False
    return True
822 |
|
---|
def return_ok(self, cookie, request):
    """Return True if the cookie may be sent back with this request.

    If you override return_ok, be sure to call this method.  If it returns
    false, so should your subclass (assuming your subclass wants to be more
    strict about which cookies to return).

    Runs the return_ok_<name> checks in a fixed order and stops at the
    first one that fails.

    """
    # Path has already been checked by path_return_ok, and domain blocking
    # done by domain_return_ok.
    debug(" - checking cookie %s", cookie)

    check_names = ("version", "verifiability", "secure", "expires", "port",
                   "domain")
    for suffix in check_names:
        check = getattr(self, "return_ok_" + suffix)
        if not check(cookie, request):
            return False
    return True
841 |
|
---|
def return_ok_version(self, cookie, request):
    """Return False if the cookie's protocol is switched off in this policy.

    RFC 2965 cookies (version > 0) need self.rfc2965; Netscape cookies
    (version 0) need self.netscape.

    """
    version = cookie.version
    if version > 0:
        if not self.rfc2965:
            debug(" RFC 2965 cookies are switched off")
            return False
    elif version == 0 and not self.netscape:
        debug(" Netscape cookies are switched off")
        return False
    return True
850 |
|
---|
def return_ok_verifiability(self, cookie, request):
    """Return False for a cookie that must not accompany a third-party unverifiable request.

    The check only applies when request_is_unverifiable(request) and
    is_third_party(request) are both true.  RFC 2965 cookies are then
    withheld if self.strict_rfc2965_unverifiable is set, and Netscape
    cookies if self.strict_ns_unverifiable is set.

    """
    if not (request_is_unverifiable(request) and is_third_party(request)):
        return True
    if cookie.version > 0 and self.strict_rfc2965_unverifiable:
        debug(" third-party RFC 2965 cookie during unverifiable transaction")
        return False
    if cookie.version == 0 and self.strict_ns_unverifiable:
        debug(" third-party Netscape cookie during unverifiable transaction")
        return False
    return True
862 |
|
---|
def return_ok_secure(self, cookie, request):
    """Return False if a secure cookie would be sent on a non-https request."""
    if not cookie.secure:
        return True
    if request.get_type() == "https":
        return True
    debug(" secure cookie with non-secure request")
    return False
868 |
|
---|
def return_ok_expires(self, cookie, request):
    """Return False if the cookie reports itself expired at self._now."""
    expired = cookie.is_expired(self._now)
    if expired:
        debug(" cookie expired")
        return False
    return True
874 |
|
---|
def return_ok_port(self, cookie, request):
    """Return False if the cookie is port-restricted and the request port is not listed.

    Cookies with no port restriction (cookie.port false) are always
    returned.  Otherwise the request port (default "80") must equal one
    of the comma-separated entries of cookie.port.

    """
    if not cookie.port:
        return True
    req_port = request_port(request)
    if req_port is None:
        req_port = "80"
    else:
        # cookie.port entries are strings; normalise the request port the
        # same way set_ok_port does so a numeric port still compares equal
        req_port = str(req_port)
    if req_port not in cookie.port.split(","):
        debug(" request port %s does not match cookie port %s",
              req_port, cookie.port)
        return False
    return True
888 |
|
---|
def return_ok_domain(self, cookie, request):
    """Return False if the cookie's domain does not match the request host.

    RFC 2965 cookies must domain-match the effective request-host.
    Netscape cookies must be a suffix of the dotted effective
    request-host; additionally, when DomainStrictNonDomain is set in
    self.strict_ns_domain, a Netscape cookie whose domain was not
    specified must equal the effective request-host exactly.

    """
    req_host, erhn = eff_request_host_lc(request)
    domain = cookie.domain
    version = cookie.version

    if version == 0:
        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if ((self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            debug(" cookie with unspecified domain does not string-compare "
                  "equal to request domain")
            return False
        if not ("." + erhn).endswith(domain):
            debug(" request-host %s does not match Netscape cookie domain "
                  "%s", req_host, domain)
            return False
    elif version > 0 and not domain_match(erhn, domain):
        debug(" effective request-host name %s does not domain-match "
              "RFC 2965 cookie domain %s", erhn, domain)
        return False
    return True
910 |
|
---|
def domain_return_ok(self, domain, request):
    """Return False if no cookie stored under domain could match request.

    Liberal check of domain.  This is here as an optimization to avoid
    having to load lots of MSIE cookie files unless necessary.  The user
    block-list and allow-list are applied here as well.

    """
    req_host, erhn = eff_request_host_lc(request)

    # Munge req_host and erhn to always start with a dot, so as to err on
    # the side of letting cookies through.
    suffix_matches = False
    for host in (req_host, erhn):
        if not host.startswith("."):
            host = "." + host
        if host.endswith(domain):
            suffix_matches = True
            break
    if not suffix_matches:
        # deliberately not logged
        return False

    if self.is_blocked(domain):
        debug(" domain %s is in user block-list", domain)
        return False
    if self.is_not_allowed(domain):
        debug(" domain %s is not in user allow-list", domain)
        return False

    return True
936 |
|
---|
def path_return_ok(self, path, request):
    """Return True if path is a prefix of the request's path."""
    debug("- checking cookie path=%s", path)
    req_path = request_path(request)
    if req_path.startswith(path):
        return True
    debug(" %s does not path-match %s", req_path, path)
    return False
944 |
|
---|
945 |
|
---|
def vals_sorted_by_key(adict):
    """Return the values of adict as a list, ordered by their sorted keys.

    The keys are copied into a list before sorting and the result is
    built as a real list, so callers can rely on len() and indexing
    whether or not dict.keys() and map() return lists.

    """
    keys = list(adict.keys())
    keys.sort()
    return [adict[key] for key in keys]
950 |
|
---|
class MappingIterator:
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an "items" attribute is treated as a nested
    mapping and descended into; every other value is yielded.

    """
    def __init__(self, mapping):
        # LIFO stack of (values, index of next value to visit) frames
        self._s = [(vals_sorted_by_key(mapping), 0)]

    def __iter__(self): return self

    def next(self):
        """Return the next non-mapping value, or raise StopIteration."""
        stack = self._s
        while stack:
            vals, i = stack.pop()
            if i >= len(vals):
                # this level is exhausted: drop its frame, resume the parent
                continue
            item = vals[i]
            stack.append((vals, i + 1))
            try:
                item.items
            except AttributeError:
                # non-mapping
                return item
            # mapping: descend into it before continuing with this level
            stack.append((vals_sorted_by_key(item), 0))
        raise StopIteration()

    # the iterator protocol spells this method __next__ on newer Pythons
    __next__ = next
979 |
|
---|
980 |
|
---|
981 | # Used as second parameter to dict.get method, to distinguish absent
|
---|
982 | # dict key from one with a None value.
|
---|
class Absent:
    """Marker passed as the default to dict.get, so that a missing key can
    be told apart from a key whose value is None."""
984 |
|
---|
985 | class CookieJar:
|
---|
986 | """Collection of HTTP cookies.
|
---|
987 |
|
---|
988 | You may not need to know about this class: try mechanize.urlopen().
|
---|
989 |
|
---|
990 | The major methods are extract_cookies and add_cookie_header; these are all
|
---|
991 | you are likely to need.
|
---|
992 |
|
---|
993 | CookieJar supports the iterator protocol:
|
---|
994 |
|
---|
995 | for cookie in cookiejar:
|
---|
996 | # do something with cookie
|
---|
997 |
|
---|
998 | Methods:
|
---|
999 |
|
---|
1000 | add_cookie_header(request)
|
---|
1001 | extract_cookies(response, request)
|
---|
1002 | get_policy()
|
---|
1003 | set_policy(policy)
|
---|
1004 | cookies_for_request(request)
|
---|
1005 | make_cookies(response, request)
|
---|
1006 | set_cookie_if_ok(cookie, request)
|
---|
1007 | set_cookie(cookie)
|
---|
1008 | clear_session_cookies()
|
---|
1009 | clear_expired_cookies()
|
---|
1010 | clear(domain=None, path=None, name=None)
|
---|
1011 |
|
---|
1012 | Public attributes
|
---|
1013 |
|
---|
1014 | policy: CookiePolicy object
|
---|
1015 |
|
---|
1016 | """
|
---|
1017 |
|
---|
1018 | non_word_re = re.compile(r"\W")
|
---|
1019 | quote_re = re.compile(r"([\"\\])")
|
---|
1020 | strict_domain_re = re.compile(r"\.?[^.]*")
|
---|
1021 | domain_re = re.compile(r"[^.]*")
|
---|
1022 | dots_re = re.compile(r"^\.+")
|
---|
1023 |
|
---|
1024 | def __init__(self, policy=None):
|
---|
1025 | """
|
---|
1026 | See CookieJar.__doc__ for argument documentation.
|
---|
1027 |
|
---|
1028 | """
|
---|
1029 | if policy is None:
|
---|
1030 | policy = DefaultCookiePolicy()
|
---|
1031 | self._policy = policy
|
---|
1032 |
|
---|
1033 | self._cookies = {}
|
---|
1034 |
|
---|
1035 | # for __getitem__ iteration in pre-2.2 Pythons
|
---|
1036 | self._prev_getitem_index = 0
|
---|
1037 |
|
---|
def get_policy(self):
    """Return the policy object currently used by this jar."""
    current = self._policy
    return current
1040 |
|
---|
def set_policy(self, policy):
    """Replace the policy object used by this jar."""
    setattr(self, "_policy", policy)
1043 |
|
---|
def _cookies_for_domain(self, domain, request):
    """Return the cookies stored under domain that the policy allows for request.

    The policy is consulted at three levels: domain_return_ok for the
    whole domain, path_return_ok for each stored path, and return_ok for
    each individual cookie.

    """
    policy = self._policy
    if not policy.domain_return_ok(domain, request):
        return []
    debug("Checking %s for cookies to return", domain)
    matches = []
    by_path = self._cookies[domain]
    for path in by_path.keys():
        if not policy.path_return_ok(path, request):
            continue
        for cookie in by_path[path].values():
            if policy.return_ok(cookie, request):
                debug(" it's a match")
                matches.append(cookie)
            else:
                debug(" not returning cookie")
    return matches
1061 |
|
---|
def cookies_for_request(self, request):
    """Return a list of cookies to be returned to server.

    The returned list of cookie instances is sorted in the order they
    should appear in the Cookie: header for return to the server.

    See add_cookie_header.__doc__ for the interface required of the
    request argument.

    New in version 0.1.10

    """
    self._policy._now = self._now = int(time.time())
    cookies = self._cookies_for_request(request)
    # add cookies in order of most specific (i.e. longest) path first; the
    # sort is stable, so cookies with equally long paths keep their order
    cookies.sort(key=lambda cookie: -len(cookie.path))
    return cookies
1080 |
|
---|
1081 | def _cookies_for_request(self, request):
|
---|
1082 | """Return a list of cookies to be returned to server."""
|
---|
1083 | # this method still exists (alongside cookies_for_request) because it
|
---|
1084 | # is part of an implied protected interface for subclasses of cookiejar
|
---|
1085 | # XXX document that implied interface, or provide another way of
|
---|
1086 | # implementing cookiejars than subclassing
|
---|
1087 | cookies = []
|
---|
1088 | for domain in self._cookies.keys():
|
---|
1089 | cookies.extend(self._cookies_for_domain(domain, request))
|
---|
1090 | return cookies
|
---|
1091 |
|
---|
1092 | def _cookie_attrs(self, cookies):
|
---|
1093 | """Return a list of cookie-attributes to be returned to server.
|
---|
1094 |
|
---|
1095 | The $Version attribute is also added when appropriate (currently only
|
---|
1096 | once per request).
|
---|
1097 |
|
---|
1098 | >>> jar = CookieJar()
|
---|
1099 | >>> ns_cookie = Cookie(0, "foo", '"bar"', None, False,
|
---|
1100 | ... "example.com", False, False,
|
---|
1101 | ... "/", False, False, None, True,
|
---|
1102 | ... None, None, {})
|
---|
1103 | >>> jar._cookie_attrs([ns_cookie])
|
---|
1104 | ['foo="bar"']
|
---|
1105 | >>> rfc2965_cookie = Cookie(1, "foo", "bar", None, False,
|
---|
1106 | ... ".example.com", True, False,
|
---|
1107 | ... "/", False, False, None, True,
|
---|
1108 | ... None, None, {})
|
---|
1109 | >>> jar._cookie_attrs([rfc2965_cookie])
|
---|
1110 | ['$Version=1', 'foo=bar', '$Domain="example.com"']
|
---|
1111 |
|
---|
1112 | """
|
---|
1113 | version_set = False
|
---|
1114 |
|
---|
1115 | attrs = []
|
---|
1116 | for cookie in cookies:
|
---|
1117 | # set version of Cookie header
|
---|
1118 | # XXX
|
---|
1119 | # What should it be if multiple matching Set-Cookie headers have
|
---|
1120 | # different versions themselves?
|
---|
1121 | # Answer: there is no answer; was supposed to be settled by
|
---|
1122 | # RFC 2965 errata, but that may never appear...
|
---|
1123 | version = cookie.version
|
---|
1124 | if not version_set:
|
---|
1125 | version_set = True
|
---|
1126 | if version > 0:
|
---|
1127 | attrs.append("$Version=%s" % version)
|
---|
1128 |
|
---|
1129 | # quote cookie value if necessary
|
---|
1130 | # (not for Netscape protocol, which already has any quotes
|
---|
1131 | # intact, due to the poorly-specified Netscape Cookie: syntax)
|
---|
1132 | if ((cookie.value is not None) and
|
---|
1133 | self.non_word_re.search(cookie.value) and version > 0):
|
---|
1134 | value = self.quote_re.sub(r"\\\1", cookie.value)
|
---|
1135 | else:
|
---|
1136 | value = cookie.value
|
---|
1137 |
|
---|
1138 | # add cookie-attributes to be returned in Cookie header
|
---|
1139 | if cookie.value is None:
|
---|
1140 | attrs.append(cookie.name)
|
---|
1141 | else:
|
---|
1142 | attrs.append("%s=%s" % (cookie.name, value))
|
---|
1143 | if version > 0:
|
---|
1144 | if cookie.path_specified:
|
---|
1145 | attrs.append('$Path="%s"' % cookie.path)
|
---|
1146 | if cookie.domain.startswith("."):
|
---|
1147 | domain = cookie.domain
|
---|
1148 | if (not cookie.domain_initial_dot and
|
---|
1149 | domain.startswith(".")):
|
---|
1150 | domain = domain[1:]
|
---|
1151 | attrs.append('$Domain="%s"' % domain)
|
---|
1152 | if cookie.port is not None:
|
---|
1153 | p = "$Port"
|
---|
1154 | if cookie.port_specified:
|
---|
1155 | p = p + ('="%s"' % cookie.port)
|
---|
1156 | attrs.append(p)
|
---|
1157 |
|
---|
1158 | return attrs
|
---|
1159 |
|
---|
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url, get_host, is_unverifiable, get_type,
    has_header, get_header, header_items and add_unredirected_header, as
    documented by urllib2, and the port attribute (the port number).
    Actually, RequestUpgradeProcessor will automatically upgrade your
    Request object to one with has_header, get_header, header_items and
    add_unredirected_header, if it lacks those methods, for compatibility
    with pre-2.4 versions of urllib2.

    Existing Cookie and Cookie2 headers on the request are left alone.
    Expired cookies are cleared from the jar afterwards.

    """
    debug("add_cookie_header")
    cookies = self.cookies_for_request(request)

    attrs = self._cookie_attrs(cookies)
    if attrs and not request.has_header("Cookie"):
        request.add_unredirected_header("Cookie", "; ".join(attrs))

    # if necessary, advertise that we know RFC 2965
    policy = self._policy
    if policy.rfc2965 and not policy.hide_cookie2:
        for cookie in cookies:
            if cookie.version == 1:
                continue
            if request.has_header("Cookie2"):
                continue
            request.add_unredirected_header("Cookie2", '$Version="1"')
            break

    self.clear_expired_cookies()
1191 |
|
---|
1192 | def _normalized_cookie_tuples(self, attrs_set):
|
---|
1193 | """Return list of tuples containing normalised cookie information.
|
---|
1194 |
|
---|
1195 | attrs_set is the list of lists of key,value pairs extracted from
|
---|
1196 | the Set-Cookie or Set-Cookie2 headers.
|
---|
1197 |
|
---|
1198 | Tuples are name, value, standard, rest, where name and value are the
|
---|
1199 | cookie name and value, standard is a dictionary containing the standard
|
---|
1200 | cookie-attributes (discard, secure, version, expires or max-age,
|
---|
1201 | domain, path and port) and rest is a dictionary containing the rest of
|
---|
1202 | the cookie-attributes.
|
---|
1203 |
|
---|
1204 | """
|
---|
1205 | cookie_tuples = []
|
---|
1206 |
|
---|
1207 | boolean_attrs = "discard", "secure"
|
---|
1208 | value_attrs = ("version",
|
---|
1209 | "expires", "max-age",
|
---|
1210 | "domain", "path", "port",
|
---|
1211 | "comment", "commenturl")
|
---|
1212 |
|
---|
1213 | for cookie_attrs in attrs_set:
|
---|
1214 | name, value = cookie_attrs[0]
|
---|
1215 |
|
---|
1216 | # Build dictionary of standard cookie-attributes (standard) and
|
---|
1217 | # dictionary of other cookie-attributes (rest).
|
---|
1218 |
|
---|
1219 | # Note: expiry time is normalised to seconds since epoch. V0
|
---|
1220 | # cookies should have the Expires cookie-attribute, and V1 cookies
|
---|
1221 | # should have Max-Age, but since V1 includes RFC 2109 cookies (and
|
---|
1222 | # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
|
---|
1223 | # accept either (but prefer Max-Age).
|
---|
1224 | max_age_set = False
|
---|
1225 |
|
---|
1226 | bad_cookie = False
|
---|
1227 |
|
---|
1228 | standard = {}
|
---|
1229 | rest = {}
|
---|
1230 | for k, v in cookie_attrs[1:]:
|
---|
1231 | lc = k.lower()
|
---|
1232 | # don't lose case distinction for unknown fields
|
---|
1233 | if lc in value_attrs or lc in boolean_attrs:
|
---|
1234 | k = lc
|
---|
1235 | if k in boolean_attrs and v is None:
|
---|
1236 | # boolean cookie-attribute is present, but has no value
|
---|
1237 | # (like "discard", rather than "port=80")
|
---|
1238 | v = True
|
---|
1239 | if standard.has_key(k):
|
---|
1240 | # only first value is significant
|
---|
1241 | continue
|
---|
1242 | if k == "domain":
|
---|
1243 | if v is None:
|
---|
1244 | debug(" missing value for domain attribute")
|
---|
1245 | bad_cookie = True
|
---|
1246 | break
|
---|
1247 | # RFC 2965 section 3.3.3
|
---|
1248 | v = v.lower()
|
---|
1249 | if k == "expires":
|
---|
1250 | if max_age_set:
|
---|
1251 | # Prefer max-age to expires (like Mozilla)
|
---|
1252 | continue
|
---|
1253 | if v is None:
|
---|
1254 | debug(" missing or invalid value for expires "
|
---|
1255 | "attribute: treating as session cookie")
|
---|
1256 | continue
|
---|
1257 | if k == "max-age":
|
---|
1258 | max_age_set = True
|
---|
1259 | if v is None:
|
---|
1260 | debug(" missing value for max-age attribute")
|
---|
1261 | bad_cookie = True
|
---|
1262 | break
|
---|
1263 | try:
|
---|
1264 | v = int(v)
|
---|
1265 | except ValueError:
|
---|
1266 | debug(" missing or invalid (non-numeric) value for "
|
---|
1267 | "max-age attribute")
|
---|
1268 | bad_cookie = True
|
---|
1269 | break
|
---|
1270 | # convert RFC 2965 Max-Age to seconds since epoch
|
---|
1271 | # XXX Strictly you're supposed to follow RFC 2616
|
---|
1272 | # age-calculation rules. Remember that zero Max-Age is a
|
---|
1273 | # is a request to discard (old and new) cookie, though.
|
---|
1274 | k = "expires"
|
---|
1275 | v = self._now + v
|
---|
1276 | if (k in value_attrs) or (k in boolean_attrs):
|
---|
1277 | if (v is None and
|
---|
1278 | k not in ["port", "comment", "commenturl"]):
|
---|
1279 | debug(" missing value for %s attribute" % k)
|
---|
1280 | bad_cookie = True
|
---|
1281 | break
|
---|
1282 | standard[k] = v
|
---|
1283 | else:
|
---|
1284 | rest[k] = v
|
---|
1285 |
|
---|
1286 | if bad_cookie:
|
---|
1287 | continue
|
---|
1288 |
|
---|
1289 | cookie_tuples.append((name, value, standard, rest))
|
---|
1290 |
|
---|
1291 | return cookie_tuples
|
---|
1292 |
|
---|
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised tuple, defaulting from request.

    tup is a (name, value, standard, rest) tuple: standard is dict of
    standard cookie-attributes, rest is dict of the rest of them.
    Missing path, domain and port are derived from the request.

    Returns None if the version cookie-attribute is not an integer.

    """
    name, value, standard, rest = tup

    # set the easy defaults
    version = standard.get("version", None)
    if version is not None:
        try:
            version = int(version)
        except ValueError:
            return None  # invalid version, ignore cookie
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # set default path
    path = standard.get("path", Absent)
    path_specified = path is not Absent and path != ""
    if path_specified:
        path = escape_path(path)
    else:
        path = request_path(request)
        last_slash = path.rfind("/")
        if last_slash != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:last_slash]
            else:
                path = path[:last_slash + 1]
        if len(path) == 0:
            path = "/"

    # set default domain
    domain = standard.get("domain", Absent)
    domain_specified = domain is not Absent
    if domain_specified:
        # remember whether it started with a dot before one is added
        domain_initial_dot = bool(domain.startswith("."))
        if not domain_initial_dot:
            domain = "." + domain
    else:
        domain_initial_dot = False
        req_host, erhn = eff_request_host_lc(request)
        domain = erhn

    # set default port
    port = standard.get("port", Absent)
    port_specified = False
    if port is Absent:
        # No port attr present.  Cookie can be sent back on any port.
        port = None
    elif port is None:
        # Port attr present, but has no value: default to request port.
        # Cookie should then only be sent back on that port.
        port = request_port(request)
    else:
        port_specified = True
        port = re.sub(r"\s+", "", port)

    # set default expires and discard
    expires = standard.get("expires", Absent)
    if expires is Absent:
        expires = None
        discard = True

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
1374 |
|
---|
1375 | def _cookies_from_attrs_set(self, attrs_set, request):
|
---|
1376 | cookie_tuples = self._normalized_cookie_tuples(attrs_set)
|
---|
1377 |
|
---|
1378 | cookies = []
|
---|
1379 | for tup in cookie_tuples:
|
---|
1380 | cookie = self._cookie_from_cookie_tuple(tup, request)
|
---|
1381 | if cookie: cookies.append(cookie)
|
---|
1382 | return cookies
|
---|
1383 |
|
---|
1384 | def _process_rfc2109_cookies(self, cookies):
|
---|
1385 | if self._policy.rfc2109_as_netscape is None:
|
---|
1386 | rfc2109_as_netscape = not self._policy.rfc2965
|
---|
1387 | else:
|
---|
1388 | rfc2109_as_netscape = self._policy.rfc2109_as_netscape
|
---|
1389 | for cookie in cookies:
|
---|
1390 | if cookie.version == 1:
|
---|
1391 | cookie.rfc2109 = True
|
---|
1392 | if rfc2109_as_netscape:
|
---|
1393 | # treat 2109 cookies as Netscape cookies rather than
|
---|
1394 | # as RFC2965 cookies
|
---|
1395 | cookie.version = 0
|
---|
1396 |
|
---|
1397 | def _make_cookies(self, response, request):
|
---|
1398 | # get cookie-attributes for RFC 2965 and Netscape protocols
|
---|
1399 | headers = response.info()
|
---|
1400 | rfc2965_hdrs = headers.getheaders("Set-Cookie2")
|
---|
1401 | ns_hdrs = headers.getheaders("Set-Cookie")
|
---|
1402 |
|
---|
1403 | rfc2965 = self._policy.rfc2965
|
---|
1404 | netscape = self._policy.netscape
|
---|
1405 |
|
---|
1406 | if ((not rfc2965_hdrs and not ns_hdrs) or
|
---|
1407 | (not ns_hdrs and not rfc2965) or
|
---|
1408 | (not rfc2965_hdrs and not netscape) or
|
---|
1409 | (not netscape and not rfc2965)):
|
---|
1410 | return [] # no relevant cookie headers: quick exit
|
---|
1411 |
|
---|
1412 | try:
|
---|
1413 | cookies = self._cookies_from_attrs_set(
|
---|
1414 | split_header_words(rfc2965_hdrs), request)
|
---|
1415 | except:
|
---|
1416 | reraise_unmasked_exceptions()
|
---|
1417 | cookies = []
|
---|
1418 |
|
---|
1419 | if ns_hdrs and netscape:
|
---|
1420 | try:
|
---|
1421 | # RFC 2109 and Netscape cookies
|
---|
1422 | ns_cookies = self._cookies_from_attrs_set(
|
---|
1423 | parse_ns_headers(ns_hdrs), request)
|
---|
1424 | except:
|
---|
1425 | reraise_unmasked_exceptions()
|
---|
1426 | ns_cookies = []
|
---|
1427 | self._process_rfc2109_cookies(ns_cookies)
|
---|
1428 |
|
---|
1429 | # Look for Netscape cookies (from Set-Cookie headers) that match
|
---|
1430 | # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
|
---|
1431 | # For each match, keep the RFC 2965 cookie and ignore the Netscape
|
---|
1432 | # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
|
---|
1433 | # bundled in with the Netscape cookies for this purpose, which is
|
---|
1434 | # reasonable behaviour.
|
---|
1435 | if rfc2965:
|
---|
1436 | lookup = {}
|
---|
1437 | for cookie in cookies:
|
---|
1438 | lookup[(cookie.domain, cookie.path, cookie.name)] = None
|
---|
1439 |
|
---|
1440 | def no_matching_rfc2965(ns_cookie, lookup=lookup):
|
---|
1441 | key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
|
---|
1442 | return not lookup.has_key(key)
|
---|
1443 | ns_cookies = filter(no_matching_rfc2965, ns_cookies)
|
---|
1444 |
|
---|
1445 | if ns_cookies:
|
---|
1446 | cookies.extend(ns_cookies)
|
---|
1447 |
|
---|
1448 | return cookies
|
---|
1449 |
|
---|
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object.

    Cookies whose expiry time is not later than the current time are
    left out.

    See extract_cookies.__doc__ for the interface required of the
    response and request arguments.

    """
    now = int(time.time())
    self._policy._now = now
    self._now = now
    live = []
    for cookie in self._make_cookies(response, request):
        if cookie.expires is None or not cookie.expires <= self._now:
            live.append(cookie)
    return live
1460 |
|
---|
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so.

    cookie: mechanize.Cookie instance
    request: see extract_cookies.__doc__ for the required interface

    """
    now = int(time.time())
    self._policy._now = now
    self._now = now

    if not self._policy.set_ok(cookie, request):
        return
    self.set_cookie(cookie)
1472 |
|
---|
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set.

    cookie: mechanize.Cookie instance

    The cookie is stored under its domain, path and name, replacing any
    cookie already stored under the same three values.

    """
    by_path = self._cookies.setdefault(cookie.domain, {})
    by_name = by_path.setdefault(cookie.path, {})
    by_name[cookie.name] = cookie
1484 |
|
---|
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
    object passed as argument.  Any of these headers that are found are
    used to update the state of the object (subject to the policy.set_ok
    method's approval).

    The response object (usually the result of a call to
    mechanize.urlopen, or similar) should support an info method, which
    returns a mimetools.Message object (in fact, the 'mimetools.Message
    object' may be any object that provides a getheaders method).

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url, get_type, get_host, and is_unverifiable, as
    documented by urllib2, and the port attribute (the port number).  The
    request is used to set default values for cookie-attributes as well as
    for checking that the cookie is OK to be set.

    """
    debug("extract_cookies: %s", response.info())
    self._policy._now = self._now = int(time.time())

    for cookie in self._make_cookies(response, request):
        already_expired = (cookie.expires is not None and
                           cookie.expires <= self._now)
        if not already_expired:
            if self._policy.set_ok(cookie, request):
                debug(" setting cookie: %s", cookie)
                self.set_cookie(cookie)
            continue
        # Expiry date in past is request to delete cookie.  This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(cookie.domain, cookie.path, cookie.name)
        except KeyError:
            # nothing stored under that domain/path/name
            pass
        debug("Expiring cookie, domain='%s', path='%s', name='%s'",
              cookie.domain, cookie.path, cookie.name)
1521 |
|
---|
def clear(self, domain=None, path=None, name=None):
    """Remove cookies from the jar.

    With no arguments, every cookie is removed.  With only domain, all
    cookies for that domain are removed.  With domain and path, the
    cookies stored under that path within the domain are removed.  With
    all three arguments, only the cookie with that name, path and
    domain is removed.

    Raises ValueError if name is given without both domain and path, or
    path is given without domain.  Raises KeyError if no matching
    cookie exists.

    """
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    if domain is None:
        # No selector at all: start again with an empty store.
        self._cookies = {}
        return

    del self._cookies[domain]
1548 |
|
---|
def clear_session_cookies(self):
    """Remove every session cookie from the jar.

    A session cookie is one with a true discard attribute: a cookie that
    had either no Max-Age or Expires cookie-attribute or an explicit
    Discard cookie-attribute, or which otherwise ended up marked for
    discarding.  For interactive browsers, the end of a session usually
    corresponds to closing the browser window.

    Note that the save method won't save session cookies anyway, unless you
    ask otherwise by passing a true ignore_discard argument.

    """
    # NOTE(review): clear() changes the jar while it is being iterated;
    # this relies on the jar's iterator tolerating that -- confirm.
    for cookie in self:
        if not cookie.discard:
            continue
        self.clear(cookie.domain, cookie.path, cookie.name)
1565 |
|
---|
def clear_expired_cookies(self):
    """Remove every expired cookie from the jar.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the save
    method won't save expired cookies anyway (unless you ask otherwise by
    passing a true ignore_expires argument).

    """
    # One timestamp is taken up front so that every cookie is judged
    # against the same instant.
    reference_time = time.time()
    # NOTE(review): clear() changes the jar while it is being iterated;
    # this relies on the jar's iterator tolerating that -- confirm.
    for cookie in self:
        if not cookie.is_expired(reference_time):
            continue
        self.clear(cookie.domain, cookie.path, cookie.name)
1580 |
|
---|
def __getitem__(self, i):
    """Return the i-th cookie; only sequential access from 0 is supported.

    Index 0 starts a fresh iteration over the jar.  Any other index must
    be exactly one more than the index used in the previous call,
    otherwise IndexError is raised.  IndexError is also raised once the
    underlying iterator is exhausted.

    """
    if i != 0:
        if self._prev_getitem_index != i - 1:
            raise IndexError(
                "CookieJar.__getitem__ only supports sequential iteration")
    else:
        self._getitem_iterator = self.__iter__()
    self._prev_getitem_index = i

    try:
        item = self._getitem_iterator.next()
    except StopIteration:
        # Running off the end is reported as an out-of-range index.
        raise IndexError()
    return item
1591 |
|
---|
def __iter__(self):
    """Return a MappingIterator over the jar's cookie store."""
    store = self._cookies
    return MappingIterator(store)
1594 |
|
---|
def __len__(self):
    """Return the number of cookies in the jar, counted by iterating it."""
    count = 0
    for _cookie in self:
        count += 1
    return count
1600 |
|
---|
def __repr__(self):
    """Return the jar's class followed by the repr() of each cookie."""
    pieces = [repr(cookie) for cookie in self]
    listing = ", ".join(pieces)
    return "<%s[%s]>" % (self.__class__, listing)
1605 |
|
---|
def __str__(self):
    """Return the jar's class followed by the str() of each cookie."""
    pieces = [str(cookie) for cookie in self]
    listing = ", ".join(pieces)
    return "<%s[%s]>" % (self.__class__, listing)
1610 |
|
---|
1611 |
|
---|
class LoadError(Exception):
    """Raised when a cookie file is not in the format a jar understands."""
1613 |
|
---|
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """Create a jar bound to an optional default filename.

        See FileCookieJar.__doc__ for argument documentation.  Raises
        ValueError if filename is neither None nor string-like.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        """
        CookieJar.__init__(self, policy)
        if not (filename is None or isstringlike(filename)):
            raise ValueError("filename must be string-like")
        self.delayload = bool(delayload)
        self.filename = filename

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This base implementation only raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        """
        if filename is None:
            # Fall back on the filename the jar was given earlier.
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            # The parsing itself is delegated to _really_load.
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Arguments are as for .load(); ValueError is raised if no filename
        is available.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        # Keep a copy of the current cookies so that a failed load can be
        # rolled back.
        saved_cookies = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except (LoadError, IOError):
            self._cookies = saved_cookies
            raise