source: py-scraping/mechanize/_lwpcookiejar.py@ 106

Last change on this file since 106 was 106, checked in by Rick van der Zwet, 15 years ago

Initial commit...

File size: 7.0 KB
Line 
1"""Load / save to libwww-perl (LWP) format files.
2
3Actually, the format is slightly extended from that used by LWP's
4(libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965 information
5not recorded by LWP.
6
7It uses the version string "2.0", though really there isn't an LWP Cookies
82.0 format. This indicates that there is extra information in here
9(domain_dot and port_spec) while still being compatible with libwww-perl,
10I hope.
11
12Copyright 2002-2006 John J Lee <jjl@pobox.com>
13Copyright 1997-1999 Gisle Aas (original libwww-perl code)
14
15This code is free software; you can redistribute it and/or modify it
16under the terms of the BSD or ZPL 2.1 licenses (see the file
17COPYING.txt included with the distribution).
18
19"""
20
21import time, re, logging
22
23from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
24 MISSING_FILENAME_TEXT, LoadError
25from _headersutil import join_header_words, split_header_words
26from _util import iso2time, time2isoz
27
# Module-level shortcut: debug messages from this module go to the
# "mechanize" logger.
debug = logging.getLogger("mechanize").debug
29
30
def lwp_cookie_str(cookie):
    """Serialise a Cookie as the value part of a "Set-Cookie3" line.

    The output follows the LWP cookie file format, slightly extended (see
    the module docstring).  The result is a single header-words string: the
    cookie's own name/value pair comes first, followed by its attributes;
    nonstandard attributes are emitted in sorted key order and "version" is
    always last.

    """
    pairs = [
        (cookie.name, cookie.value),
        ("path", cookie.path),
        ("domain", cookie.domain),
    ]

    if cookie.port is not None:
        pairs.append(("port", cookie.port))

    # Boolean attributes are written as bare keys (value None) and only
    # when set.
    for is_set, key in ((cookie.path_specified, "path_spec"),
                        (cookie.port_specified, "port_spec"),
                        (cookie.domain_initial_dot, "domain_dot"),
                        (cookie.secure, "secure")):
        if is_set:
            pairs.append((key, None))

    if cookie.expires:
        # The expiry is stored as text, converted by time2isoz.
        expiry_text = time2isoz(float(cookie.expires))
        pairs.append(("expires", expiry_text))
    if cookie.discard:
        pairs.append(("discard", None))
    if cookie.comment:
        pairs.append(("comment", cookie.comment))
    if cookie.comment_url:
        pairs.append(("commenturl", cookie.comment_url))
    if cookie.rfc2109:
        pairs.append(("rfc2109", None))

    # Nonstandard attributes go out in sorted key order, values stringified.
    extra_keys = cookie.nonstandard_attr_keys()
    extra_keys.sort()
    pairs.extend([(key, str(cookie.get_nonstandard_attr(key)))
                  for key in extra_keys])

    pairs.append(("version", str(cookie.version)))

    return join_header_words([pairs])
60
class LWPCookieJar(FileCookieJar):
    """
    Cookie jar that is stored on disk as a sequence of "Set-Cookie3" lines.

    "Set-Cookie3" is the format used by the libwww-perl library.  It is not
    known to be compatible with any browser, but it is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    # The first line of a cookies file must match this pattern.
    magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return the cookies as newline-terminated "Set-Cookie3" headers.

        ignore_discard and ignore_expires: see docstring for
        FileCookieJar.save.  When a flag is false, the corresponding
        cookies (marked for discard / already expired) are left out.

        """
        now = time.time()
        lines = []
        for cookie in self:
            if not ignore_discard:
                if cookie.discard:
                    debug("   Not saving %s: marked for discard", cookie.name)
                    continue
            if not ignore_expires:
                if cookie.is_expired(now):
                    debug("   Not saving %s: expired", cookie.name)
                    continue
            lines.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        # A trailing empty item makes the joined text end with a newline
        # whenever at least one cookie was written.
        lines.append("")
        return "\n".join(lines)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to a file in the extended LWP format.

        filename defaults to self.filename; ValueError is raised when
        neither is available.  ignore_discard and ignore_expires are passed
        through to as_lwp_str.

        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        out = open(filename, "w")
        try:
            debug("Saving LWP cookies file")
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            out.write("#LWP-Cookies-2.0\n")
            out.write(self.as_lwp_str(ignore_discard, ignore_expires))
        finally:
            out.close()

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read "Set-Cookie3" lines from the open file f into this jar.

        The first line must match magic_re, otherwise LoadError is raised.
        Lines that do not start with "Set-Cookie3:" are skipped.  Cookies
        marked for discard, or already expired, are dropped unless the
        matching ignore_* flag is true.  Failures while parsing are passed
        to reraise_unmasked_exceptions and then reported as LoadError.

        """
        first_line = f.readline()
        if re.search(self.magic_re, first_line) is None:
            raise LoadError("%s does not seem to contain cookies" % filename)

        now = time.time()

        prefix = "Set-Cookie3:"
        flag_names = ("port_spec", "path_spec", "domain_dot",
                      "secure", "discard", "rfc2109")
        valued_names = ("version",
                        "port", "path", "domain",
                        "expires",
                        "comment", "commenturl")

        try:
            # readline returns "" only at end of file.
            for line in iter(f.readline, ""):
                if not line.startswith(prefix):
                    continue
                payload = line[len(prefix):].strip()

                for words in split_header_words([payload]):
                    name, value = words[0]
                    # Every flag starts out False; the file only lists the
                    # ones that are set.
                    known = dict.fromkeys(flag_names, False)
                    extras = {}
                    for key, val in words[1:]:
                        if key is None:
                            lowered = None
                        else:
                            lowered = key.lower()
                        if lowered in flag_names:
                            # A bare flag (no value) means "true".
                            if val is None:
                                val = True
                            known[lowered] = val
                        elif lowered in valued_names:
                            known[lowered] = val
                        else:
                            # don't lose case distinction for unknown fields
                            extras[key] = val

                    attr = known.get
                    expires = attr("expires")
                    discard = attr("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    if expires is None:
                        # No expiry time available: mark for discard.
                        discard = True
                    domain = attr("domain")
                    domain_specified = domain.startswith(".")
                    loaded = Cookie(attr("version"), name, value,
                                    attr("port"), attr("port_spec"),
                                    domain, domain_specified,
                                    attr("domain_dot"),
                                    attr("path"), attr("path_spec"),
                                    attr("secure"),
                                    expires,
                                    discard,
                                    attr("comment"),
                                    attr("commenturl"),
                                    extras,
                                    attr("rfc2109"),
                                    )
                    unwanted = ((not ignore_discard and loaded.discard) or
                                (not ignore_expires and
                                 loaded.is_expired(now)))
                    if not unwanted:
                        self.set_cookie(loaded)
        except:
            # Deliberate catch-all for malformed input;
            # reraise_unmasked_exceptions decides what must propagate.
            reraise_unmasked_exceptions((IOError,))
            raise LoadError("invalid Set-Cookie3 format file %s" % filename)
185
Note: See TracBrowser for help on using the repository browser.