source: py-scraping/mechanize/_mozillacookiejar.py@ 203

Last change on this file since 203 was 106, checked in by Rick van der Zwet, 15 years ago

Initial commit...

File size: 6.2 KB
Line 
1"""Mozilla / Netscape cookie loading / saving.
2
3Copyright 2002-2006 John J Lee <jjl@pobox.com>
4Copyright 1997-1999 Gisle Aas (original libwww-perl code)
5
6This code is free software; you can redistribute it and/or modify it
7under the terms of the BSD or ZPL 2.1 licenses (see the file
8COPYING.txt included with the distribution).
9
10"""
11
12import re, time, logging
13
14from _clientcookie import reraise_unmasked_exceptions, FileCookieJar, Cookie, \
15 MISSING_FILENAME_TEXT, LoadError
# Module-level shortcut: debug(...) emits a DEBUG-level record on the
# "ClientCookie" logger.
debug = logging.getLogger("ClientCookie").debug
17
18
class MozillaCookieJar(FileCookieJar):
    """Cookie jar that loads and saves the Mozilla/Netscape cookies.txt format.

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # Pattern that the first line of a cookies file must contain.  It is
    # applied with re.search(), so leading whitespace before "#" is tolerated
    # when loading.
    magic_re = "#( Netscape)? HTTP Cookie File"
    # Written verbatim at the top of every saved file.  The lines start in
    # column 0 so that "#" is the first character of each comment line.
    header = """\
# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Parse cookies from the open file object f and add them to the jar.

        f -- file-like object providing readline() and close(); its first
          line must match magic_re
        filename -- used only to build error messages
        ignore_discard -- if false, cookies without an expiry field (which
          are marked discard) are skipped
        ignore_expires -- if false, cookies for which is_expired(now) is true
          are skipped

        Raises LoadError if the magic line is missing, if a line's domain and
        domain-specified flag disagree, or if a line cannot be parsed.

        """
        now = time.time()

        magic = f.readline()
        if not re.search(self.magic_re, magic):
            f.close()
            raise LoadError(
                "%s does not look like a Netscape format cookies file" %
                filename)

        # Bound before the try block so that the except clause below can
        # always format it, even if the very first readline() fails.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "": break

                # Drop only the line terminator (LF or CRLF): the last field
                # may be absent, so any trailing tab has to be kept.
                line = line.rstrip("\r\n")

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if (stripped == "" or
                    stripped.startswith("#") or
                    stripped.startswith("$")):
                    continue

                # Seven tab-separated fields; maxsplit=6 lets the value
                # itself contain tabs.  Too few fields raises ValueError,
                # which is converted to LoadError below.
                domain, domain_specified, path, secure, expires, name, value = \
                    line.split("\t", 6)
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # An empty name column means the text in the value column
                    # is really the name of a cookie that has no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                if domain_specified != initial_dot:
                    raise LoadError("domain and domain specified flag don't "
                                    "match in %s: %s" % (filename, line))

                discard = False
                if expires == "":
                    # No expiry recorded: treat as a session cookie.
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except:
            # NOTE(review): reraise_unmasked_exceptions is defined in
            # _clientcookie; presumably it re-raises the in-flight exception
            # when it is one of the listed types (or otherwise must not be
            # hidden) -- confirm.  Anything it lets through is reported as a
            # malformed file.
            reraise_unmasked_exceptions((IOError, LoadError))
            raise LoadError("invalid Netscape format file %s: %s" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar's cookies to filename in cookies.txt format.

        filename -- destination path; defaults to self.filename
        ignore_discard -- if false, cookies marked discard are not written
        ignore_expires -- if false, cookies for which is_expired(now) is true
          are not written

        Raises ValueError if no filename is given and self.filename is None.
        The file is opened with mode "w", so existing content is replaced.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename, "w")
        try:
            debug("Saving Netscape cookies.txt file")
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    debug("   Not saving %s: marked for discard", cookie.name)
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    debug("   Not saving %s: expired", cookie.name)
                    continue
                if cookie.secure: secure = "TRUE"
                else: secure = "FALSE"
                if cookie.domain.startswith("."): initial_dot = "TRUE"
                else: initial_dot = "FALSE"
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # Column order: domain, domain-specified flag, path, secure
                # flag, expires, name, value.
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) +
                    "\n")
        finally:
            f.close()
Note: See TracBrowser for help on using the repository browser.