[106] | 1 | """HTML form handling for web clients.
|
---|
| 2 |
|
---|
| 3 | ClientForm is a Python module for handling HTML forms on the client
|
---|
| 4 | side, useful for parsing HTML forms, filling them in and returning the
|
---|
| 5 | completed forms to the server. It has developed from a port of Gisle
|
---|
| 6 | Aas' Perl module HTML::Form, from the libwww-perl library, but the
|
---|
| 7 | interface is not the same.
|
---|
| 8 |
|
---|
| 9 | The most useful docstring is the one for HTMLForm.
|
---|
| 10 |
|
---|
| 11 | RFC 1866: HTML 2.0
|
---|
| 12 | RFC 1867: Form-based File Upload in HTML
|
---|
| 13 | RFC 2388: Returning Values from Forms: multipart/form-data
|
---|
| 14 | HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
|
---|
| 15 | HTML 4.01 Specification, W3C Recommendation 24 December 1999
|
---|
| 16 |
|
---|
| 17 |
|
---|
| 18 | Copyright 2002-2007 John J. Lee <jjl@pobox.com>
|
---|
| 19 | Copyright 2005 Gary Poster
|
---|
| 20 | Copyright 2005 Zope Corporation
|
---|
| 21 | Copyright 1998-2000 Gisle Aas.
|
---|
| 22 |
|
---|
| 23 | This code is free software; you can redistribute it and/or modify it
|
---|
| 24 | under the terms of the BSD or ZPL 2.1 licenses (see the file
|
---|
| 25 | COPYING.txt included with the distribution).
|
---|
| 26 |
|
---|
| 27 | """
|
---|
| 28 |
|
---|
| 29 | # XXX
|
---|
| 30 | # Remove parser testing hack
|
---|
| 31 | # safeUrl()-ize action
|
---|
| 32 | # Switch to unicode throughout (would be 0.3.x)
|
---|
| 33 | # See Wichert Akkerman's 2004-01-22 message to c.l.py.
|
---|
| 34 | # Add charset parameter to Content-type headers? How to find value??
|
---|
| 35 | # Add some more functional tests
|
---|
| 36 | # Especially single and multiple file upload on the internet.
|
---|
| 37 | # Does file upload work when name is missing? Sourceforge tracker form
|
---|
| 38 | # doesn't like it. Check standards, and test with Apache. Test
|
---|
| 39 | # binary upload with Apache.
|
---|
| 40 | # mailto submission & enctype text/plain
|
---|
| 41 | # I'm not going to fix this unless somebody tells me what real servers
|
---|
| 42 | # that want this encoding actually expect: If enctype is
|
---|
| 43 | # application/x-www-form-urlencoded and there's a FILE control present.
|
---|
| 44 | # Strictly, it should be 'name=data' (see HTML 4.01 spec., section
|
---|
| 45 | # 17.13.2), but I send "name=" ATM. What about multiple file upload??
|
---|
| 46 |
|
---|
| 47 | # Would be nice, but I'm not going to do it myself:
|
---|
| 48 | # -------------------------------------------------
|
---|
| 49 | # Maybe a 0.4.x?
|
---|
| 50 | # Replace by_label etc. with moniker / selector concept. Allows, eg.,
|
---|
| 51 | # a choice between selection by value / id / label / element
|
---|
| 52 | # contents. Or choice between matching labels exactly or by
|
---|
| 53 | # substring. Etc.
|
---|
| 54 | # Remove deprecated methods.
|
---|
| 55 | # ...what else?
|
---|
| 56 | # Work on DOMForm.
|
---|
| 57 | # XForms? Don't know if there's a need here.
|
---|
| 58 |
|
---|
# Public names of this module (the names a star-import of it exports).
__all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
           'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
           'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
           'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
           'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
           'ParseFileEx', 'ParseResponse', 'ParseResponseEx', 'PasswordControl',
           'RadioControl', 'ScalarControl', 'SelectControl',
           'SubmitButtonControl', 'SubmitControl', 'TextControl',
           'TextareaControl', 'XHTMLCompatibleFormParser']
| 68 |
|
---|
# Compatibility shims: define True/False and bool() only when the running
# interpreter does not already provide them (the NameError probes below).
try: True
except NameError:
    True = 1
    False = 0

try: bool
except NameError:
    # Fallback bool(): maps any object to the True/False values above
    # according to its truth value.
    def bool(expr):
        if expr: return True
        else: return False
| 79 |
|
---|
try:
    import logging
    import inspect
except ImportError:
    def debug(msg, *args, **kwds):
        """No-op stand-in used when logging or inspect cannot be imported."""
        pass
else:
    _logger = logging.getLogger("ClientForm")
    # While this flag is true debug() returns immediately, so the (costly)
    # inspect.stack() call is skipped.  _show_debug_messages() clears it.
    OPTIMIZATION_HACK = True

    def debug(msg, *args, **kwds):
        """Log msg % args at DEBUG level, prefixed with the caller's name.

        msg is a %-style format string and args its arguments, as for
        logging.Logger.debug(); kwds are passed through to it unchanged.
        Does nothing while OPTIMIZATION_HACK is true.  Always returns None.
        """
        if OPTIMIZATION_HACK:
            return

        # [1] is the caller's frame record; [3] is its function name.
        caller_name = inspect.stack()[1][3]
        # '%%s' survives this formatting step as '%s', which the logger
        # later fills with caller_name.
        extended_msg = '%%s %s' % msg
        extended_args = (caller_name,) + args
        _logger.debug(extended_msg, *extended_args, **kwds)

    def _show_debug_messages():
        """Enable debug() output and send it to sys.stdout.

        Clears OPTIMIZATION_HACK, sets the "ClientForm" logger to DEBUG
        level and attaches a DEBUG-level StreamHandler writing to
        sys.stdout.  Each call adds another handler.
        """
        global OPTIMIZATION_HACK
        OPTIMIZATION_HACK = False
        _logger.setLevel(logging.DEBUG)
        # sys is imported at module level, below this block.
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(logging.DEBUG)
        _logger.addHandler(handler)
| 106 |
|
---|
| 107 | import sys, urllib, urllib2, types, mimetools, copy, urlparse, \
|
---|
| 108 | htmlentitydefs, re, random
|
---|
| 109 | from cStringIO import StringIO
|
---|
| 110 |
|
---|
| 111 | import sgmllib
|
---|
| 112 | # monkeypatch to fix http://www.python.org/sf/803422 :-(
|
---|
| 113 | sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
|
---|
| 114 |
|
---|
| 115 | # HTMLParser.HTMLParser is recent, so live without it if it's not available
|
---|
| 116 | # (also, sgmllib.SGMLParser is much more tolerant of bad HTML)
|
---|
| 117 | try:
|
---|
| 118 | import HTMLParser
|
---|
| 119 | except ImportError:
|
---|
| 120 | HAVE_MODULE_HTMLPARSER = False
|
---|
| 121 | else:
|
---|
| 122 | HAVE_MODULE_HTMLPARSER = True
|
---|
| 123 |
|
---|
try:
    import warnings
except ImportError:
    def deprecation(message, stack_offset=0):
        """Do nothing: the warnings module could not be imported."""
        pass
else:
    def deprecation(message, stack_offset=0):
        """Issue a DeprecationWarning carrying message.

        The warning is raised with stacklevel 3 + stack_offset, so callers
        can pass a larger stack_offset to attribute it to a frame further
        up the stack.
        """
        level = 3 + stack_offset
        warnings.warn(message, DeprecationWarning, stacklevel=level)
| 132 |
|
---|
# Version string of this module.
VERSION = "0.2.10"

CHUNK = 1024  # size of chunks fed to parser, in bytes

# Default value of the ``encoding`` argument of unescape() and of the form
# parser constructors defined below.
DEFAULT_ENCODING = "latin-1"
| 138 |
|
---|
class Missing:
    """Empty marker class; it defines no attributes or behaviour itself."""
| 140 |
|
---|
_compress_re = re.compile(r"\s+")


def compress_text(text):
    """Return text stripped, with each whitespace run replaced by one space."""
    stripped = text.strip()
    return _compress_re.sub(" ", stripped)
| 143 |
|
---|
def normalize_line_endings(text):
    """Return text with every bare "\\n" or bare "\\r" turned into "\\r\\n".

    Existing "\\r\\n" pairs are left untouched.
    """
    # Either a "\n" not preceded by "\r", or a "\r" not followed by "\n".
    lone_line_break = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    return re.sub(lone_line_break, "\r\n", text)
| 146 |
|
---|
| 147 |
|
---|
| 148 | # This version of urlencode is from my Python 1.5.2 back-port of the
|
---|
| 149 | # Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
|
---|
| 150 | # of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
|
---|
def urlencode(query, doseq=False,):
    """Encode a mapping or a sequence of 2-tuples into a URL query string.

    query -- an object with an items() method, or a sequence of
        (key, value) tuples.  For a sequence, the order of the parameters
        in the output matches the order of the input.
    doseq -- if true, a value that is a (non-string) sequence is emitted
        as one "key=element" parameter per element.  If false, every value
        is simply passed through str().

    Keys and values are quoted with urllib.quote_plus().  With doseq true,
    unicode values are first encoded to ASCII with "replace" error handling
    (which can lose information).

    Returns the "&"-joined query string ("" for an empty query).

    Raises TypeError if query is neither a mapping nor a sequence whose
    first element is a tuple (this is what rejects plain strings).
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # Non-sequences fail at len(); non-empty strings fail the tuple
            # check.  Zero-length sequences of all types get through, which
            # matches the treatment of empty mappings.
            if len(query) and not isinstance(query[0], type(())):
                raise TypeError()
        except TypeError:
            # Raise with the message only: the traceback object must not
            # end up in the exception's args.
            raise TypeError("not a valid non-string sequence or mapping "
                            "object")

    quote_plus = urllib.quote_plus
    string_type = type("")
    unicode_type = type(u"")

    pairs = []
    for k, v in query:
        k = quote_plus(str(k))
        if not doseq:
            # preserve old behavior
            pairs.append(k + '=' + quote_plus(str(v)))
        elif isinstance(v, string_type):
            # isinstance, so that str subclasses are not split per character
            pairs.append(k + '=' + quote_plus(v))
        elif isinstance(v, unicode_type):
            # is there a reasonable way to convert to ASCII?
            # encode generates a string, but "replace" or "ignore"
            # lose information and "strict" can raise UnicodeError
            pairs.append(k + '=' + quote_plus(v.encode("ASCII", "replace")))
        else:
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                pairs.append(k + '=' + quote_plus(str(v)))
            else:
                # loop over the sequence
                for elt in v:
                    pairs.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(pairs)
| 216 |
|
---|
def unescape(data, entities, encoding=DEFAULT_ENCODING):
    """Replace the entity and character references in data.

    data -- text to process; None, or text without "&", is returned as is.
    entities -- mapping from a complete reference such as "&amp;" to its
        replacement text.
    encoding -- encoding applied to non-str (unicode) replacements.  None
        means "return the replacement unencoded", matching
        unescape_charref().

    Numeric references ("&#...;") are handled by unescape_charref().  A
    named reference that is not in entities, or whose replacement cannot be
    encoded, is left in the output unchanged.
    """
    if data is None or "&" not in data:
        return data

    # entities and encoding are handed to the helper as default arguments
    # rather than captured from the enclosing scope.
    def replace_entities(match, entities=entities, encoding=encoding):
        ent = match.group()
        if ent[1] == "#":
            return unescape_charref(ent[2:-1], encoding)

        repl = entities.get(ent)
        if repl is None:
            # unknown entity: keep the reference text
            return ent
        if encoding is None or isinstance(repl, type("")):
            # already a plain string, or the caller asked for no encoding
            return repl
        try:
            return repl.encode(encoding)
        except UnicodeError:
            return ent

    return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
| 239 |
|
---|
def unescape_charref(data, encoding):
    """Return the character named by a numeric character reference.

    data -- the reference without its "&#" prefix and ";" suffix: decimal
        digits, or "x"/"X" followed by hexadecimal digits.
    encoding -- if None the unicode character is returned; otherwise the
        character encoded with this encoding.

    If data is not a valid number, names a code point that unichr() cannot
    produce, or the character cannot be encoded, the original reference
    text "&#<data>;" is returned instead.
    """
    name, base = data, 10
    if name[:1] in ("x", "X"):
        name, base = name[1:], 16
    try:
        uc = unichr(int(name, base))
    except (ValueError, OverflowError):
        # malformed number or code point out of range: leave the
        # reference as it was written
        return "&#%s;" % data
    if encoding is None:
        return uc
    try:
        return uc.encode(encoding)
    except UnicodeError:
        return "&#%s;" % data
| 253 |
|
---|
def get_entitydefs():
    """Return a dict mapping "&name;" to the entity's unicode character.

    Built from htmlentitydefs.name2codepoint when that table exists,
    otherwise from the latin-1 encoded htmlentitydefs.entitydefs table.
    """
    table = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for name, codepoint in htmlentitydefs.name2codepoint.items():
            table["&%s;" % name] = unichr(codepoint)
    else:
        from codecs import latin_1_decode
        for name, char in htmlentitydefs.entitydefs.items():
            uc = latin_1_decode(char)[0]
            # some entries are themselves "&#NNN;" references
            if uc.startswith("&#") and uc.endswith(";"):
                uc = unescape_charref(uc[2:-1], None)
            table["&%s;" % name] = uc
    return table
| 271 |
|
---|
| 272 |
|
---|
def issequence(x):
    """Return True if x can be indexed with 0 (or is merely empty).

    A TypeError or KeyError from x[0] means "not a sequence"; an IndexError
    means an empty sequence, which still counts.  Any other exception
    propagates.
    """
    try:
        x[0]
    except IndexError:
        # indexable, just empty
        return True
    except (TypeError, KeyError):
        return False
    else:
        return True
| 281 |
|
---|
def isstringlike(x):
    """Return True if x supports concatenation with a string (x + "")."""
    try:
        x + ""
    except Exception:
        # Any ordinary failure means "not string-like".  KeyboardInterrupt
        # and SystemExit are deliberately not swallowed here.
        return False
    return True
| 286 |
|
---|
| 287 |
|
---|
def choose_boundary():
    """Return a string usable as a multipart boundary.

    The result is 27 dashes followed by three random non-negative integers
    (each below sys.maxint) written in decimal.
    """
    # follow IE and firefox
    digits = []
    for _ in range(3):
        digits.append(str(random.randint(0, sys.maxint - 1)))
    return ("-" * 27) + "".join(digits)
| 293 |
|
---|
# This MimeWriter, cut-and-pasted from the standard library, is here so that
# it can add to the HTTP headers rather than the message body when
# appropriate.  It also uses \r\n in place of \n.  This is a bit nasty.
class MimeWriter:

    """Generic MIME writer.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    A MIME writer is much more primitive than a MIME parser.  It
    doesn't seek around on the output file, and it doesn't use large
    amounts of buffer space, so you have to write the parts in the
    order they should occur on the output file.  It does buffer the
    headers you add, allowing you to rearrange their order.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    The subwriter is another MimeWriter instance, and should be
    treated in the same way as the toplevel MimeWriter.  This way,
    writing recursive body parts is easy.

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Some special cases:

    - startbody() just returns the file passed to the constructor;
      but don't use this knowledge, as it may be changed.

    - startmultipartbody() actually returns a file as well;
      this can be used to write the initial 'if you can read this your
      mailer is not MIME-aware' message.

    - If you call flushheaders(), the headers accumulated so far are
      written out (and forgotten); this is useful if you don't need a
      body part at all, e.g. for a subpart of type message/rfc822
      that's (mis)used to store some header-like information.

    - Passing a keyword argument 'prefix=<flag>' to addheader(),
      start*body() affects where the header is inserted; 0 means
      append at the end, 1 means insert at the start; default is
      append for addheader(), but insert for start*body(), which use
      it to determine where the Content-type header goes.

    - Passing a true 'add_to_http_hdrs' appends the header, as a
      (name, value) pair, to the http_hdrs list given to the constructor
      instead of writing it to the output file.

    """

    def __init__(self, fp, http_hdrs=None):
        """fp is the output file; http_hdrs an optional list that collects
        (name, value) pairs for headers added with add_to_http_hdrs true.
        """
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []     # buffered header lines, each ending in \r\n
        self._boundary = []    # stack of active multipart boundaries
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header, or append it to the HTTP header list.

        prefix is ignored if add_to_http_hdrs is true.  In that case the
        http_hdrs list passed to the constructor must not be None.
        """
        lines = value.split("\r\n")
        # drop empty lines at both ends
        while lines and not lines[-1]:
            del lines[-1]
        while lines and not lines[0]:
            del lines[0]
        if add_to_http_hdrs:
            value = "".join(lines)
            # 2.2 urllib2 doesn't normalize header case
            self._http_hdrs.append((key.capitalize(), value))
            return
        # continuation lines are stripped and re-indented
        lines[1:] = [" " + text.strip() for text in lines[1:]]
        line = key.title() + ": " + "\r\n".join(lines) + "\r\n"
        if prefix:
            self._headers.insert(0, line)
        else:
            self._headers.append(line)

    def flushheaders(self):
        """Write the buffered headers to the output file and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=None, prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Finish the headers and return the file to write the body to.

        If content_type and ctype are both true, a Content-Type header is
        added first, built from ctype and the (name, value) pairs in plist
        (any iterable; None means no parameters).

        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            for name, value in (plist or ()):
                ctype = ctype + ';\r\n %s=%s' % (name, value)
            self.addheader("Content-Type", ctype, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        if not add_to_http_hdrs:
            # blank line separating headers from body
            self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=None, prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body and return the output file.

        boundary defaults to a fresh choose_boundary() value.  It is put
        in front of the parameters in plist (any iterable of (name, value)
        pairs; None means none).  The other arguments are as for
        startbody().
        """
        boundary = boundary or choose_boundary()
        self._boundary.append(boundary)
        params = [("boundary", boundary)] + list(plist or ())
        return self.startbody("multipart/" + subtype,
                              params,
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the next boundary line and return a writer for the part.

        The returned writer shares this writer's output file but has no
        HTTP header list.
        """
        boundary = self._boundary[-1]
        if self._first_part:
            self._first_part = False
        else:
            # terminate the previous part's body
            self._fp.write("\r\n")
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing boundary of the current multipart body."""
        if self._first_part:
            # no part was written yet: emit one opening boundary first
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
| 439 |
|
---|
| 440 |
|
---|
class LocateError(ValueError):
    """Common base of the errors below that concern locating something."""


class AmbiguityError(LocateError):
    """LocateError subclass (presumably: more than one candidate matched)."""


class ControlNotFoundError(LocateError):
    """LocateError subclass (presumably: no matching control was found)."""


class ItemNotFoundError(LocateError):
    """LocateError subclass (presumably: no matching item was found)."""


class ItemCountError(ValueError):
    """ValueError subclass that is not a LocateError."""
| 447 |
|
---|
| 448 | # for backwards compatibility, ParseError derives from exceptions that were
|
---|
| 449 | # raised by versions of ClientForm <= 0.2.5
|
---|
# ParseError derives from whichever parser exceptions exist in this Python;
# SGMLLIB_PARSEERROR names the sgmllib one (RuntimeError when sgmllib has no
# SGMLParseError).
if HAVE_MODULE_HTMLPARSER:
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError

    class ParseError(SGMLLIB_PARSEERROR, HTMLParser.HTMLParseError):
        pass
elif hasattr(sgmllib, "SGMLParseError"):
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError

    class ParseError(SGMLLIB_PARSEERROR):
        pass
else:
    SGMLLIB_PARSEERROR = RuntimeError

    class ParseError(SGMLLIB_PARSEERROR):
        pass
| 465 |
|
---|
| 466 |
|
---|
| 467 | class _AbstractFormParser:
|
---|
| 468 | """forms attribute contains HTMLForm instances on completion."""
|
---|
| 469 | # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
|
---|
| 470 | def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
|
---|
| 471 | if entitydefs is None:
|
---|
| 472 | entitydefs = get_entitydefs()
|
---|
| 473 | self._entitydefs = entitydefs
|
---|
| 474 | self._encoding = encoding
|
---|
| 475 |
|
---|
| 476 | self.base = None
|
---|
| 477 | self.forms = []
|
---|
| 478 | self.labels = []
|
---|
| 479 | self._current_label = None
|
---|
| 480 | self._current_form = None
|
---|
| 481 | self._select = None
|
---|
| 482 | self._optgroup = None
|
---|
| 483 | self._option = None
|
---|
| 484 | self._textarea = None
|
---|
| 485 |
|
---|
| 486 | # forms[0] will contain all controls that are outside of any form
|
---|
| 487 | # self._global_form is an alias for self.forms[0]
|
---|
| 488 | self._global_form = None
|
---|
| 489 | self.start_form([])
|
---|
| 490 | self.end_form()
|
---|
| 491 | self._current_form = self._global_form = self.forms[0]
|
---|
| 492 |
|
---|
| 493 | def do_base(self, attrs):
|
---|
| 494 | debug("%s", attrs)
|
---|
| 495 | for key, value in attrs:
|
---|
| 496 | if key == "href":
|
---|
| 497 | self.base = self.unescape_attr_if_required(value)
|
---|
| 498 |
|
---|
| 499 | def end_body(self):
|
---|
| 500 | debug("")
|
---|
| 501 | if self._current_label is not None:
|
---|
| 502 | self.end_label()
|
---|
| 503 | if self._current_form is not self._global_form:
|
---|
| 504 | self.end_form()
|
---|
| 505 |
|
---|
| 506 | def start_form(self, attrs):
|
---|
| 507 | debug("%s", attrs)
|
---|
| 508 | if self._current_form is not self._global_form:
|
---|
| 509 | raise ParseError("nested FORMs")
|
---|
| 510 | name = None
|
---|
| 511 | action = None
|
---|
| 512 | enctype = "application/x-www-form-urlencoded"
|
---|
| 513 | method = "GET"
|
---|
| 514 | d = {}
|
---|
| 515 | for key, value in attrs:
|
---|
| 516 | if key == "name":
|
---|
| 517 | name = self.unescape_attr_if_required(value)
|
---|
| 518 | elif key == "action":
|
---|
| 519 | action = self.unescape_attr_if_required(value)
|
---|
| 520 | elif key == "method":
|
---|
| 521 | method = self.unescape_attr_if_required(value.upper())
|
---|
| 522 | elif key == "enctype":
|
---|
| 523 | enctype = self.unescape_attr_if_required(value.lower())
|
---|
| 524 | d[key] = self.unescape_attr_if_required(value)
|
---|
| 525 | controls = []
|
---|
| 526 | self._current_form = (name, action, method, enctype), d, controls
|
---|
| 527 |
|
---|
| 528 | def end_form(self):
|
---|
| 529 | debug("")
|
---|
| 530 | if self._current_label is not None:
|
---|
| 531 | self.end_label()
|
---|
| 532 | if self._current_form is self._global_form:
|
---|
| 533 | raise ParseError("end of FORM before start")
|
---|
| 534 | self.forms.append(self._current_form)
|
---|
| 535 | self._current_form = self._global_form
|
---|
| 536 |
|
---|
| 537 | def start_select(self, attrs):
|
---|
| 538 | debug("%s", attrs)
|
---|
| 539 | if self._select is not None:
|
---|
| 540 | raise ParseError("nested SELECTs")
|
---|
| 541 | if self._textarea is not None:
|
---|
| 542 | raise ParseError("SELECT inside TEXTAREA")
|
---|
| 543 | d = {}
|
---|
| 544 | for key, val in attrs:
|
---|
| 545 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 546 |
|
---|
| 547 | self._select = d
|
---|
| 548 | self._add_label(d)
|
---|
| 549 |
|
---|
| 550 | self._append_select_control({"__select": d})
|
---|
| 551 |
|
---|
| 552 | def end_select(self):
|
---|
| 553 | debug("")
|
---|
| 554 | if self._select is None:
|
---|
| 555 | raise ParseError("end of SELECT before start")
|
---|
| 556 |
|
---|
| 557 | if self._option is not None:
|
---|
| 558 | self._end_option()
|
---|
| 559 |
|
---|
| 560 | self._select = None
|
---|
| 561 |
|
---|
| 562 | def start_optgroup(self, attrs):
|
---|
| 563 | debug("%s", attrs)
|
---|
| 564 | if self._select is None:
|
---|
| 565 | raise ParseError("OPTGROUP outside of SELECT")
|
---|
| 566 | d = {}
|
---|
| 567 | for key, val in attrs:
|
---|
| 568 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 569 |
|
---|
| 570 | self._optgroup = d
|
---|
| 571 |
|
---|
| 572 | def end_optgroup(self):
|
---|
| 573 | debug("")
|
---|
| 574 | if self._optgroup is None:
|
---|
| 575 | raise ParseError("end of OPTGROUP before start")
|
---|
| 576 | self._optgroup = None
|
---|
| 577 |
|
---|
| 578 | def _start_option(self, attrs):
|
---|
| 579 | debug("%s", attrs)
|
---|
| 580 | if self._select is None:
|
---|
| 581 | raise ParseError("OPTION outside of SELECT")
|
---|
| 582 | if self._option is not None:
|
---|
| 583 | self._end_option()
|
---|
| 584 |
|
---|
| 585 | d = {}
|
---|
| 586 | for key, val in attrs:
|
---|
| 587 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 588 |
|
---|
| 589 | self._option = {}
|
---|
| 590 | self._option.update(d)
|
---|
| 591 | if (self._optgroup and self._optgroup.has_key("disabled") and
|
---|
| 592 | not self._option.has_key("disabled")):
|
---|
| 593 | self._option["disabled"] = None
|
---|
| 594 |
|
---|
| 595 | def _end_option(self):
|
---|
| 596 | debug("")
|
---|
| 597 | if self._option is None:
|
---|
| 598 | raise ParseError("end of OPTION before start")
|
---|
| 599 |
|
---|
| 600 | contents = self._option.get("contents", "").strip()
|
---|
| 601 | self._option["contents"] = contents
|
---|
| 602 | if not self._option.has_key("value"):
|
---|
| 603 | self._option["value"] = contents
|
---|
| 604 | if not self._option.has_key("label"):
|
---|
| 605 | self._option["label"] = contents
|
---|
| 606 | # stuff dict of SELECT HTML attrs into a special private key
|
---|
| 607 | # (gets deleted again later)
|
---|
| 608 | self._option["__select"] = self._select
|
---|
| 609 | self._append_select_control(self._option)
|
---|
| 610 | self._option = None
|
---|
| 611 |
|
---|
| 612 | def _append_select_control(self, attrs):
|
---|
| 613 | debug("%s", attrs)
|
---|
| 614 | controls = self._current_form[2]
|
---|
| 615 | name = self._select.get("name")
|
---|
| 616 | controls.append(("select", name, attrs))
|
---|
| 617 |
|
---|
| 618 | def start_textarea(self, attrs):
|
---|
| 619 | debug("%s", attrs)
|
---|
| 620 | if self._textarea is not None:
|
---|
| 621 | raise ParseError("nested TEXTAREAs")
|
---|
| 622 | if self._select is not None:
|
---|
| 623 | raise ParseError("TEXTAREA inside SELECT")
|
---|
| 624 | d = {}
|
---|
| 625 | for key, val in attrs:
|
---|
| 626 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 627 | self._add_label(d)
|
---|
| 628 |
|
---|
| 629 | self._textarea = d
|
---|
| 630 |
|
---|
| 631 | def end_textarea(self):
|
---|
| 632 | debug("")
|
---|
| 633 | if self._textarea is None:
|
---|
| 634 | raise ParseError("end of TEXTAREA before start")
|
---|
| 635 | controls = self._current_form[2]
|
---|
| 636 | name = self._textarea.get("name")
|
---|
| 637 | controls.append(("textarea", name, self._textarea))
|
---|
| 638 | self._textarea = None
|
---|
| 639 |
|
---|
| 640 | def start_label(self, attrs):
|
---|
| 641 | debug("%s", attrs)
|
---|
| 642 | if self._current_label:
|
---|
| 643 | self.end_label()
|
---|
| 644 | d = {}
|
---|
| 645 | for key, val in attrs:
|
---|
| 646 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 647 | taken = bool(d.get("for")) # empty id is invalid
|
---|
| 648 | d["__text"] = ""
|
---|
| 649 | d["__taken"] = taken
|
---|
| 650 | if taken:
|
---|
| 651 | self.labels.append(d)
|
---|
| 652 | self._current_label = d
|
---|
| 653 |
|
---|
| 654 | def end_label(self):
|
---|
| 655 | debug("")
|
---|
| 656 | label = self._current_label
|
---|
| 657 | if label is None:
|
---|
| 658 | # something is ugly in the HTML, but we're ignoring it
|
---|
| 659 | return
|
---|
| 660 | self._current_label = None
|
---|
| 661 | # if it is staying around, it is True in all cases
|
---|
| 662 | del label["__taken"]
|
---|
| 663 |
|
---|
| 664 | def _add_label(self, d):
|
---|
| 665 | #debug("%s", d)
|
---|
| 666 | if self._current_label is not None:
|
---|
| 667 | if not self._current_label["__taken"]:
|
---|
| 668 | self._current_label["__taken"] = True
|
---|
| 669 | d["__label"] = self._current_label
|
---|
| 670 |
|
---|
| 671 | def handle_data(self, data):
|
---|
| 672 | debug("%s", data)
|
---|
| 673 |
|
---|
| 674 | if self._option is not None:
|
---|
| 675 | # self._option is a dictionary of the OPTION element's HTML
|
---|
| 676 | # attributes, but it has two special keys, one of which is the
|
---|
| 677 | # special "contents" key contains text between OPTION tags (the
|
---|
| 678 | # other is the "__select" key: see the end_option method)
|
---|
| 679 | map = self._option
|
---|
| 680 | key = "contents"
|
---|
| 681 | elif self._textarea is not None:
|
---|
| 682 | map = self._textarea
|
---|
| 683 | key = "value"
|
---|
| 684 | data = normalize_line_endings(data)
|
---|
| 685 | # not if within option or textarea
|
---|
| 686 | elif self._current_label is not None:
|
---|
| 687 | map = self._current_label
|
---|
| 688 | key = "__text"
|
---|
| 689 | else:
|
---|
| 690 | return
|
---|
| 691 |
|
---|
| 692 | if data and not map.has_key(key):
|
---|
| 693 | # according to
|
---|
| 694 | # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break
|
---|
| 695 | # immediately after start tags or immediately before end tags must
|
---|
| 696 | # be ignored, but real browsers only ignore a line break after a
|
---|
| 697 | # start tag, so we'll do that.
|
---|
| 698 | if data[0:2] == "\r\n":
|
---|
| 699 | data = data[2:]
|
---|
| 700 | elif data[0:1] in ["\n", "\r"]:
|
---|
| 701 | data = data[1:]
|
---|
| 702 | map[key] = data
|
---|
| 703 | else:
|
---|
| 704 | map[key] = map[key] + data
|
---|
| 705 |
|
---|
| 706 | def do_button(self, attrs):
|
---|
| 707 | debug("%s", attrs)
|
---|
| 708 | d = {}
|
---|
| 709 | d["type"] = "submit" # default
|
---|
| 710 | for key, val in attrs:
|
---|
| 711 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 712 | controls = self._current_form[2]
|
---|
| 713 |
|
---|
| 714 | type = d["type"]
|
---|
| 715 | name = d.get("name")
|
---|
| 716 | # we don't want to lose information, so use a type string that
|
---|
| 717 | # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
|
---|
| 718 | # e.g. type for BUTTON/RESET is "resetbutton"
|
---|
| 719 | # (type for INPUT/RESET is "reset")
|
---|
| 720 | type = type + "button"
|
---|
| 721 | self._add_label(d)
|
---|
| 722 | controls.append((type, name, d))
|
---|
| 723 |
|
---|
| 724 | def do_input(self, attrs):
|
---|
| 725 | debug("%s", attrs)
|
---|
| 726 | d = {}
|
---|
| 727 | d["type"] = "text" # default
|
---|
| 728 | for key, val in attrs:
|
---|
| 729 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 730 | controls = self._current_form[2]
|
---|
| 731 |
|
---|
| 732 | type = d["type"]
|
---|
| 733 | name = d.get("name")
|
---|
| 734 | self._add_label(d)
|
---|
| 735 | controls.append((type, name, d))
|
---|
| 736 |
|
---|
| 737 | def do_isindex(self, attrs):
|
---|
| 738 | debug("%s", attrs)
|
---|
| 739 | d = {}
|
---|
| 740 | for key, val in attrs:
|
---|
| 741 | d[key] = self.unescape_attr_if_required(val)
|
---|
| 742 | controls = self._current_form[2]
|
---|
| 743 |
|
---|
| 744 | self._add_label(d)
|
---|
| 745 | # isindex doesn't have type or name HTML attributes
|
---|
| 746 | controls.append(("isindex", None, d))
|
---|
| 747 |
|
---|
| 748 | def handle_entityref(self, name):
|
---|
| 749 | #debug("%s", name)
|
---|
| 750 | self.handle_data(unescape(
|
---|
| 751 | '&%s;' % name, self._entitydefs, self._encoding))
|
---|
| 752 |
|
---|
| 753 | def handle_charref(self, name):
|
---|
| 754 | #debug("%s", name)
|
---|
| 755 | self.handle_data(unescape_charref(name, self._encoding))
|
---|
| 756 |
|
---|
| 757 | def unescape_attr(self, name):
|
---|
| 758 | #debug("%s", name)
|
---|
| 759 | return unescape(name, self._entitydefs, self._encoding)
|
---|
| 760 |
|
---|
def unescape_attrs(self, attrs):
    """Return a new dictionary holding the unescaped values of attrs.

    attrs: mapping of attribute names to values

    Keys are kept as they are.  A value that itself has an `items`
    attribute (a nested mapping such as the "__select" entry) is
    processed recursively; every other value goes through unescape_attr.
    """
    unescaped = {}
    for key, val in attrs.items():
        if getattr(val, "items", None) is None:
            unescaped[key] = self.unescape_attr(val)
        else:
            # e.g. "__select" -- a nested attribute mapping
            unescaped[key] = self.unescape_attrs(val)
    return unescaped
| 773 |
|
---|
def unknown_entityref(self, ref):
    """Pass an unrecognised entity reference through as literal text."""
    literal = "&%s;" % ref
    self.handle_data(literal)
def unknown_charref(self, ref):
    """Pass an unrecognised character reference through as literal text."""
    literal = "&#%s;" % ref
    self.handle_data(literal)
| 776 |
|
---|
| 777 |
|
---|
if HAVE_MODULE_HTMLPARSER:
    class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
        """Good for XHTML, bad for tolerance of incorrect HTML."""
        # thanks to Michael Howitz for this!
        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
            HTMLParser.HTMLParser.__init__(self)
            _AbstractFormParser.__init__(self, entitydefs, encoding)

        def feed(self, data):
            """Feed data to HTMLParser, reporting failures as ParseError."""
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError:
                raise ParseError(sys.exc_info()[1])

        def start_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

        def end_option(self):
            _AbstractFormParser._end_option(self)

        def handle_starttag(self, tag, attrs):
            """Dispatch a start tag to start_<tag>, or failing that do_<tag>.

            Tags with neither handler are ignored.
            """
            handler = getattr(self, "start_" + tag, None)
            if handler is None:
                handler = getattr(self, "do_" + tag, None)
            if handler is not None:
                handler(attrs)

        def handle_endtag(self, tag):
            """Dispatch an end tag to end_<tag>; unknown tags are ignored."""
            handler = getattr(self, "end_" + tag, None)
            if handler is not None:
                handler()

        def unescape(self, name):
            # Use the entitydefs passed into constructor, not
            # HTMLParser.HTMLParser's entitydefs.
            return self.unescape_attr(name)

        def unescape_attr_if_required(self, name):
            # HTMLParser.HTMLParser already unescaped the value
            return name

        def unescape_attrs_if_required(self, attrs):
            # likewise already unescaped by HTMLParser.HTMLParser
            return attrs

        def close(self):
            HTMLParser.HTMLParser.close(self)
            self.end_body()
else:
    class XHTMLCompatibleFormParser:
        """Placeholder used when the HTMLParser module is unavailable."""
        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
            raise ValueError("HTMLParser could not be imported")
| 836 |
|
---|
| 837 |
|
---|
class _AbstractSgmllibParser(_AbstractFormParser):
    """Form-parser behaviour shared by the sgmllib-based parsers.

    Which class is responsible for unescaping attribute values depends on
    the running Python version: from 2.5 on sgmllib has already done it,
    on earlier versions this class does it.
    """

    def do_option(self, attrs):
        _AbstractFormParser._start_option(self, attrs)

    if sys.version_info[:2] < (2, 5):
        def unescape_attr_if_required(self, name):
            return self.unescape_attr(name)

        def unescape_attrs_if_required(self, attrs):
            return self.unescape_attrs(attrs)
    else:
        # we override this attr to decode hex charrefs
        entity_or_charref = re.compile(
            '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')

        def convert_entityref(self, name):
            reference = "&%s;" % name
            return unescape(reference, self._entitydefs, self._encoding)

        def convert_charref(self, name):
            reference = "%s" % name
            return unescape_charref(reference, self._encoding)

        def unescape_attr_if_required(self, name):
            # sgmllib already unescaped the value
            return name

        def unescape_attrs_if_required(self, attrs):
            # likewise already unescaped by sgmllib
            return attrs
| 860 |
|
---|
| 861 |
|
---|
class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
    """Good for tolerance of incorrect HTML, bad for XHTML."""
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        sgmllib.SGMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        """Feed data to SGMLParser, reporting failures as ParseError."""
        try:
            sgmllib.SGMLParser.feed(self, data)
        except SGMLLIB_PARSEERROR:
            raise ParseError(sys.exc_info()[1])

    def close(self):
        """Close the SGML parser, then finish the document via end_body."""
        sgmllib.SGMLParser.close(self)
        self.end_body()
| 877 |
|
---|
| 878 |
|
---|
| 879 | # sigh, must support mechanize by allowing dynamic creation of classes based on
|
---|
| 880 | # its bundled copy of BeautifulSoup (which was necessary because of dependency
|
---|
| 881 | # problems)
|
---|
| 882 |
|
---|
def _create_bs_classes(bs,
                       icbinbs,
                       ):
    """Build the two BeautifulSoup-backed form parser classes.

    bs: class used as the base of RobustFormParser
    icbinbs: class used as the base of NestingRobustFormParser

    Returns the tuple (RobustFormParser, NestingRobustFormParser).  The
    classes are created here, rather than at module level, so that a
    caller can supply the base classes from its own copy of BeautifulSoup.
    """
    class _AbstractBSFormParser(_AbstractSgmllibParser):
        # concrete subclasses set this to their BeautifulSoup base class
        bs_base_class = None

        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
            _AbstractFormParser.__init__(self, entitydefs, encoding)
            self.bs_base_class.__init__(self)

        def handle_data(self, data):
            # both the form parser and the soup need to see the text
            _AbstractFormParser.handle_data(self, data)
            self.bs_base_class.handle_data(self, data)

        def feed(self, data):
            try:
                self.bs_base_class.feed(self, data)
            except SGMLLIB_PARSEERROR:
                raise ParseError(sys.exc_info()[1])

        def close(self):
            self.bs_base_class.close(self)
            self.end_body()

    class RobustFormParser(_AbstractBSFormParser, bs):
        """Tries to be highly tolerant of incorrect HTML."""
        bs_base_class = bs

    class NestingRobustFormParser(_AbstractBSFormParser, icbinbs):
        """Tries to be highly tolerant of incorrect HTML.

        Different from RobustFormParser in that it more often guesses nesting
        above missing end tags (see BeautifulSoup docs).

        """
        bs_base_class = icbinbs

    return RobustFormParser, NestingRobustFormParser
| 918 |
|
---|
# Define the BeautifulSoup-based parsers only when BeautifulSoup can be
# imported; otherwise the names are simply left undefined.
try:
    if sys.version_info[:2] < (2, 2):
        # BeautifulSoup uses generators
        raise ImportError
    import BeautifulSoup
except ImportError:
    pass
else:
    RobustFormParser, NestingRobustFormParser = _create_bs_classes(
        BeautifulSoup.BeautifulSoup,
        BeautifulSoup.ICantBelieveItsBeautifulSoup,
        )
    __all__ += ['RobustFormParser', 'NestingRobustFormParser']
| 930 |
|
---|
| 931 |
|
---|
| 932 | #FormParser = XHTMLCompatibleFormParser # testing hack
|
---|
| 933 | #FormParser = RobustFormParser # testing hack
|
---|
| 934 |
|
---|
| 935 |
|
---|
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=urllib2.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=urlparse.urljoin,
                    _urlparse=urlparse.urlparse,
                    _urlunparse=urlparse.urlunparse,
                    ):
    """Identical to ParseResponse, except that:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.
    """
    base_uri = response.geturl()
    return _ParseFileEx(response, base_uri,
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
| 969 |
|
---|
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=urllib2.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=urlparse.urljoin,
                _urlparse=urlparse.urlparse,
                _urlunparse=urlparse.urlunparse,
                ):
    """Identical to ParseFile, except that:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.
    """
    return _ParseFileEx(file, base_uri,
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
| 1003 |
|
---|
def ParseResponse(response, *args, **kwds):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    form_parser_class: class to instantiate and use to parse
    request_class: class to return from .click() method (default is
     urllib2.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
     definitions (a sensible default is used)
    encoding: character encoding used for encoding numeric character references
     when matching link text.  ClientForm does not attempt to find the encoding
     in a META HTTP-EQUIV attribute in the document itself (mechanize, for
     example, does do that and will pass the correct value to ClientForm using
     this parameter).

    backwards_compat: boolean that determines whether the returned HTMLForm
     objects are backwards-compatible with old code.  If backwards_compat is
     true:

     - ClientForm 0.1 code will continue to work as before.

     - Label searches that do not specify a nr (number or count) will always
       get the first match, even if other controls match.  If
       backwards_compat is False, label searches that have ambiguous results
       will raise an AmbiguityError.

     - Item label matching is done by strict string comparison rather than
       substring matching.

     - De-selecting individual list items is allowed even if the Item is
       disabled.

    The backwards_compat argument will be deprecated in a future release.

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  ClientForm.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
    sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
    Note that HTMLParser is only available in Python 2.2 and later.  You can
    pass your own class in here as a hack to work around bad HTML, but at your
    own risk: there is no well-defined interface.

    """
    base_uri = response.geturl()
    all_forms = _ParseFileEx(response, base_uri, *args, **kwds)
    # the first entry collects the controls found outside any FORM element
    return all_forms[1:]
| 1065 |
|
---|
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # the first entry collects the controls found outside any FORM element
    return all_forms[1:]
| 1081 |
|
---|
def _ParseFileEx(file, base_uri,
                 select_default=False,
                 ignore_errors=False,
                 form_parser_class=FormParser,
                 request_class=urllib2.Request,
                 entitydefs=None,
                 backwards_compat=True,
                 encoding=DEFAULT_ENCODING,
                 _urljoin=urlparse.urljoin,
                 _urlparse=urlparse.urlparse,
                 _urlunparse=urlparse.urlunparse,
                 ):
    """Parse HTML read from file and return a list of HTMLForm instances.

    This is the implementation behind ParseFile, ParseResponse and their
    *Ex variants.  One HTMLForm is built for every entry the parser
    collected in its `forms` attribute, and the complete list is returned.

    file: file-like object supporting read(size)
    base_uri: URI of the document; replaced by the parser's `base`
     attribute when that is not None
    select_default: passed on to HTMLForm.new_control for every control
    ignore_errors: accepted for backwards compatibility; not used here
    form_parser_class: called as form_parser_class(entitydefs, encoding) to
     create the parser
    request_class: passed on to HTMLForm
    entitydefs, encoding: passed on to the parser
    backwards_compat: passed on to HTMLForm; a true value also triggers a
     deprecation warning
    _urljoin, _urlparse, _urlunparse: private URL helper hooks

    A ParseError raised while feeding the parser is re-raised with its
    base_uri attribute set to the document URI.
    """
    if backwards_compat:
        deprecation("operating in backwards-compatibility mode", 1)
    fp = form_parser_class(entitydefs, encoding)
    while 1:
        data = file.read(CHUNK)
        try:
            fp.feed(data)
        except ParseError:
            # tell the caller which document failed to parse
            sys.exc_info()[1].base_uri = base_uri
            raise
        # Only an empty read marks the end of the input.  A read that
        # returns fewer than CHUNK bytes may simply be a partial read, so
        # stopping on it could silently drop the rest of the document.
        if not data:
            break
    fp.close()
    if fp.base is not None:
        # HTML BASE element takes precedence over document URI
        base_uri = fp.base

    # Wrap each parsed LABEL and index the labels by the id they refer to.
    labels = []
    id_to_labels = {}
    for label_attrs in fp.labels:
        label = Label(label_attrs)
        labels.append(label)
        id_to_labels.setdefault(label_attrs["for"], []).append(label)

    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        if action is None:
            action = base_uri
        else:
            action = _urljoin(base_uri, action)
        # would be nice to make HTMLForm class (form builder) pluggable
        form = HTMLForm(
            action, method, enctype, name, attrs, request_class,
            forms, labels, id_to_labels, backwards_compat)
        form._urlparse = _urlparse
        form._urlunparse = _urlunparse
        index = 0
        for control_type, control_name, control_attrs in controls:
            # indices step by 10, which allows ImageControl to return
            # multiple ordered pairs
            form.new_control(
                control_type, control_name, control_attrs,
                select_default=select_default, index=index)
            index += 10
        forms.append(form)
    for form in forms:
        form.fixup()
    return forms
| 1141 |
|
---|
| 1142 |
|
---|
class Label:
    """The text and attributes of an HTML LABEL element.

    Attributes:

    id: value of the "for" HTML attribute (None if there is none)
    text: label text (read-only): the stripped text in
     backwards-compatibility mode, otherwise the result of compress_text
     applied to it
    attrs: the attribute mapping the label was created from

    """
    def __init__(self, attrs):
        raw_text = attrs.get("__text")
        self.id = attrs.get("for")
        self._text = raw_text.strip()
        self._ctext = compress_text(self._text)
        self.attrs = attrs
        self._backwards_compat = False  # maintained by HTMLForm

    def __getattr__(self, name):
        if name != "text":
            return getattr(Label, name)
        if self._backwards_compat:
            return self._text
        return self._ctext

    def __setattr__(self, name, value):
        # don't see any need for assigning text, so make it read-only
        if name == "text":
            raise AttributeError("text attribute is read-only")
        self.__dict__[name] = value

    def __str__(self):
        return "<Label(id=%r, text=%r)>" % (self.id, self.text)
| 1167 |
|
---|
| 1168 |
|
---|
def _get_label(attrs):
    """Return a Label built from attrs["__label"], or None if it is absent.

    A "__label" entry whose value is None is treated like a missing one.
    """
    label_attrs = attrs.get("__label")
    if label_attrs is None:
        return None
    return Label(label_attrs)
| 1175 |
|
---|
class Control:
    """An HTML form control.

    An HTMLForm contains a sequence of Controls.  The Controls in an HTMLForm
    are accessed using the HTMLForm.find_control method or the
    HTMLForm.controls attribute.

    Control instances are usually constructed using the ParseFile /
    ParseResponse functions.  If you use those functions, you can ignore the
    rest of this paragraph.  A Control is only properly initialised after the
    fixup method has been called.  In fact, this is only strictly necessary for
    ListControl instances.  This is necessary because ListControls are built up
    from ListControls each containing only a single item, and their initial
    value(s) can only be known after the sequence is complete.

    The types and values that are acceptable for assignment to the value
    attribute are defined by subclasses.

    If the disabled attribute is true, this represents the state typically
    represented by browsers by 'greying out' a control.  If the disabled
    attribute is true, the Control will raise AttributeError if an attempt is
    made to change its value.  In addition, the control will not be considered
    'successful' as defined by the W3C HTML 4 standard -- ie. it will
    contribute no data to the return value of the HTMLForm.click* methods.  To
    enable a control, set the disabled attribute to a false value.

    If the readonly attribute is true, the Control will raise AttributeError if
    an attempt is made to change its value.  To make a control writable, set
    the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those that
    may have the HTML attributes of the same names.

    On assignment to the value attribute, the following exceptions are raised:
    TypeError, AttributeError (if the value attribute should not be assigned
    to, because the control is disabled, for example) and ValueError.

    If the name or value attributes are None, or the value is an empty list, or
    if the control is disabled, the control is not successful.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs, index=None):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Remember form as the owner and append self to form.controls."""
        self._form = form
        form.controls.append(self)

    def fixup(self):
        """Hook run once parsing is complete; does nothing by default."""
        pass

    def is_of_kind(self, kind):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        result = []
        for _index, key, value in self._totally_ordered_pairs():
            result.append((key, value))
        return result

    def _totally_ordered_pairs(self):
        """Return list of (index, key, value) tuples.

        Like pairs, but allows preserving correct ordering even where several
        controls are involved.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw, name, value):
        """Write data for a subitem of this control to a MimeWriter."""
        # called by HTMLForm
        part = mw.nextpart()
        disposition = 'form-data; name="%s"' % name
        part.addheader("Content-Disposition", disposition, 1)
        body = part.startbody(prefix=0)
        body.write(value)

    def __str__(self):
        raise NotImplementedError()

    def get_labels(self):
        """Return all labels (Label instances) for this control.

        If the control was surrounded by a <label> tag, that will be the first
        label; all other labels, connected by 'for' and 'id', are in the order
        that appear in the HTML.

        """
        found = []
        if self._label:
            found.append(self._label)
        if self.id:
            by_id = self._form._id_to_labels
            found.extend(by_id.get(self.id, ()))
        return found
| 1294 |
|
---|
| 1295 |
|
---|
| 1296 | #---------------------------------------------------
|
---|
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls don't accept any value attribute.  Otherwise, takes a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs, index=None):
        self._index = index
        self._label = _get_label(attrs)
        # name and type are read-only through __setattr__, so they are
        # stored directly in the instance dictionary
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = attrs.has_key("disabled")
        self.readonly = attrs.has_key("readonly")
        self.id = attrs.get("id")

        self.attrs = attrs.copy()

        self._clicked = False

        self._urlparse = urlparse.urlparse
        self._urlunparse = urlparse.urlunparse

    def __getattr__(self, name):
        # only reached for names missing from the instance and its classes
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def _totally_ordered_pairs(self):
        name = self.name
        value = self.value
        unsuccessful = name is None or value is None or self.disabled
        if unsuccessful:
            return []
        return [(self._index, name, value)]

    def clear(self):
        """Set the value to None; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self.__dict__["_value"] = None

    def __str__(self):
        name = self.name
        value = self.value
        if name is None:
            name = "<None>"
        if value is None:
            value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        info = ", ".join(flags)
        if info:
            info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
| 1372 |
|
---|
| 1373 |
|
---|
| 1374 | #---------------------------------------------------
|
---|
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # hidden controls are always read-only
        if self.type == "hidden":
            self.readonly = True
        # a missing VALUE attribute means an empty string
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind == "text"
| 1393 |
|
---|
| 1394 | #---------------------------------------------------
|
---|
| 1395 | class FileControl(ScalarControl):
|
---|
| 1396 | """File upload with INPUT TYPE=FILE.
|
---|
| 1397 |
|
---|
| 1398 | The value attribute of a FileControl is always None. Use add_file instead.
|
---|
| 1399 |
|
---|
| 1400 | Additional public method: add_file
|
---|
| 1401 |
|
---|
| 1402 | """
|
---|
| 1403 |
|
---|
| 1404 | def __init__(self, type, name, attrs, index=None):
|
---|
| 1405 | ScalarControl.__init__(self, type, name, attrs, index)
|
---|
| 1406 | self._value = None
|
---|
| 1407 | self._upload_data = []
|
---|
| 1408 |
|
---|
| 1409 | def is_of_kind(self, kind): return kind == "file"
|
---|
| 1410 |
|
---|
| 1411 | def clear(self):
|
---|
| 1412 | if self.readonly:
|
---|
| 1413 | raise AttributeError("control '%s' is readonly" % self.name)
|
---|
| 1414 | self._upload_data = []
|
---|
| 1415 |
|
---|
| 1416 | def __setattr__(self, name, value):
|
---|
| 1417 | if name in ("value", "name", "type"):
|
---|
| 1418 | raise AttributeError("%s attribute is readonly" % name)
|
---|
| 1419 | else:
|
---|
| 1420 | self.__dict__[name] = value
|
---|
| 1421 |
|
---|
| 1422 | def add_file(self, file_object, content_type=None, filename=None):
|
---|
| 1423 | if not hasattr(file_object, "read"):
|
---|
| 1424 | raise TypeError("file-like object must have read method")
|
---|
| 1425 | if content_type is not None and not isstringlike(content_type):
|
---|
| 1426 | raise TypeError("content type must be None or string-like")
|
---|
| 1427 | if filename is not None and not isstringlike(filename):
|
---|
| 1428 | raise TypeError("filename must be None or string-like")
|
---|
| 1429 | if content_type is None:
|
---|
| 1430 | content_type = "application/octet-stream"
|
---|
| 1431 | self._upload_data.append((file_object, content_type, filename))
|
---|
| 1432 |
|
---|
| 1433 | def _totally_ordered_pairs(self):
|
---|
| 1434 | # XXX should it be successful even if unnamed?
|
---|
| 1435 | if self.name is None or self.disabled:
|
---|
| 1436 | return []
|
---|
| 1437 | return [(self._index, self.name, "")]
|
---|
| 1438 |
|
---|
def _write_mime_data(self, mw, _name, _value):
    """Write this control's uploads to the MIME writer `mw`.

    Called by HTMLForm.  `_name` and `_value` are accepted for interface
    compatibility and are not used (they are expected to equal self.name
    and '' respectively).

    With zero or one upload a single "form-data" part is written (an empty
    application/octet-stream body with an empty filename when nothing was
    added).  With two or more uploads, a "form-data" part holding a
    multipart/mixed body is written, containing one "file" sub-part per
    upload.  A filename of None is sent as the empty string.

    mw must provide nextpart(); the parts it returns must provide
    addheader(), startbody(), startmultipartbody() and lastpart().
    """
    # assert _name == self.name and _value == ''
    uploads = self._upload_data

    if len(uploads) >= 2:
        # multiple files
        container = mw.nextpart()
        container.addheader(
            "Content-Disposition", 'form-data; name="%s"' % self.name,
            prefix=1)
        container.startmultipartbody("mixed", prefix=0)
        for stream, ctype, fname in uploads:
            part = container.nextpart()
            if fname is None:
                fname = ""
            part.addheader(
                "Content-Disposition",
                "file%s" % ('; filename="%s"' % fname),
                prefix=1)
            part.startbody(ctype, prefix=0).write(stream.read())
        container.lastpart()
        return

    # zero or one file
    if uploads:
        stream, ctype, fname = uploads[0]
        if fname is None:
            fname = ""
    else:
        stream = StringIO()
        ctype = "application/octet-stream"
        fname = ""
    part = mw.nextpart()
    disposition = 'form-data; name="%s"%s' % (
        self.name, '; filename="%s"' % fname)
    part.addheader("Content-Disposition", disposition, prefix=1)
    part.startbody(ctype, prefix=0).write(stream.read())
|
---|
| 1473 |
|
---|
def __str__(self):
    """Return a one-line summary: class name, control name, upload names.

    Uploads without a filename are shown as "<Unnamed file>"; "disabled"
    and "readonly" are appended in parentheses when set.
    """
    shown_name = self.name
    if shown_name is None:
        shown_name = "<None>"

    uploads = self._upload_data
    if uploads:
        labels = []
        for _stream, _ctype, fname in uploads:
            if fname is None:
                fname = "<Unnamed file>"
            labels.append(fname)
        shown_value = ", ".join(labels)
    else:
        shown_value = "<No files added>"

    flags = [flag for flag in ("disabled", "readonly") if getattr(self, flag)]
    suffix = ""
    if flags:
        suffix = " (%s)" % ", ".join(flags)

    return "<%s(%s=%s)%s>" % (
        self.__class__.__name__, shown_name, shown_value, suffix)
|
---|
| 1496 |
|
---|
| 1497 |
|
---|
| 1498 | #---------------------------------------------------
|
---|
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out of HTML form controls.  In fact, it isn't
    really part of regular HTML forms at all, and predates them.  You're only
    allowed one ISINDEX per HTML document.  ISINDEX and regular form
    submission are mutually exclusive -- either submit a form, or the ISINDEX.

    Having said this, since ISINDEX controls may appear in forms (which is
    probably bad HTML), ParseFile / ParseResponse will include them in the
    HTMLForm instances they return.  You can set the ISINDEX's value, as with
    any other control (but note that ISINDEX controls have no name, so you'll
    need to use the type argument of set_value!).  When you submit the form,
    the ISINDEX will not be successful (ie., no data will get returned to the
    server as a result of its presence), unless you click on the ISINDEX
    control, in which case the ISINDEX gets submitted instead of the form:

        form.set_value("my isindex value", type="isindex")
        urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  If you want to submit one
    by hand, do it like so:

        url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
        result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # An ISINDEX without an initial value starts out as the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        """Return True for the "text" and "clickable" kinds only."""
        return kind in ("text", "clickable")

    def _totally_ordered_pairs(self):
        """ISINDEX never contributes name/value pairs to a form submission."""
        return []

    def _click(self, form, coord, return_type, request_class=urllib2.Request):
        """Build the ISINDEX submission for `form`.

        return_type selects the result: "pairs" gives an empty list,
        "request_data" gives a (url, None, []) tuple, and anything else
        gives request_class(url).  coord is not used.
        """
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        parsed = self._urlparse(form.action)
        leading = parsed[:-2]
        # The action's own query and fragment are discarded.
        _query, _frag = parsed[-2:]
        url = self._urlunparse(
            leading + (urllib.quote_plus(self.value), None))

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return request_class(url)

    def __str__(self):
        shown = self.value
        if shown is None:
            shown = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown, suffix)
|
---|
| 1566 |
|
---|
| 1567 |
|
---|
| 1568 | #---------------------------------------------------
|
---|
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

      INPUT/RESET
      BUTTON/RESET
      INPUT/BUTTON
      BUTTON/BUTTON

    These controls are always unsuccessful, in the terminology of HTML 4 (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # "_value" is not one of the protected names, so __setattr__ below
        # stores it straight into the instance dict.
        self._value = None

    def is_of_kind(self, kind):
        """An ignored control belongs to no kind."""
        return False

    def __setattr__(self, name, value):
        """Reject writes to value, name and type; store anything else."""
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
|
---|
| 1601 |
|
---|
| 1602 |
|
---|
| 1603 | #---------------------------------------------------
|
---|
| 1604 | # ListControls
|
---|
| 1605 |
|
---|
| 1606 | # helpers and subsidiary classes
|
---|
| 1607 |
|
---|
class Item:
    """A single selectable entry of a list control.

    Instance state lives directly in __dict__ because __setattr__ only
    permits assignment to `selected` (delegated to the owning control) and
    `disabled` (coerced to bool); every other assignment raises
    AttributeError.

    Attributes: name (the item's "value" HTML attribute), id, attrs (the
    HTML attribute mapping), disabled, and the computed `selected`.
    """

    def __init__(self, control, attrs, index=None):
        """Create the item and append it to control.items.

        attrs must contain a "value" key (KeyError otherwise).
        """
        label = _get_label(attrs)
        labels = []
        if label:
            labels.append(label)
        # Bypass __setattr__, which rejects most names.
        self.__dict__.update({
            "name": attrs["value"],
            "_labels": labels,
            "attrs": attrs,
            "_control": control,
            # `in` instead of has_key(): same result, and also valid on
            # Python versions where dict.has_key no longer exists.
            "disabled": "disabled" in attrs,
            "_selected": False,
            "id": attrs.get("id"),
            "_index": index,
            })
        control.items.append(self)

    def get_labels(self):
        """Return all labels (Label instances) for this item.

        For items that represent radio buttons or checkboxes, if the item was
        surrounded by a <label> tag, that will be the first label; all other
        labels, connected by 'for' and 'id', are in the order that appear in
        the HTML.

        For items that represent select options, if the option had a label
        attribute, that will be the first label.  If the option has contents
        (text within the option tags) and it is not the same as the label
        attribute (if any), that will be a label.  There is nothing in the
        spec to my knowledge that makes an option with an id unable to be the
        target of a label's for attribute, so those are included, if any, for
        the sake of consistency and completeness.

        """
        res = []
        res.extend(self._labels)
        if self.id:
            res.extend(self._control._form._id_to_labels.get(self.id, ()))
        return res

    def __getattr__(self, name):
        # Only reached when normal lookup fails; `selected` is a computed
        # view of the private _selected flag.
        if name == "selected":
            return self._selected
        raise AttributeError(name)

    def __setattr__(self, name, value):
        if name == "selected":
            # The control enforces single/multiple selection rules.
            self._control._set_selected_state(self, value)
        elif name == "disabled":
            self.__dict__["disabled"] = bool(value)
        else:
            raise AttributeError(name)

    def __str__(self):
        """Return the name, prefixed with "*" if selected and wrapped in
        parentheses if disabled."""
        res = self.name
        if self.selected:
            res = "*" + res
        if self.disabled:
            res = "(%s)" % res
        return res

    def __repr__(self):
        # XXX appending the attrs without distinguishing them from name and id
        # is silly
        # list() keeps the concatenation valid when items() returns a view
        # rather than a list.
        attrs = [("name", self.name), ("id", self.id)] + list(self.attrs.items())
        return "<%s %s>" % (
            self.__class__.__name__,
            " ".join(["%s=%r" % (k, v) for k, v in attrs])
            )
|
---|
| 1675 |
|
---|
def disambiguate(items, nr, **kwds):
    """Pick one entry from `items`, using `nr` to resolve multiple matches.

    items: sequence of candidate matches.
    nr: 0-based index into items, or None to require a unique match.
    kwds: the query that produced items; used only to build the error
        message ("key=repr(value)" pairs joined by spaces).

    Raises ItemNotFoundError if items is empty or nr is past the end, and
    AmbiguityError if nr is None and there is more than one candidate.
    """
    description = " ".join(["%s=%r" % pair for pair in kwds.items()])
    if not items:
        raise ItemNotFoundError(description)
    if nr is None:
        if len(items) > 1:
            raise AmbiguityError(description)
        return items[0]
    if len(items) <= nr:
        raise ItemNotFoundError(description)
    return items[nr]
|
---|
| 1690 |
|
---|
| 1691 | class ListControl(Control):
|
---|
| 1692 | """Control representing a sequence of items.
|
---|
| 1693 |
|
---|
| 1694 | The value attribute of a ListControl represents the successful list items
|
---|
| 1695 | in the control. The successful list items are those that are selected and
|
---|
| 1696 | not disabled.
|
---|
| 1697 |
|
---|
| 1698 | ListControl implements both list controls that take a length-1 value
|
---|
| 1699 | (single-selection) and those that take length >1 values
|
---|
| 1700 | (multiple-selection).
|
---|
| 1701 |
|
---|
| 1702 | ListControls accept sequence values only. Some controls only accept
|
---|
| 1703 | sequences of length 0 or 1 (RADIO, and single-selection SELECT).
|
---|
| 1704 | In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
|
---|
| 1705 | and multiple-selection SELECTs (those having the "multiple" HTML attribute)
|
---|
| 1706 | accept sequences of any length.
|
---|
| 1707 |
|
---|
| 1708 | Note the following mistake:
|
---|
| 1709 |
|
---|
| 1710 | control.value = some_value
|
---|
| 1711 | assert control.value == some_value # not necessarily true
|
---|
| 1712 |
|
---|
| 1713 | The reason for this is that the value attribute always gives the list items
|
---|
| 1714 | in the order they were listed in the HTML.
|
---|
| 1715 |
|
---|
| 1716 | ListControl items can also be referred to by their labels instead of names.
|
---|
| 1717 | Use the label argument to .get(), and the .set_value_by_label(),
|
---|
| 1718 | .get_value_by_label() methods.
|
---|
| 1719 |
|
---|
| 1720 | Note that, rather confusingly, though SELECT controls are represented in
|
---|
| 1721 | HTML by SELECT elements (which contain OPTION elements, representing
|
---|
| 1722 | individual list items), CHECKBOXes and RADIOs are not represented by *any*
|
---|
| 1723 | element. Instead, those controls are represented by a collection of INPUT
|
---|
| 1724 | elements. For example, this is a SELECT control, named "control1":
|
---|
| 1725 |
|
---|
| 1726 | <select name="control1">
|
---|
| 1727 | <option>foo</option>
|
---|
| 1728 | <option value="1">bar</option>
|
---|
| 1729 | </select>
|
---|
| 1730 |
|
---|
| 1731 | and this is a CHECKBOX control, named "control2":
|
---|
| 1732 |
|
---|
| 1733 | <input type="checkbox" name="control2" value="foo" id="cbe1">
|
---|
| 1734 | <input type="checkbox" name="control2" value="bar" id="cbe2">
|
---|
| 1735 |
|
---|
| 1736 | The id attribute of a CHECKBOX or RADIO ListControl is always that of its
|
---|
| 1737 | first element (for example, "cbe1" above).
|
---|
| 1738 |
|
---|
| 1739 |
|
---|
| 1740 | Additional read-only public attribute: multiple.
|
---|
| 1741 |
|
---|
| 1742 | """
|
---|
| 1743 |
|
---|
| 1744 | # ListControls are built up by the parser from their component items by
|
---|
| 1745 | # creating one ListControl per item, consolidating them into a single
|
---|
| 1746 | # master ListControl held by the HTMLForm:
|
---|
| 1747 |
|
---|
| 1748 | # -User calls form.new_control(...)
|
---|
| 1749 | # -Form creates Control, and calls control.add_to_form(self).
|
---|
| 1750 | # -Control looks for a Control with the same name and type in the form,
|
---|
| 1751 | # and if it finds one, merges itself with that control by calling
|
---|
| 1752 | # control.merge_control(self). The first Control added to the form, of
|
---|
| 1753 | # a particular name and type, is the only one that survives in the
|
---|
| 1754 | # form.
|
---|
| 1755 | # -Form calls control.fixup for all its controls. ListControls in the
|
---|
| 1756 | # form know they can now safely pick their default values.
|
---|
| 1757 |
|
---|
| 1758 | # To create a ListControl without an HTMLForm, use:
|
---|
| 1759 |
|
---|
| 1760 | # control.merge_control(new_control)
|
---|
| 1761 |
|
---|
| 1762 | # (actually, it's much easier just to use ParseFile)
|
---|
| 1763 |
|
---|
| 1764 | _label = None
|
---|
| 1765 |
|
---|
def __init__(self, type, name, attrs={}, select_default=False,
             called_as_base_class=False, index=None):
    """
    type: control type; stored lower-cased.
    name: control name.
    attrs: HTML attribute mapping; only "value" and "id" are read here.

    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present

    called_as_base_class: must be true; this initialiser raises
     NotImplementedError otherwise.
    index: accepted but not used by this initialiser.

    """
    if not called_as_base_class:
        raise NotImplementedError()

    # name and type are written straight into the instance dict rather than
    # assigned as attributes.
    self.__dict__.update({"type": type.lower(), "name": name})

    self._value = attrs.get("value")
    self.id = attrs.get("id")
    self.disabled = False
    self.readonly = False
    self._closed = False
    self._clicked = False
    self._select_default = select_default

    # As Controls are merged in with .merge_control(), self.attrs will
    # refer to each Control in turn -- always the most recently merged
    # control.  Each merged-in Control instance corresponds to a single
    # list item: see ListControl.__doc__.
    self.items = []
    self._form = None
|
---|
| 1794 |
|
---|
def clear(self):
    """Reset the control by assigning an empty sequence to its value."""
    nothing_selected = []
    self.value = nothing_selected
|
---|
| 1797 |
|
---|
def is_of_kind(self, kind):
    """Report whether this control is of the given kind.

    "list" always matches; "multilist" and "singlelist" match according to
    the truth of self.multiple; every other kind is rejected.
    """
    if kind == "list":
        return True
    if kind == "multilist":
        return bool(self.multiple)
    if kind == "singlelist":
        return not self.multiple
    return False
|
---|
| 1807 |
|
---|
def get_items(self, name=None, label=None, id=None,
              exclude_disabled=False):
    """Return the list of items matching every criterion that was given.

    Items are returned in the order they are held by the control.  A label
    matches by equality when the form's backwards_compat flag is true and
    by substring otherwise.  Raises TypeError if name, label or id is given
    but is not string-like.

    For argument docs, see the docstring for .get()

    """
    checks = (
        (name, "item name must be string-like"),
        (label, "item label must be string-like"),
        (id, "item id must be string-like"),
        )
    for supplied, complaint in checks:
        if supplied is not None and not isstringlike(supplied):
            raise TypeError(complaint)

    exact_labels = self._form.backwards_compat
    matches = []  # order is important
    for item in self.items:
        if exclude_disabled and item.disabled:
            continue
        if name is not None and item.name != name:
            continue
        if label is not None:
            label_hit = False
            for candidate in item.get_labels():
                text = candidate.text
                if exact_labels:
                    label_hit = text == label
                else:
                    label_hit = text.find(label) > -1
                if label_hit:
                    break
            if not label_hit:
                continue
        if id is not None and item.id != id:
            continue
        matches.append(item)
    return matches
|
---|
| 1839 |
|
---|
def get(self, name=None, label=None, id=None, nr=None,
        exclude_disabled=False):
    """Return item by name or label, disambiguating if necessary with nr.

    All arguments must be passed by name, with the exception of 'name',
    which may be used as a positional argument.

    If name is specified, then the item must have the indicated name.

    If label is specified, then the item must have a label whose
    whitespace-compressed, stripped, text substring-matches the indicated
    label string (eg. label="please choose" will match
    " Do please choose an item ").

    If id is specified, then the item must have the indicated id.

    nr is an optional 0-based index of the items matching the query.

    If nr is the default None value and more than one item is found, raises
    AmbiguityError (unless the HTMLForm instance's backwards_compat
    attribute is true).

    If no item is found, or if items are found but nr is specified and not
    found, raises ItemNotFoundError.

    Optionally excludes disabled items.

    """
    position = nr
    if position is None and self._form.backwards_compat:
        # In backwards-compatible mode the first match wins.  :-/
        position = 0
    candidates = self.get_items(name, label, id, exclude_disabled)
    return disambiguate(candidates, position, name=name, label=label, id=id)
|
---|
| 1872 |
|
---|
def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
    """Look up a single item on behalf of the deprecated item methods.

    name: item name, or a label string when by_label is true.
    nr: optional 0-based index used to disambiguate multiple matches.
    exclude_disabled: if true, disabled items are not considered.

    Returns whatever self.get() returns and raises whatever it raises.
    """
    # strictly for use by deprecated methods
    if by_label:
        name, label = None, name
    else:
        label = None
    # nr and exclude_disabled must go by keyword: get()'s third positional
    # parameter is `id`, so passing them positionally would send nr as the
    # id filter and exclude_disabled as nr.
    return self.get(name, label, nr=nr, exclude_disabled=exclude_disabled)
|
---|
| 1880 |
|
---|
def toggle(self, name, by_label=False, nr=None):
    """Deprecated: given a name or label and optional disambiguating index
    nr, toggle the matching item's selection.

    Selecting items follows the behavior described in the docstring of the
    'get' method.

    if the item is disabled, or this control is disabled or readonly,
    raise AttributeError.

    """
    deprecation(
        "item = control.get(...); item.selected = not item.selected")
    target = self._get(name, by_label, nr)
    flipped = not target.selected
    self._set_selected_state(target, flipped)
|
---|
| 1896 |
|
---|
def set(self, selected, name, by_label=False, nr=None):
    """Deprecated: given a name or label and optional disambiguating index
    nr, set the matching item's selection to the bool value of selected.

    Selecting items follows the behavior described in the docstring of the
    'get' method.

    if the item is disabled, or this control is disabled or readonly,
    raise AttributeError.

    """
    deprecation(
        "control.get(...).selected = <boolean>")
    target = self._get(name, by_label, nr)
    self._set_selected_state(target, selected)
|
---|
| 1911 |
|
---|
def _set_selected_state(self, item, action):
    """Select or deselect `item`, enforcing this control's selection rules.

    action: truthy to select, falsy to deselect; coerced to bool before it
        is stored.

    Raises AttributeError if the control is disabled or readonly, or if the
    item is disabled (in backwards_compat mode a disabled item may still be
    deselected, but not selected).

    In a single-selection control, selecting an item deselects all others.
    """
    # action:
    # bool False: off
    # bool True: on
    if self.disabled:
        raise AttributeError("control '%s' is disabled" % self.name)
    if self.readonly:
        raise AttributeError("control '%s' is readonly" % self.name)
    # Coerce so that _selected only ever holds True or False, whatever
    # truthy/falsy object the caller passed.
    action = bool(action)
    compat = self._form.backwards_compat
    if item.disabled and (not compat or action):
        raise AttributeError("item is disabled")
    # Items reject ordinary attribute assignment, so write the flag into
    # each item's __dict__ directly.
    if self.multiple:
        item.__dict__["_selected"] = action
    elif not action:
        item.__dict__["_selected"] = False
    else:
        for other in self.items:
            other.__dict__["_selected"] = False
        item.__dict__["_selected"] = True
|
---|
| 1936 |
|
---|
def toggle_single(self, by_label=None):
    """Deprecated: toggle the selection of the single item in this control.

    Raises ItemCountError if the control does not contain only one item.

    by_label argument is ignored, and included only for backwards
    compatibility.

    """
    deprecation(
        "control.items[0].selected = not control.items[0].selected")
    if len(self.items) != 1:
        raise ItemCountError(
            "'%s' is not a single-item control" % self.name)
    only = self.items[0]
    flipped = not only.selected
    self._set_selected_state(only, flipped)
|
---|
| 1953 |
|
---|
def set_single(self, selected, by_label=None):
    """Deprecated: set the selection of the single item in this control.

    Raises ItemCountError if the control does not contain only one item.

    by_label argument is ignored, and included only for backwards
    compatibility.

    """
    deprecation(
        "control.items[0].selected = <boolean>")
    if len(self.items) != 1:
        raise ItemCountError(
            "'%s' is not a single-item control" % self.name)
    only = self.items[0]
    self._set_selected_state(only, selected)
|
---|
| 1969 |
|
---|
def get_item_disabled(self, name, by_label=False, nr=None):
    """Get disabled state of named list item in a ListControl.

    Emits a deprecation notice pointing at control.get(...).disabled.
    """
    deprecation(
        "control.get(...).disabled")
    target = self._get(name, by_label, nr)
    return target.disabled
|
---|
| 1975 |
|
---|
def set_item_disabled(self, disabled, name, by_label=False, nr=None):
    """Set disabled state of named list item in a ListControl.

    disabled: boolean disabled state

    Emits a deprecation notice pointing at
    control.get(...).disabled = <boolean>.

    """
    deprecation(
        "control.get(...).disabled = <boolean>")
    target = self._get(name, by_label, nr)
    target.disabled = disabled
|
---|
| 1985 |
|
---|
def set_all_items_disabled(self, disabled):
    """Set disabled state of all list items in a ListControl.

    disabled: boolean disabled state, assigned to every item in self.items

    """
    for entry in self.items:
        setattr(entry, "disabled", disabled)
|
---|
| 1994 |
|
---|
def get_item_attrs(self, name, by_label=False, nr=None):
    """Return dictionary of HTML attributes for a single ListControl item.

    The HTML element types that describe list items are: OPTION for SELECT
    controls, INPUT for the rest.  These elements have HTML attributes that
    you may occasionally want to know about -- for example, the "alt" HTML
    attribute gives a text string describing the item (graphical browsers
    usually display this as a tooltip).

    The returned dictionary maps HTML attribute names to values.  The names
    and values are taken from the original HTML.

    Emits a deprecation notice pointing at control.get(...).attrs.

    """
    deprecation(
        "control.get(...).attrs")
    target = self._get(name, by_label, nr)
    return target.attrs
|
---|
| 2011 |
|
---|
def close_control(self):
    """Mark this control as closed by setting its _closed flag."""
    closed = True
    self._closed = closed
|
---|
| 2014 |
|
---|
def add_to_form(self, form):
    """Attach this control to `form`, merging into an existing one if possible.

    The form's controls are searched from last to first for one with the
    same name and type.  If that control has not been closed, this control
    is merged into it via its merge_control(); in every other case (no
    name, no match, or the match is closed) the control is handed to
    Control.add_to_form as a separate control.
    """
    assert self._form is None or form == self._form, (
        "can't add control to more than one form")
    self._form = form

    mergeable = None
    # always count nameless elements as separate controls
    if self.name is not None:
        position = len(form.controls)
        while position > 0:
            position -= 1
            candidate = form.controls[position]
            if candidate.name == self.name and candidate.type == self.type:
                # Only the most recently added match is considered.
                if not candidate._closed:
                    mergeable = candidate
                break

    if mergeable is None:
        Control.add_to_form(self, form)
    else:
        mergeable.merge_control(self)
|
---|
| 2033 |
|
---|
def merge_control(self, control):
    """Absorb the items of `control` into this control.

    Both controls must agree on whether they are multiple-selection
    (checked with an assert).  control.items itself is left untouched.
    """
    assert bool(control.multiple) == bool(self.multiple)
    # usually, isinstance(control, self.__class__)
    incoming = control.items
    self.items.extend(incoming)
|
---|
| 2038 |
|
---|
def fixup(self):
    """
    ListControls are built up from component list items (which are also
    ListControls) during parsing.  This method should be called after all
    items have been added.  See ListControl.__doc__ for the reason this is
    required.

    This implementation only re-points every item's _control at this
    control; it does not itself change any selection.

    """
    # Background on default selection, where the HTML marked no item as
    # selected:
    #
    # CHECKBOX:
    #  Nothing should be selected.
    # SELECT/single, SELECT/multiple and RADIO:
    #  RFC 1866 (HTML 2.0): says first item should be selected.
    #  W3C HTML 4.01 Specification: says that client behaviour is
    #   undefined in this case.  For RADIO, exactly one must be selected,
    #   though which one is undefined.
    #  Both Netscape and Microsoft Internet Explorer (IE) choose first
    #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
    #   and Firebird 0.6) leave all items unselected for RADIO and
    #   SELECT/multiple.
    #
    # Since both Netscape and IE choose the first item for SELECT/single,
    # we do the same.  OTOH, both Netscape and IE leave SELECT/multiple
    # with nothing selected, in violation of RFC 1866 (but not in violation
    # of the W3C HTML 4 standard); the same is true of RADIO (which *is* in
    # violation of the HTML 4 standard).  We follow RFC 1866 if the
    # _select_default attribute is set, and Netscape and IE otherwise.
    # RFC 1866 and HTML 4 are always violated insofar as you can deselect
    # all items in a RadioControl.

    # set items' controls to self, now that we've merged
    for entry in self.items:
        entry.__dict__["_control"] = self
|
---|
| 2074 |
|
---|
| 2075 | def __getattr__(self, name):
|
---|
| 2076 | if name == "value":
|
---|
| 2077 | compat = self._form.backwards_compat
|
---|
| 2078 | if self.name is None:
|
---|
| 2079 | return []
|
---|
| 2080 | return [o.name for o in self.items if o.selected and
|
---|
| 2081 | (not o.disabled or compat)]
|
---|
| 2082 | else:
|
---|
| 2083 | raise AttributeError("%s instance has no attribute '%s'" %
|
---|
| 2084 | (self.__class__.__name__, name))
|
---|
| 2085 |
|
---|
def __setattr__(self, name, value):
    """Route `value` through _set_value and protect read-only attributes.

    Assigning value raises AttributeError if the control is disabled or
    readonly.  name, type and multiple can never be assigned.  Every other
    attribute is stored directly in the instance dict.
    """
    if name in ("name", "type", "multiple"):
        raise AttributeError("%s attribute is readonly" % name)
    if name != "value":
        self.__dict__[name] = value
        return
    if self.disabled:
        raise AttributeError("control '%s' is disabled" % self.name)
    if self.readonly:
        raise AttributeError("control '%s' is readonly" % self.name)
    self._set_value(value)
|
---|
| 2097 |
|
---|
def _set_value(self, value):
    """Apply a new selection given as a sequence of item names.

    None and string-like values raise TypeError.  An empty sequence
    deselects every item (disabled items only when the form's
    backwards_compat flag is true).  Otherwise the work is delegated to
    _multiple_set_value or _single_set_value; a single-selection control
    raises ItemCountError for sequences longer than one.
    """
    if value is None or isstringlike(value):
        raise TypeError("ListControl, must set a sequence")

    if not value:
        touch_disabled = self._form.backwards_compat
        for entry in self.items:
            if touch_disabled or not entry.disabled:
                entry.selected = False
        return

    if self.multiple:
        self._multiple_set_value(value)
        return

    if len(value) > 1:
        raise ItemCountError(
            "single selection list, must set sequence of "
            "length 0 or 1")
    self._single_set_value(value)
|
---|
| 2114 |
|
---|
def _get_items(self, name, target=1):
    """Split the enabled items called `name` into (selected, unselected).

    target: minimum number of enabled items required.

    Raises ItemNotFoundError if fewer than target items have that name at
    all, and AttributeError if enough exist but too few are enabled.
    """
    named = self.get_items(name)
    usable = [entry for entry in named if not entry.disabled]
    if len(usable) < target:
        if len(named) < target:
            raise ItemNotFoundError(
                "insufficient items with name %r" % name)
        raise AttributeError(
            "insufficient non-disabled items with name %s" % name)
    on = [entry for entry in usable if entry.selected]
    off = [entry for entry in usable if not entry.selected]
    return on, off
|
---|
| 2133 |
|
---|
def _single_set_value(self, value):
    """Select the item named by the one-element sequence `value`.

    Nothing is changed if an enabled item of that name is already selected;
    otherwise the first enabled, unselected item of that name is selected.
    """
    assert len(value) == 1
    wanted = value[0]
    already_on, candidates = self._get_items(wanted)
    assert len(already_on) <= 1
    if already_on:
        return
    candidates[0].selected = True
|
---|
| 2140 |
|
---|
def _multiple_set_value(self, value):
    """Make the selection match the names in `value` (repeats allowed).

    A name appearing k times requires k enabled items of that name.  All
    lookups happen before any item is modified, so a failing lookup leaves
    the current selection untouched.  Selected items that were not asked
    for are deselected (disabled ones only when the form's backwards_compat
    flag is true).
    """
    compat = self._form.backwards_compat
    to_select = []  # transactional-ish
    to_deselect = [entry for entry in self.items
                   if entry.selected and (compat or not entry.disabled)]

    # How many times was each name requested?
    wanted = {}
    for requested in value:
        wanted[requested] = wanted.get(requested, 0) + 1

    for name, count in wanted.items():
        on, off = self._get_items(name, count)
        for _ in range(count):
            if on:
                # Already selected: keep it by taking it off the
                # deselection list.
                keeper = on.pop(0)
                to_deselect.remove(keeper)
            else:
                to_select.append(off.pop(0))

    for entry in to_deselect:
        entry.selected = False
    for entry in to_select:
        entry.selected = True
|
---|
| 2167 |
|
---|
def set_value_by_label(self, value):
    """Select the items whose labels match the given strings.

    value must be an iterable of strings; a bare string is rejected with
    TypeError.  Each string is matched as a substring of the item labels.
    Before matching, the original label text is whitespace-compressed
    (runs of whitespace become one space) and stripped of leading and
    trailing whitespace.

    A string matching several items is ambiguous.  When the form's
    backwards_compat is true the first match is used without complaint;
    otherwise the ambiguity is accepted only if all matches share the same
    item name (e.g. OPTION value), and AmbiguityError is raised if not.

    ItemCountError is raised when more than one string is given for a
    single-selection control, ItemNotFoundError when a string yields no
    item that can still be used.

    """
    if isstringlike(value):
        raise TypeError(value)
    if not self.multiple and len(value) > 1:
        raise ItemCountError(
            "single selection list, must set sequence of "
            "length 0 or 1")
    chosen = []
    for wanted in value:
        matches = self.get_items(label=wanted)
        if len(matches) > 1:
            if self._form.backwards_compat:
                # OK, we'll guess :-(  Assume first available item.
                matches = matches[:1]
            else:
                # ambiguous labels are fine as long as item names (e.g.
                # OPTION values) are same
                first_name = matches[0].name
                mismatched = [m for m in matches[1:]
                              if m.name != first_name]
                if mismatched:
                    raise AmbiguityError(wanted)
        for candidate in matches:
            # For the multiple-item case, we could try to be smarter,
            # saving them up and trying to resolve, but that's too much.
            if self._form.backwards_compat or candidate not in chosen:
                chosen.append(candidate)
                break
        else:  # no match at all, or every match is already used
            raise ItemNotFoundError(wanted)
    # chosen now holds every item that should be on: turn everything off
    # and then switch those back on.
    self.value = []
    for item in chosen:
        item.selected = True
|
---|
| 2213 |
|
---|
def get_value_by_label(self):
    """Return the control's value expressed as item label texts.

    One entry is produced per selected item, in item order: the text of the
    item's first label with non-empty text, or None if it has no such
    label.  Disabled items are skipped unless the form's backwards_compat
    flag is true.

    """
    texts = []
    compat = self._form.backwards_compat
    for item in self.items:
        if item.disabled and not compat:
            continue
        if not item.selected:
            continue
        text = None
        for label in item.get_labels():
            if label.text:
                text = label.text
                break
        texts.append(text)
    return texts
|
---|
| 2227 |
|
---|
def possible_items(self, by_label=False):
    """Deprecated: return the names or labels of all possible items.

    With by_label false (the default) the result lists every item's name.
    With by_label true it lists, for every item, the text of its first
    label with non-empty text, or None if there is no such label.

    Disabled items are included, which may be misleading for some use
    cases.

    """
    deprecation(
        "[item.name for item in self.items]")
    if not by_label:
        return [item.name for item in self.items]
    texts = []
    for item in self.items:
        text = None
        for label in item.get_labels():
            if label.text:
                text = label.text
                break
        texts.append(text)
    return texts
|
---|
| 2247 |
|
---|
def _totally_ordered_pairs(self):
    """Return (index, control name, item name) triples for submission.

    The result is empty when the control is disabled or has no name.
    Otherwise there is one triple per item that is selected and not
    disabled, carrying the item's _index.

    """
    if self.disabled or self.name is None:
        return []
    pairs = []
    for item in self.items:
        if item.selected and not item.disabled:
            pairs.append((item._index, self.name, item.name))
    return pairs
|
---|
| 2254 |
|
---|
def __str__(self):
    """Return a one-line summary: class name, control name, items, flags.

    A control without a name is shown as "<None>"; the "disabled" and
    "readonly" flags are appended in parentheses when set.

    """
    name = self.name
    if name is None:
        name = "<None>"

    shown_items = ", ".join([str(item) for item in self.items])

    flags = []
    if self.disabled:
        flags.append("disabled")
    if self.readonly:
        flags.append("readonly")
    if flags:
        suffix = " (%s)" % ", ".join(flags)
    else:
        suffix = ""

    return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
                               name, shown_items, suffix)
|
---|
| 2269 |
|
---|
| 2270 |
|
---|
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create a single-selection control holding one radio item.

        attrs is the HTML attribute dictionary of the INPUT element; a
        missing "value" entry is filled in with "on" (attrs is modified in
        place).  The item starts selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # written through __dict__ rather than by plain attribute assignment
        self.__dict__["multiple"] = False
        o = Item(self, attrs, index)
        # "in" replaces dict.has_key(), which is deprecated and no longer
        # exists in Python 3
        o.__dict__["_selected"] = "checked" in attrs

    def fixup(self):
        """Normalise the selection once all items have been added.

        If no enabled item is selected and select_default was requested,
        the first enabled item is selected.  If several enabled items are
        selected, all but the last are deselected.

        """
        ListControl.fixup(self)
        found = [o for o in self.items if o.selected and not o.disabled]
        if not found:
            if self._select_default:
                for o in self.items:
                    if not o.disabled:
                        o.selected = True
                        break
        else:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False

    def get_labels(self):
        """Return an empty list: no labels are reported for the control."""
        return []
|
---|
| 2303 |
|
---|
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create a multiple-selection control holding one checkbox item.

        attrs is the HTML attribute dictionary of the INPUT element; a
        missing "value" entry is filled in with "on" (attrs is modified in
        place).  The item starts selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # written through __dict__ rather than by plain attribute assignment
        self.__dict__["multiple"] = True
        o = Item(self, attrs, index)
        # "in" replaces dict.has_key(), which is deprecated and no longer
        # exists in Python 3
        o.__dict__["_selected"] = "checked" in attrs

    def get_labels(self):
        """Return an empty list: no labels are reported for the control."""
        return []
|
---|
| 2321 |
|
---|
| 2322 |
|
---|
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)


    OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.

    SELECT control values and labels are subject to some messy defaulting
    rules. For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    names (the OPTION values) are "0", "1" and "2000" respectively. Note that
    the value of the last OPTION in this example defaults to its contents, as
    specified by RFC 1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element. Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element. control.get(...).attrs may be used as usual to get at the HTML
    attributes of the HTML elements corresponding to individual list items (for
    SELECT controls, these are OPTION elements).

    Another special case is that the Item.attrs dictionaries have a special key
    "contents" which does not correspond to any real HTML attribute, but rather
    contains the contents of the OPTION element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual. However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.

    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create a SELECT control holding at most one OPTION item.

        attrs is the OPTION attribute dictionary with the SELECT attributes
        stored under "__select" and the element contents under "contents"
        (see the comment above).  An Item is only created when attrs has a
        "value" key; without one the instance is an empty SELECT marker.
        The caller's attrs dictionary is not modified.

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        self.__dict__["_label"] = _get_label(self.attrs)
        self.__dict__["id"] = self.attrs.get("id")
        # "in" replaces dict.has_key(), which is deprecated and no longer
        # exists in Python 3
        self.__dict__["multiple"] = "multiple" in self.attrs
        # the majority of the contents, label, and value dance already happened
        contents = attrs.get("contents")
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, self.attrs, select_default,
                             called_as_base_class=True, index=index)
        self.disabled = "disabled" in self.attrs
        self.readonly = "readonly" in self.attrs
        if "value" in attrs:
            # otherwise it is a marker 'select started' token
            o = Item(self, attrs, index)
            o.__dict__["_selected"] = "selected" in attrs
            # add 'label' label and contents label, if different. If both are
            # provided, the 'label' label is used for display in HTML
            # 4.0-compliant browsers (and any lower spec? not sure) while the
            # contents are used for display in older or less-compliant
            # browsers. We make label objects for both, if the values are
            # different.
            label = attrs.get("label")
            if label:
                o._labels.append(Label({"__text": label}))
                if contents and contents != label:
                    o._labels.append(Label({"__text": contents}))
            elif contents:
                o._labels.append(Label({"__text": contents}))

    def fixup(self):
        """Normalise the selection once all items have been added.

        With nothing selected, the first enabled item is selected if the
        control is single-selection or select_default was requested.  With
        several items selected in a single-selection control, all but the
        last are deselected.

        """
        ListControl.fixup(self)
        # Firefox doesn't exclude disabled items from those considered here
        # (i.e. from 'found', for both branches of the if below). Note that
        # IE6 doesn't support the disabled attribute on OPTIONs at all.
        found = [o for o in self.items if o.selected]
        if not found:
            if not self.multiple or self._select_default:
                for o in self.items:
                    if not o.disabled:
                        # Enable the control just long enough to apply the
                        # default (presumably selection is refused while
                        # the control is disabled -- the setter is not
                        # visible here), then put the control's own flag
                        # back.  The saved value belongs to the control,
                        # so it must not be written onto the item.
                        was_disabled = self.disabled
                        self.disabled = False
                        try:
                            o.selected = True
                        finally:
                            self.disabled = was_disabled
                        break
        elif not self.multiple:
            # Ensure only one item selected. Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
|
---|
| 2435 |
|
---|
| 2436 |
|
---|
| 2437 | #---------------------------------------------------
|
---|
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    """
    def __init__(self, type, name, attrs, index=None):
        """Create the control; a missing value becomes "" and it is readonly."""
        ScalarControl.__init__(self, type, name, attrs, index)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
        # to define this.
        if self.value is None:
            self.value = ""
        self.readonly = True

    def get_labels(self):
        """Return the labels, led by one built from the value if non-empty.

        The remaining labels are those reported by ScalarControl.get_labels.

        """
        res = []
        if self.value:
            res.append(Label({"__text": self.value}))
        res.extend(ScalarControl.get_labels(self))
        return res

    def is_of_kind(self, kind):
        """Return True only for the kind "clickable"."""
        return kind == "clickable"

    def _click(self, form, coord, return_type, request_class=urllib2.Request):
        """Mark the control as clicked at coord while the form is submitted.

        Returns whatever form._switch_click(return_type, request_class)
        returns.  The clicked marker is cleared again even if that call
        raises, so a failed click cannot leave the control looking clicked
        for a later submission.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type, request_class)
        finally:
            self._clicked = False

    def _totally_ordered_pairs(self):
        """Return the ScalarControl pairs, but only while being clicked."""
        if not self._clicked:
            return []
        return ScalarControl._totally_ordered_pairs(self)
|
---|
| 2473 |
|
---|
| 2474 |
|
---|
| 2475 | #---------------------------------------------------
|
---|
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    Coordinates are specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs, index=None):
        """Create the control like a SubmitControl, but not readonly."""
        SubmitControl.__init__(self, type, name, attrs, index)
        self.readonly = False

    def _totally_ordered_pairs(self):
        """Return the click coordinates (and value) as submission triples.

        Nothing is returned unless the control is enabled, named and
        currently clicked.  Otherwise "<name>.x" and "<name>.y" carry the
        stringified click coordinates, followed by the control's value
        under its plain name when that value is non-empty.  The three
        triples use consecutive indexes starting at the control's _index.

        """
        coord = self._clicked
        if self.disabled or not coord:
            return []
        name = self.name
        if name is None:
            return []
        base = self._index
        pairs = []
        pairs.append((base, "%s.x" % name, str(coord[0])))
        pairs.append((base + 1, "%s.y" % name, str(coord[1])))
        value = self._value
        if value:
            pairs.append((base + 2, name, value))
        return pairs

    # bypass SubmitControl.get_labels: use the ScalarControl version as is
    get_labels = ScalarControl.get_labels
|
---|
| 2505 |
|
---|
# aliases, just to make str(control) and str(form) clearer
class PasswordControl(TextControl):
    """TextControl under another name (used for type "password")."""


class HiddenControl(TextControl):
    """TextControl under another name (used for type "hidden")."""


class TextareaControl(TextControl):
    """TextControl under another name (used for type "textarea")."""


class SubmitButtonControl(SubmitControl):
    """SubmitControl under another name (used for type "submitbutton")."""
|
---|
| 2511 |
|
---|
| 2512 |
|
---|
def is_listcontrol(control):
    """Return whether control reports itself as being of kind "list"."""
    kind = "list"
    return control.is_of_kind(kind)
|
---|
| 2514 |
|
---|
| 2515 |
|
---|
| 2516 | class HTMLForm:
|
---|
| 2517 | """Represents a single HTML <form> ... </form> element.
|
---|
| 2518 |
|
---|
| 2519 | A form consists of a sequence of controls that usually have names, and
|
---|
| 2520 | which can take on various values. The values of the various types of
|
---|
| 2521 | controls represent variously: text, zero-or-one-of-many or many-of-many
|
---|
| 2522 | choices, and files to be uploaded. Some controls can be clicked on to
|
---|
| 2523 | submit the form, and clickable controls' values sometimes include the
|
---|
| 2524 | coordinates of the click.
|
---|
| 2525 |
|
---|
| 2526 | Forms can be filled in with data to be returned to the server, and then
|
---|
| 2527 | submitted, using the click method to generate a request object suitable for
|
---|
| 2528 | passing to urllib2.urlopen (or the click_request_data or click_pairs
|
---|
| 2529 | methods if you're not using urllib2).
|
---|
| 2530 |
|
---|
| 2531 | import ClientForm
|
---|
| 2532 | forms = ClientForm.ParseFile(html, base_uri)
|
---|
| 2533 | form = forms[0]
|
---|
| 2534 |
|
---|
| 2535 | form["query"] = "Python"
|
---|
| 2536 | form.find_control("nr_results").get("lots").selected = True
|
---|
| 2537 |
|
---|
| 2538 | response = urllib2.urlopen(form.click())
|
---|
| 2539 |
|
---|
| 2540 | Usually, HTMLForm instances are not created directly. Instead, the
|
---|
| 2541 | ParseFile or ParseResponse factory functions are used. If you do construct
|
---|
| 2542 | HTMLForm objects yourself, however, note that an HTMLForm instance is only
|
---|
| 2543 | properly initialised after the fixup method has been called (ParseFile and
|
---|
| 2544 | ParseResponse do this for you). See ListControl.__doc__ for the reason
|
---|
| 2545 | this is required.
|
---|
| 2546 |
|
---|
| 2547 | Indexing a form (form["control_name"]) returns the named Control's value
|
---|
| 2548 | attribute. Assignment to a form index (form["control_name"] = something)
|
---|
| 2549 | is equivalent to assignment to the named Control's value attribute. If you
|
---|
| 2550 | need to be more specific than just supplying the control's name, use the
|
---|
| 2551 | set_value and get_value methods.
|
---|
| 2552 |
|
---|
| 2553 | ListControl values are lists of item names (specifically, the names of the
|
---|
| 2554 | items that are selected and not disabled, and hence are "successful" -- ie.
|
---|
| 2555 | cause data to be returned to the server). The list item's name is the
|
---|
| 2556 | value of the corresponding HTML element's "value" attribute.
|
---|
| 2557 |
|
---|
| 2558 | Example:
|
---|
| 2559 |
|
---|
| 2560 | <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
|
---|
| 2561 | <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
|
---|
| 2562 |
|
---|
| 2563 | defines a CHECKBOX control with name "cheeses" which has two items, named
|
---|
| 2564 | "leicester" and "cheddar".
|
---|
| 2565 |
|
---|
| 2566 | Another example:
|
---|
| 2567 |
|
---|
| 2568 | <SELECT name="more_cheeses">
|
---|
| 2569 | <OPTION>1</OPTION>
|
---|
| 2570 | <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
|
---|
| 2571 | </SELECT>
|
---|
| 2572 |
|
---|
| 2573 | defines a SELECT control with name "more_cheeses" which has two items,
|
---|
| 2574 | named "1" and "2" (because the OPTION element's value HTML attribute
|
---|
| 2575 | defaults to the element contents -- see SelectControl.__doc__ for more on
|
---|
| 2576 | these defaulting rules).
|
---|
| 2577 |
|
---|
| 2578 | To select, deselect or otherwise manipulate individual list items, use the
|
---|
| 2579 | HTMLForm.find_control() and ListControl.get() methods. To set the whole
|
---|
| 2580 | value, do as for any other control: use indexing or the set_/get_value
|
---|
| 2581 | methods.
|
---|
| 2582 |
|
---|
| 2583 | Example:
|
---|
| 2584 |
|
---|
| 2585 | # select *only* the item named "cheddar"
|
---|
| 2586 | form["cheeses"] = ["cheddar"]
|
---|
| 2587 | # select "cheddar", leave other items unaffected
|
---|
| 2588 | form.find_control("cheeses").get("cheddar").selected = True
|
---|
| 2589 |
|
---|
| 2590 | Some controls (RADIO and SELECT without the multiple attribute) can only
|
---|
| 2591 | have zero or one items selected at a time. Some controls (CHECKBOX and
|
---|
| 2592 | SELECT with the multiple attribute) can have multiple items selected at a
|
---|
| 2593 | time. To set the whole value of a ListControl, assign a sequence to a form
|
---|
| 2594 | index:
|
---|
| 2595 |
|
---|
| 2596 | form["cheeses"] = ["cheddar", "leicester"]
|
---|
| 2597 |
|
---|
| 2598 | If the ListControl is not multiple-selection, the assigned list must be of
|
---|
| 2599 | length one.
|
---|
| 2600 |
|
---|
| 2601 | To check if a control has an item, if an item is selected, or if an item is
|
---|
| 2602 | successful (selected and not disabled), respectively:
|
---|
| 2603 |
|
---|
| 2604 | "cheddar" in [item.name for item in form.find_control("cheeses").items]
|
---|
| 2605 | "cheddar" in [item.name for item in form.find_control("cheeses").items if
|
---|
| 2606 | item.selected]
|
---|
| 2607 | "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
|
---|
| 2608 |
|
---|
| 2609 | Note that some list items may be disabled (see below).
|
---|
| 2610 |
|
---|
| 2611 | Note the following mistake:
|
---|
| 2612 |
|
---|
| 2613 | form[control_name] = control_value
|
---|
| 2614 | assert form[control_name] == control_value # not necessarily true
|
---|
| 2615 |
|
---|
| 2616 | The reason for this is that form[control_name] always gives the list items
|
---|
| 2617 | in the order they were listed in the HTML.
|
---|
| 2618 |
|
---|
| 2619 | List items (hence list values, too) can be referred to in terms of list
|
---|
| 2620 | item labels rather than list item names using the appropriate label
|
---|
| 2621 | arguments. Note that each item may have several labels.
|
---|
| 2622 |
|
---|
| 2623 | The question of default values of OPTION contents, labels and values is
|
---|
| 2624 | somewhat complicated: see SelectControl.__doc__ and
|
---|
| 2625 | ListControl.get_item_attrs.__doc__ if you think you need to know.
|
---|
| 2626 |
|
---|
| 2627 | Controls can be disabled or readonly. In either case, the control's value
|
---|
| 2628 | cannot be changed until you clear those flags (see example below).
|
---|
| 2629 | Disabled is the state typically represented by browsers by 'greying out' a
|
---|
| 2630 | control. Disabled controls are not 'successful' -- they don't cause data
|
---|
| 2631 | to get returned to the server. Readonly controls usually appear in
|
---|
| 2632 | browsers as read-only text boxes. Readonly controls are successful. List
|
---|
| 2633 | items can also be disabled. Attempts to select or deselect disabled items
|
---|
| 2634 | fail with AttributeError.
|
---|
| 2635 |
|
---|
| 2636 | If a lot of controls are readonly, it can be useful to do this:
|
---|
| 2637 |
|
---|
| 2638 | form.set_all_readonly(False)
|
---|
| 2639 |
|
---|
| 2640 | To clear a control's value attribute, so that it is not successful (until a
|
---|
| 2641 | value is subsequently set):
|
---|
| 2642 |
|
---|
| 2643 | form.clear("cheeses")
|
---|
| 2644 |
|
---|
| 2645 | More examples:
|
---|
| 2646 |
|
---|
| 2647 | control = form.find_control("cheeses")
|
---|
| 2648 | control.disabled = False
|
---|
| 2649 | control.readonly = False
|
---|
| 2650 | control.get("gruyere").disabled = True
|
---|
| 2651 | control.items[0].selected = True
|
---|
| 2652 |
|
---|
| 2653 | See the various Control classes for further documentation. Many methods
|
---|
| 2654 | take name, type, kind, id, label and nr arguments to specify the control to
|
---|
| 2655 | be operated on: see HTMLForm.find_control.__doc__.
|
---|
| 2656 |
|
---|
| 2657 | ControlNotFoundError (subclass of ValueError) is raised if the specified
|
---|
| 2658 | control can't be found. This includes occasions where a non-ListControl
|
---|
| 2659 | is found, but the method (set, for example) requires a ListControl.
|
---|
| 2660 | ItemNotFoundError (subclass of ValueError) is raised if a list item can't
|
---|
| 2661 | be found. ItemCountError (subclass of ValueError) is raised if an attempt
|
---|
| 2662 | is made to select more than one item and the control doesn't allow that, or
|
---|
| 2663 | set/get_single are called and the control contains more than one item.
|
---|
| 2664 | AttributeError is raised if a control or item is readonly or disabled and
|
---|
| 2665 | an attempt is made to alter its value.
|
---|
| 2666 |
|
---|
| 2667 | Security note: Remember that any passwords you store in HTMLForm instances
|
---|
| 2668 | will be saved to disk in the clear if you pickle them (directly or
|
---|
| 2669 | indirectly). The simplest solution to this is to avoid pickling HTMLForm
|
---|
| 2670 | objects. You could also pickle before filling in any password, or just set
|
---|
| 2671 | the password to "" before pickling.
|
---|
| 2672 |
|
---|
| 2673 |
|
---|
| 2674 | Public attributes:
|
---|
| 2675 |
|
---|
| 2676 | action: full (absolute URI) form action
|
---|
| 2677 | method: "GET" or "POST"
|
---|
| 2678 | enctype: form transfer encoding MIME type
|
---|
| 2679 | name: name of form (None if no name was specified)
|
---|
| 2680 | attrs: dictionary mapping original HTML form attributes to their values
|
---|
| 2681 |
|
---|
| 2682 | controls: list of Control instances; do not alter this list
|
---|
| 2683 | (instead, call form.new_control to make a Control and add it to the
|
---|
| 2684 | form, or control.add_to_form if you already have a Control instance)
|
---|
| 2685 |
|
---|
| 2686 |
|
---|
| 2687 |
|
---|
| 2688 | Methods for form filling:
|
---|
| 2689 | -------------------------
|
---|
| 2690 |
|
---|
| 2691 | Most of these methods have very similar arguments. See
|
---|
| 2692 | HTMLForm.find_control.__doc__ for details of the name, type, kind, label
|
---|
| 2693 | and nr arguments.
|
---|
| 2694 |
|
---|
| 2695 | def find_control(self,
|
---|
| 2696 | name=None, type=None, kind=None, id=None, predicate=None,
|
---|
| 2697 | nr=None, label=None)
|
---|
| 2698 |
|
---|
| 2699 | get_value(name=None, type=None, kind=None, id=None, nr=None,
|
---|
| 2700 | by_label=False, # by_label is deprecated
|
---|
| 2701 | label=None)
|
---|
| 2702 | set_value(value,
|
---|
| 2703 | name=None, type=None, kind=None, id=None, nr=None,
|
---|
| 2704 | by_label=False, # by_label is deprecated
|
---|
| 2705 | label=None)
|
---|
| 2706 |
|
---|
| 2707 | clear_all()
|
---|
| 2708 | clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
|
---|
| 2709 |
|
---|
| 2710 | set_all_readonly(readonly)
|
---|
| 2711 |
|
---|
| 2712 |
|
---|
| 2713 | Method applying only to FileControls:
|
---|
| 2714 |
|
---|
| 2715 | add_file(file_object,
|
---|
| 2716 | content_type="application/octet-stream", filename=None,
|
---|
| 2717 | name=None, id=None, nr=None, label=None)
|
---|
| 2718 |
|
---|
| 2719 |
|
---|
| 2720 | Methods applying only to clickable controls:
|
---|
| 2721 |
|
---|
| 2722 | click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
|
---|
| 2723 | click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
|
---|
| 2724 | label=None)
|
---|
| 2725 | click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
|
---|
| 2726 |
|
---|
| 2727 | """
|
---|
| 2728 |
|
---|
# Maps the lower-cased control type (as looked up by new_control) to the
# Control class used to represent it.
type2class = {
    # plain text entry
    "text": TextControl,
    "password": PasswordControl,
    "hidden": HiddenControl,
    "textarea": TextareaControl,

    "isindex": IsindexControl,

    "file": FileControl,

    # buttons that never contribute to a submission
    "button": IgnoreControl,
    "buttonbutton": IgnoreControl,
    "reset": IgnoreControl,
    "resetbutton": IgnoreControl,

    # clickable controls
    "submit": SubmitControl,
    "submitbutton": SubmitButtonControl,
    "image": ImageControl,

    # list controls
    "radio": RadioControl,
    "checkbox": CheckboxControl,
    "select": SelectControl,
    }
|
---|
| 2752 |
|
---|
| 2753 | #---------------------------------------------------
|
---|
| 2754 | # Initialisation. Use ParseResponse / ParseFile instead.
|
---|
| 2755 |
|
---|
def __init__(self, action, method="GET",
             enctype="application/x-www-form-urlencoded",
             name=None, attrs=None,
             request_class=urllib2.Request,
             forms=None, labels=None, id_to_labels=None,
             backwards_compat=True):
    """
    In the usual case, use ParseResponse (or ParseFile) to create new
    HTMLForm objects.

    action: full (absolute URI) form action
    method: "GET" or "POST"
    enctype: form transfer encoding MIME type
    name: name of form
    attrs: dictionary mapping original HTML form attributes to their
     values; it is copied, and an empty dictionary is used if it is None

    The form starts out with an empty list of controls.

    """
    if attrs is None:
        attrs = {}
    else:
        attrs = attrs.copy()

    self.action = action
    self.method = method
    self.enctype = enctype
    self.name = name
    self.attrs = attrs
    self.controls = []
    self._request_class = request_class

    # these attributes are used by zope.testbrowser
    self._forms = forms  # this is a semi-public API!
    self._labels = labels  # this is a semi-public API!
    self._id_to_labels = id_to_labels  # this is a semi-public API!

    # note __setattr__: it walks self.controls, so this has to come after
    # the controls list has been created
    self.backwards_compat = backwards_compat

    self._urlparse = urlparse.urlparse
    self._urlunparse = urlparse.urlunparse
|
---|
| 2793 |
|
---|
def __getattr__(self, name):
    """Resolve attributes that normal lookup did not find.

    "backwards_compat" is served from the stored _backwards_compat value;
    every other name is looked up on the HTMLForm class.

    """
    if name != "backwards_compat":
        return getattr(HTMLForm, name)
    return self._backwards_compat
|
---|
| 2798 |
|
---|
def __setattr__(self, name, value):
    """Store an attribute, with special handling of backwards_compat.

    Assigning backwards_compat stores bool(value) as _backwards_compat and
    copies it onto every label of every item of every control that has an
    items attribute.  All other names are stored unchanged.

    """
    # yuck
    if name == "backwards_compat":
        name = "_backwards_compat"
        value = bool(value)
        for control in self.controls:
            # controls without an items attribute are skipped
            for item in getattr(control, "items", ()):
                for label in item.get_labels():
                    label._backwards_compat = value
    self.__dict__[name] = value
|
---|
| 2814 |
|
---|
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False, index=None):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list)
    attrs: HTML attributes of control (a copy is handed to the control)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, use a TextControl
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)
    index: index of corresponding element in HTML (see
     MoreFormTests.test_interspersed_controls for motivation)

    """
    type = type.lower()
    klass = self.type2class.get(type)
    if klass is None:
        # unknown type: either ignore it or treat it as a text control
        if ignore_unknown:
            klass = IgnoreControl
        else:
            klass = TextControl

    attrs_copy = attrs.copy()
    if issubclass(klass, ListControl):
        control = klass(type, name, attrs_copy, select_default, index)
    else:
        control = klass(type, name, attrs_copy, index)

    if type == "select" and len(attrs) == 1:
        # a SELECT with a single attribute entry: close the most recently
        # added select control before this one joins the form
        newest_first = list(self.controls)
        newest_first.reverse()
        for earlier in newest_first:
            if earlier.type == "select":
                earlier.close_control()
                break

    control.add_to_form(self)
    control._urlparse = self._urlparse
    control._urlunparse = self._urlunparse
|
---|
| 2862 |
|
---|
def fixup(self):
    """Normalise the form once every control has been added.

    Normally invoked by ParseFile / ParseResponse; only call it yourself
    when you are building Control instances by hand.

    Call this exactly once, after the last control was added: it runs
    fixup() on each control and then re-assigns the backwards_compat
    attribute from the stored _backwards_compat value.

    """
    for member in self.controls:
        member.fixup()
    # NOTE(review): re-assigning through the public attribute presumably
    # lets the form's attribute-setting hook push the flag down to the
    # labels of all controls -- confirm against __setattr__.
    compat_flag = self._backwards_compat
    self.backwards_compat = compat_flag
| 2876 |
|
---|
| 2877 | #---------------------------------------------------
|
---|
def __str__(self):
    """Return a multi-line, human-readable summary of the form.

    The first line holds the (optional) form name, the method, the action
    and the enctype; each following line is str() of one control.  The
    whole text is wrapped in angle brackets.

    """
    if self.name:
        name_part = self.name + " "
    else:
        name_part = ""
    lines = ["%s%s %s %s" % (
        name_part, self.method, self.action, self.enctype)]
    lines.extend([" %s" % str(member) for member in self.controls])
    return "<%s>" % "\n".join(lines)
| 2886 |
|
---|
| 2887 | #---------------------------------------------------
|
---|
| 2888 | # Form-filling methods.
|
---|
| 2889 |
|
---|
def __getitem__(self, name):
    """Return the value of the control that find_control(name) locates."""
    control = self.find_control(name)
    return control.value
def __contains__(self, name):
    """Return whether the form has a control called name.

    find_control signals "no such control" by raising
    ControlNotFoundError, so that exception is translated into False here
    instead of escaping from an `name in form` test.  AmbiguityError is
    only raised after at least one control matched, hence it means True.

    """
    try:
        control = self.find_control(name)
    except AmbiguityError:
        # Several controls carry this name: it is certainly present.
        return True
    except ControlNotFoundError:
        return False
    return bool(control)
def __setitem__(self, name, value):
    """Set the value of the control that find_control(name) locates.

    An AttributeError raised while assigning the value is re-raised as a
    ValueError carrying the same message.

    """
    control = self.find_control(name)
    try:
        control.value = value
    # "except X as e" is accepted by Python 2.6+ and Python 3, unlike the
    # old comma form which is a syntax error on Python 3.
    except AttributeError as e:
        raise ValueError(str(e))
| 2900 |
|
---|
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Return the value of a control.

    The control is located with find_control(name, type, kind, id,
    label=label, nr=nr).  If only the name argument is supplied this is
    equivalent to

    form[name]

    by_label is deprecated (a deprecation notice is issued when it is
    true); in that case the control's get_value_by_label() result is
    returned, and NotImplementedError is raised if the control has no such
    method.

    """
    if by_label:
        deprecation("form.get_value_by_label(...)")
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    if not by_label:
        return control.value
    try:
        getter = control.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % control.name)
    return getter()
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Set the value of a control.

    The control is located with find_control(name, type, kind, id,
    label=label, nr=nr).  If only the name and value arguments are
    supplied this corresponds to

    form[name] = value

    except that an AttributeError from the assignment is not converted to
    ValueError here.

    by_label is deprecated (a deprecation notice is issued when it is
    true); in that case the control's set_value_by_label(value) is called,
    and NotImplementedError is raised if the control has no such method.

    """
    if by_label:
        # Point callers at the setter (the message used to name the getter).
        deprecation("form.set_value_by_label(...)")
    c = self.find_control(name, type, kind, id, label=label, nr=nr)
    if not by_label:
        c.value = value
        return
    try:
        meth = c.set_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % c.name)
    meth(value)
def get_value_by_label(
        self, name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Return get_value_by_label() of the control the arguments select.

    All arguments should be passed by name; they are forwarded to
    find_control.

    """
    control = self.find_control(name, type, kind, id, nr=nr, label=label)
    getter = control.get_value_by_label
    return getter()
| 2958 |
|
---|
def set_value_by_label(
        self, value,
        name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Call set_value_by_label(value) on the control the arguments select.

    All arguments should be passed by name; everything except value is
    forwarded to find_control.

    """
    control = self.find_control(name, type, kind, id, nr=nr, label=label)
    setter = control.set_value_by_label
    setter(value)
| 2969 |
|
---|
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control to bool(readonly)."""
    flag = bool(readonly)
    for member in self.controls:
        member.readonly = flag
| 2973 |
|
---|
def clear_all(self):
    """Call clear() on every control in the form.

    See HTMLForm.clear.__doc__ for what clearing a control means.

    """
    for member in self.controls:
        member.clear()
| 2982 |
|
---|
def clear(self,
          name=None, type=None, kind=None, id=None, nr=None, label=None):
    """Clear the value attribute of a single control.

    The control is located with find_control using the given arguments.
    Once cleared, the control will not be successful until a value is set
    again.  AttributeError is raised on readonly controls.

    """
    control = self.find_control(name, type, kind, id, nr=nr, label=label)
    control.clear()
| 2993 |
|
---|
| 2994 |
|
---|
| 2995 | #---------------------------------------------------
|
---|
| 2996 | # Form-filling methods applying only to ListControls.
|
---|
| 2997 |
|
---|
def possible_items(self,  # deprecated
                   name=None, type=None, kind=None, id=None,
                   nr=None, by_label=False, label=None):
    """Return a list of all values that the specified control can take.

    Deprecated.  The list control is located via _find_list_control and
    its possible_items(by_label) result is returned.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    lister = list_control.possible_items
    return lister(by_label)
| 3004 |
|
---|
def set(self, selected, item_name,  # deprecated
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False, label=None):
    """Select / deselect a named item of a list control.

    Deprecated.

    selected: boolean selected state
    item_name: name of the list item, forwarded with by_label to the
     control's own set method

    The remaining arguments locate the control via _find_list_control.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set(selected, item_name, by_label)
def toggle(self, item_name,  # deprecated
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False, label=None):
    """Toggle the selected state of a named item of a list control.

    Deprecated.  The control is located via _find_list_control; item_name
    and by_label are forwarded to the control's own toggle method.

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle(item_name, by_label)
| 3021 |
|
---|
def set_single(self, selected,  # deprecated
               name=None, type=None, kind=None, id=None,
               nr=None, by_label=None, label=None):
    """Select / deselect the list item of a control that has only one item.

    Deprecated.  If the control has multiple list items, ItemCountError is
    raised.

    This is a convenience so that you need not know the item's name, which
    in single-item controls is usually something meaningless like "1" or
    "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.toggle("on")
    control.toggle_single()

    The by_label argument is ignored (and deprecated).

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set_single(selected)
def toggle_single(self, name=None, type=None, kind=None, id=None,
                  nr=None, by_label=None, label=None):  # deprecated
    """Toggle the list item of a control that has only one item.

    Deprecated.  The rest is as for HTMLForm.set_single.__doc__; the
    by_label argument is ignored (and deprecated).

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle_single()
| 3050 |
|
---|
| 3051 | #---------------------------------------------------
|
---|
| 3052 | # Form-filling method applying only to FileControls.
|
---|
| 3053 |
|
---|
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None, label=None):
    """Add a file to be uploaded through a control of type "file".

    file_object: file-like object (with read method) from which to read
     data to upload
    content_type: MIME content type of data to upload
    filename: filename to pass to server
    name, id, nr, label: used to locate the file control via find_control

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
    plain text.

    Note the following useful HTML attributes of file upload controls (see
    HTML 4.01 spec, section 17):

    accept: comma-separated list of content types that the server will
     handle correctly; you can use this to filter out non-conforming files
    size: XXX IIRC, this is indicative of whether form wants multiple or
     single files
    maxlength: XXX hint of max content length in bytes?

    """
    file_control = self.find_control(name, "file", id=id, label=label, nr=nr)
    file_control.add_file(file_object, content_type, filename)
| 3085 |
|
---|
| 3086 | #---------------------------------------------------
|
---|
| 3087 | # Form submission methods, applying only to clickable controls.
|
---|
| 3088 |
|
---|
def click(self, name=None, type=None, id=None, nr=0, coord=(1, 1),
          request_class=urllib2.Request,
          label=None):
    """Return the request that would result from clicking on a control.

    The request object is built with the form's own request class
    (self._request_class, by default presumably urllib2.Request), which
    you can pass to urllib2.urlopen (or ClientCookie.urlopen).

    NOTE: the request_class argument of this method is accepted but not
    consulted; self._request_class is always used.

    Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
    IMAGEs) can be clicked.

    Will click on the first clickable control, subject to the name, type
    and nr arguments (as for find_control).  If no name, type, id or number
    is specified and there are no clickable controls, a request will be
    returned for the form in its current, un-clicked, state.

    ControlNotFoundError is raised if any of name, type, id or nr is
    specified but no matching control is found.  ValueError is raised if
    the HTMLForm has an enctype attribute that is not recognised.

    You can optionally specify a coordinate to click at, which only makes a
    difference if you clicked on an image.

    """
    wanted = "request"
    return self._click(
        name, type, id, label, nr, coord, wanted, self._request_class)
| 3115 |
|
---|
def click_request_data(self,
                       name=None, type=None, id=None,
                       nr=0, coord=(1, 1),
                       request_class=urllib2.Request,
                       label=None):
    """As for the click method, but return a tuple (url, data, headers).

    You can use this data to send a request to the server.  This is useful
    if you're using httplib or urllib rather than urllib2.  Otherwise, use
    the click method.

    NOTE: the request_class argument of this method is accepted but not
    consulted; self._request_class is passed on instead.

    # Untested.  Have to subclass to add headers, I think -- so use urllib2
    # instead!
    import urllib
    url, data, hdrs = form.click_request_data()
    r = urllib.urlopen(url, data)

    # Untested.  I don't know of any reason to use httplib -- you can get
    # just as much control with urllib2.
    import httplib, urlparse
    url, data, hdrs = form.click_request_data()
    tup = urlparse.urlparse(url)
    host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
    conn = httplib.HTTPConnection(host)
    if data:
        conn.request("POST", path, data, hdrs)
    else:
        conn.request("GET", path, headers=hdrs)
    r = conn.getresponse()

    """
    wanted = "request_data"
    return self._click(
        name, type, id, label, nr, coord, wanted, self._request_class)
| 3149 |
|
---|
def click_pairs(self, name=None, type=None, id=None,
                nr=0, coord=(1, 1),
                label=None):
    """As for click_request_data, but return a list of (key, value) pairs.

    You can use this list as an argument to ClientForm.urlencode.  This is
    usually only useful if you're using httplib or urllib rather than
    urllib2 or ClientCookie.  It may also be useful if you want to manually
    tweak the keys and/or values, but this should not be necessary.
    Otherwise, use the click method.

    Note that this method is only useful for forms of MIME type
    x-www-form-urlencoded.  In particular, it does not return the
    information required for file upload.  If you need file upload and are
    not using urllib2, use click_request_data.

    Also note that Python 2.0's urllib.urlencode is slightly broken: it
    only accepts a mapping, not a sequence of pairs, as an argument.  This
    messes up any ordering in the argument.  Use ClientForm.urlencode
    instead.

    """
    wanted = "pairs"
    return self._click(
        name, type, id, label, nr, coord, wanted, self._request_class)
| 3174 |
|
---|
| 3175 | #---------------------------------------------------
|
---|
| 3176 |
|
---|
def find_control(self,
                 name=None, type=None, kind=None, id=None,
                 predicate=None, nr=None,
                 label=None):
    """Locate and return some specific control within the form.

    At least one of the name, type, kind, id, label, predicate and nr
    arguments must be supplied, otherwise ValueError is raised.  If no
    matching control is found, ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The kinds are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.  If no nr is supplied, AmbiguityError is raised if
    multiple controls match the other arguments (unless the
    .backwards_compat attribute is true).

    If label is specified, then the control must have this label.  Note
    that radio controls and checkboxes never have labels: their items do.

    """
    criteria = (name, type, kind, id, label, predicate, nr)
    for criterion in criteria:
        if criterion is not None:
            break
    else:
        # Every selector was left at None: nothing to search for.
        raise ValueError(
            "at least one argument must be supplied to specify control")
    return self._find_control(*criteria)
| 3223 |
|
---|
| 3224 | #---------------------------------------------------
|
---|
| 3225 | # Private methods.
|
---|
| 3226 |
|
---|
def _find_list_control(self,
                       name=None, type=None, kind=None, id=None,
                       label=None, nr=None):
    """Locate a control using is_listcontrol as the predicate.

    Raises ValueError when name, type, kind, id, label and nr are all
    None; otherwise delegates to _find_control.

    """
    selectors = (name, type, kind, id, label, nr)
    for selector in selectors:
        if selector is not None:
            break
    else:
        raise ValueError(
            "at least one argument must be supplied to specify control")

    return self._find_control(name, type, kind, id, label,
                              is_listcontrol, nr)
| 3237 |
|
---|
def _find_control(self, name, type, kind, id, label, predicate, nr):
    """Return the control matching every supplied criterion.

    name: control name; the Missing sentinel matches controls whose name
     is None
    type, kind, id: compared against control.type, control.is_of_kind()
     and control.id respectively
    label: matches if it is a substring of the text of any of the
     control's labels
    predicate: callable taking a control; must return true for a match
    nr: zero-based index among the controls matching the other criteria.
     When nr is None and self.backwards_compat is true, nr 0 is assumed.

    Raises TypeError for arguments of the wrong type, ValueError for a
    negative nr, AmbiguityError if nr is not given and several controls
    match, and ControlNotFoundError if nothing matches.

    """
    if ((name is not None) and (name is not Missing) and
            not isstringlike(name)):
        raise TypeError("control name must be string-like")
    if (type is not None) and not isstringlike(type):
        raise TypeError("control type must be string-like")
    if (kind is not None) and not isstringlike(kind):
        raise TypeError("control kind must be string-like")
    if (id is not None) and not isstringlike(id):
        raise TypeError("control id must be string-like")
    if (label is not None) and not isstringlike(label):
        raise TypeError("control label must be string-like")
    if (predicate is not None) and not callable(predicate):
        raise TypeError("control predicate must be callable")
    if (nr is not None) and nr < 0:
        raise ValueError("control number must be a positive integer")

    orig_nr = nr
    found = None
    ambiguous = False
    if nr is None and self.backwards_compat:
        # Backwards-compatible mode: silently take the first match.
        nr = 0

    for control in self.controls:
        if ((name is not None and name != control.name) and
                (name is not Missing or control.name is not None)):
            continue
        if type is not None and type != control.type:
            continue
        if kind is not None and not control.is_of_kind(kind):
            continue
        if id is not None and id != control.id:
            continue
        if predicate and not predicate(control):
            continue
        if label:
            for l in control.get_labels():
                if l.text.find(label) > -1:
                    break
            else:
                continue
        if nr is not None:
            if nr == 0:
                return control  # early exit: unambiguous due to nr
            nr -= 1
            continue
        # Compare against None rather than relying on truthiness, so a
        # control object that happens to evaluate false is still counted.
        if found is not None:
            ambiguous = True
            break
        found = control

    if found is not None and not ambiguous:
        return found

    description = []
    if name is not None:
        description.append("name %s" % repr(name))
    if type is not None:
        description.append("type '%s'" % type)
    if kind is not None:
        description.append("kind '%s'" % kind)
    if id is not None:
        description.append("id '%s'" % id)
    if label is not None:
        description.append("label '%s'" % label)
    if predicate is not None:
        description.append("predicate %s" % predicate)
    # "is not None" so that an explicitly requested nr of 0 is reported too.
    if orig_nr is not None:
        description.append("nr %d" % orig_nr)
    description = ", ".join(description)

    if ambiguous:
        raise AmbiguityError("more than one control matching " + description)
    raise ControlNotFoundError("no control matching " + description)
| 3308 |
|
---|
def _click(self, name, type, id, label, nr, coord, return_type,
           request_class=urllib2.Request):
    """Click the first matching clickable control and return the result.

    name, type, id, label, nr: selectors handed to _find_control (with
     kind "clickable")
    coord: click coordinate, forwarded to the control
    return_type: "pairs", "request_data" or anything else for a request
     object (see _switch_click)
    request_class: class used to build request objects

    If no clickable control is found and none was explicitly asked for
    (name, type, id and label are None and nr is 0), the form is returned
    in its current, un-clicked state.  Otherwise ControlNotFoundError
    propagates.

    """
    try:
        control = self._find_control(
            name, type, "clickable", id, label, None, nr)
    except ControlNotFoundError:
        # label is a selector like the others: asking for a labelled
        # control that does not exist must not be silently ignored.
        if ((name is not None) or (type is not None) or (id is not None) or
                (label is not None) or (nr != 0)):
            raise
        # no clickable controls, but no control was explicitly requested,
        # so return state without clicking any control
        return self._switch_click(return_type, request_class)
    else:
        return control._click(self, coord, return_type, request_class)
| 3323 |
|
---|
def _pairs(self):
    """Return sequence of (key, value) pairs suitable for urlencoding."""
    result = []
    for entry in self._pairs_and_controls():
        # Drop the ordering index and the control index of each entry.
        index, key, value, control_index = entry
        result.append((key, value))
    return result
| 3327 |
|
---|
| 3328 |
|
---|
def _pairs_and_controls(self):
    """Return sequence of (index, key, value, control_index)
    of totally ordered pairs suitable for urlencoding.

    control_index is the index of the control in self.controls.

    The result is ordered by index only; entries with equal index keep the
    order in which the controls produced them.

    """
    decorated = []
    for control_index in range(len(self.controls)):
        control = self.controls[control_index]
        for ii, key, val in control._totally_ordered_pairs():
            # The running sequence number is unique, so the sort never
            # gets as far as comparing keys, values or control indices.
            decorated.append((ii, len(decorated), key, val, control_index))

    # stable sort by ONLY first item in tuple
    decorated.sort()

    return [(ii, key, val, control_index)
            for (ii, seq, key, val, control_index) in decorated]
| 3345 |
|
---|
def _request_data(self):
    """Return a tuple (url, data, headers).

    For GET forms the urlencoded pairs replace the query of the action URL
    and data is None.  For POST forms the action's query is kept and the
    data is either urlencoded or a multipart/form-data body, depending on
    self.enctype.  The fragment of the action URL is dropped in both cases.

    Raises ValueError for an unknown method or an enctype that is not
    supported for the method.

    """
    http_method = self.method.upper()
    split_action = self._urlparse(self.action)
    leading = split_action[:-2]
    query, frag = split_action[-2:]

    if http_method not in ("GET", "POST"):
        raise ValueError("Unknown method '%s'" % http_method)

    if http_method == "GET":
        if self.enctype != "application/x-www-form-urlencoded":
            raise ValueError(
                "unknown GET form encoding type '%s'" % self.enctype)
        uri = self._urlunparse(leading + (urlencode(self._pairs()), None))
        return uri, None, []

    # POST
    uri = self._urlunparse(leading + (query, None))
    if self.enctype == "application/x-www-form-urlencoded":
        return (uri, urlencode(self._pairs()),
                [("Content-Type", self.enctype)])
    if self.enctype == "multipart/form-data":
        data = StringIO()
        http_hdrs = []
        writer = MimeWriter(data, http_hdrs)
        writer.startmultipartbody("form-data", add_to_http_hdrs=True,
                                  prefix=0)
        # Each control writes its own part, in submission order.
        for ii, k, v, control_index in self._pairs_and_controls():
            self.controls[control_index]._write_mime_data(writer, k, v)
        writer.lastpart()
        return uri, data.getvalue(), http_hdrs
    raise ValueError(
        "unknown POST form encoding type '%s'" % self.enctype)
| 3381 |
|
---|
def _switch_click(self, return_type, request_class=urllib2.Request):
    """Return the form's submission data in the shape return_type asks for.

    This is called by HTMLForm and clickable Controls to hide switching on
    return_type: "pairs" gives self._pairs(), "request_data" gives
    self._request_data(), anything else gives a request_class instance
    built from the request data with its headers added.

    """
    if return_type == "pairs":
        return self._pairs()
    if return_type == "request_data":
        return self._request_data()

    url, body, headers = self._request_data()
    req = request_class(url, body)
    for key, val in headers:
        if key.lower() == "content-type":
            # Use add_unredirected_header when the request object has it;
            # pre-2.4 requests (without ClientCookie) lack it, so fall
            # back to the plain add_header.
            add_hdr = getattr(req, "add_unredirected_header", req.add_header)
        else:
            add_hdr = req.add_header
        add_hdr(key, val)
    return req