[106] | 1 | """HTTP related handlers.
|
---|
| 2 |
|
---|
| 3 | Note that some other HTTP handlers live in more specific modules: _auth.py,
|
---|
| 4 | _gzip.py, etc.
|
---|
| 5 |
|
---|
| 6 |
|
---|
| 7 | Copyright 2002-2006 John J Lee <jjl@pobox.com>
|
---|
| 8 |
|
---|
| 9 | This code is free software; you can redistribute it and/or modify it
|
---|
| 10 | under the terms of the BSD or ZPL 2.1 licenses (see the file
|
---|
| 11 | COPYING.txt included with the distribution).
|
---|
| 12 |
|
---|
| 13 | """
|
---|
| 14 |
|
---|
| 15 | import time, htmlentitydefs, logging, socket, \
|
---|
| 16 | urllib2, urllib, httplib, sgmllib
|
---|
| 17 | from urllib2 import URLError, HTTPError, BaseHandler
|
---|
| 18 | from cStringIO import StringIO
|
---|
| 19 |
|
---|
| 20 | from _clientcookie import CookieJar
|
---|
| 21 | from _headersutil import is_html
|
---|
| 22 | from _html import unescape, unescape_charref
|
---|
| 23 | from _request import Request
|
---|
| 24 | from _response import closeable_response, response_seek_wrapper
|
---|
| 25 | import _rfc3986
|
---|
| 26 | import _sockettimeout
|
---|
| 27 |
|
---|
| 28 | debug = logging.getLogger("mechanize").debug
|
---|
| 29 | debug_robots = logging.getLogger("mechanize.robots").debug
|
---|
| 30 |
|
---|
| 31 | # monkeypatch urllib2.HTTPError to show URL
|
---|
| 32 | ## def urllib2_str(self):
|
---|
| 33 | ## return 'HTTP Error %s: %s (%s)' % (
|
---|
| 34 | ## self.code, self.msg, self.geturl())
|
---|
| 35 | ## urllib2.HTTPError.__str__ = urllib2_str
|
---|
| 36 |
|
---|
| 37 |
|
---|
| 38 | CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
|
---|
| 39 | DEFAULT_ENCODING = 'latin-1'
|
---|
| 40 |
|
---|
| 41 |
|
---|
| 42 | try:
|
---|
| 43 | socket._fileobject("fake socket", close=True)
|
---|
| 44 | except TypeError:
|
---|
| 45 | # python <= 2.4
|
---|
| 46 | create_readline_wrapper = socket._fileobject
|
---|
| 47 | else:
|
---|
| 48 | def create_readline_wrapper(fh):
|
---|
| 49 | return socket._fileobject(fh, close=True)
|
---|
| 50 |
|
---|
| 51 |
|
---|
| 52 | # This adds "refresh" to the list of redirectables and provides a redirection
|
---|
| 53 | # algorithm that doesn't go into a loop in the presence of cookies
|
---|
| 54 | # (Python 2.4 has this new algorithm, 2.3 doesn't).
|
---|
| 55 | class HTTPRedirectHandler(BaseHandler):
|
---|
| 56 | # maximum number of redirections to any single URL
|
---|
| 57 | # this is needed because of the state that cookies introduce
|
---|
| 58 | max_repeats = 4
|
---|
| 59 | # maximum total number of redirections (regardless of URL) before
|
---|
| 60 | # assuming we're in a loop
|
---|
| 61 | max_redirections = 10
|
---|
| 62 |
|
---|
| 63 | # Implementation notes:
|
---|
| 64 |
|
---|
| 65 | # To avoid the server sending us into an infinite loop, the request
|
---|
| 66 | # object needs to track what URLs we have already seen. Do this by
|
---|
| 67 | # adding a handler-specific attribute to the Request object. The value
|
---|
| 68 | # of the dict is used to count the number of times the same URL has
|
---|
| 69 | # been visited. This is needed because visiting the same URL twice
|
---|
| 70 | # does not necessarily imply a loop, thanks to state introduced by
|
---|
| 71 | # cookies.
|
---|
| 72 |
|
---|
| 73 | # Always unhandled redirection codes:
|
---|
| 74 | # 300 Multiple Choices: should not handle this here.
|
---|
| 75 | # 304 Not Modified: no need to handle here: only of interest to caches
|
---|
| 76 | # that do conditional GETs
|
---|
| 77 | # 305 Use Proxy: probably not worth dealing with here
|
---|
| 78 | # 306 Unused: what was this for in the previous versions of protocol??
|
---|
| 79 |
|
---|
| 80 | def redirect_request(self, newurl, req, fp, code, msg, headers):
|
---|
| 81 | """Return a Request or None in response to a redirect.
|
---|
| 82 |
|
---|
| 83 | This is called by the http_error_30x methods when a redirection
|
---|
| 84 | response is received. If a redirection should take place, return a
|
---|
| 85 | new Request to allow http_error_30x to perform the redirect;
|
---|
| 86 | otherwise, return None to indicate that an HTTPError should be
|
---|
| 87 | raised.
|
---|
| 88 |
|
---|
| 89 | """
|
---|
| 90 | if code in (301, 302, 303, "refresh") or \
|
---|
| 91 | (code == 307 and not req.has_data()):
|
---|
| 92 | # Strictly (according to RFC 2616), 301 or 302 in response to
|
---|
| 93 | # a POST MUST NOT cause a redirection without confirmation
|
---|
| 94 | # from the user (of urllib2, in this case). In practice,
|
---|
| 95 | # essentially all clients do redirect in this case, so we do
|
---|
| 96 | # the same.
|
---|
| 97 | # XXX really refresh redirections should be visiting; tricky to
|
---|
| 98 | # fix, so this will wait until post-stable release
|
---|
| 99 | new = Request(newurl,
|
---|
| 100 | headers=req.headers,
|
---|
| 101 | origin_req_host=req.get_origin_req_host(),
|
---|
| 102 | unverifiable=True,
|
---|
| 103 | visit=False,
|
---|
| 104 | )
|
---|
| 105 | new._origin_req = getattr(req, "_origin_req", req)
|
---|
| 106 | return new
|
---|
| 107 | else:
|
---|
| 108 | raise HTTPError(req.get_full_url(), code, msg, headers, fp)
|
---|
| 109 |
|
---|
| 110 | def http_error_302(self, req, fp, code, msg, headers):
|
---|
| 111 | # Some servers (incorrectly) return multiple Location headers
|
---|
| 112 | # (so probably same goes for URI). Use first header.
|
---|
| 113 | if headers.has_key('location'):
|
---|
| 114 | newurl = headers.getheaders('location')[0]
|
---|
| 115 | elif headers.has_key('uri'):
|
---|
| 116 | newurl = headers.getheaders('uri')[0]
|
---|
| 117 | else:
|
---|
| 118 | return
|
---|
| 119 | newurl = _rfc3986.clean_url(newurl, "latin-1")
|
---|
| 120 | newurl = _rfc3986.urljoin(req.get_full_url(), newurl)
|
---|
| 121 |
|
---|
| 122 | # XXX Probably want to forget about the state of the current
|
---|
| 123 | # request, although that might interact poorly with other
|
---|
| 124 | # handlers that also use handler-specific request attributes
|
---|
| 125 | new = self.redirect_request(newurl, req, fp, code, msg, headers)
|
---|
| 126 | if new is None:
|
---|
| 127 | return
|
---|
| 128 |
|
---|
| 129 | # loop detection
|
---|
| 130 | # .redirect_dict has a key url if url was previously visited.
|
---|
| 131 | if hasattr(req, 'redirect_dict'):
|
---|
| 132 | visited = new.redirect_dict = req.redirect_dict
|
---|
| 133 | if (visited.get(newurl, 0) >= self.max_repeats or
|
---|
| 134 | len(visited) >= self.max_redirections):
|
---|
| 135 | raise HTTPError(req.get_full_url(), code,
|
---|
| 136 | self.inf_msg + msg, headers, fp)
|
---|
| 137 | else:
|
---|
| 138 | visited = new.redirect_dict = req.redirect_dict = {}
|
---|
| 139 | visited[newurl] = visited.get(newurl, 0) + 1
|
---|
| 140 |
|
---|
| 141 | # Don't close the fp until we are sure that we won't use it
|
---|
| 142 | # with HTTPError.
|
---|
| 143 | fp.read()
|
---|
| 144 | fp.close()
|
---|
| 145 |
|
---|
| 146 | return self.parent.open(new)
|
---|
| 147 |
|
---|
| 148 | http_error_301 = http_error_303 = http_error_307 = http_error_302
|
---|
| 149 | http_error_refresh = http_error_302
|
---|
| 150 |
|
---|
| 151 | inf_msg = "The HTTP server returned a redirect error that would " \
|
---|
| 152 | "lead to an infinite loop.\n" \
|
---|
| 153 | "The last 30x error message was:\n"
|
---|
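
# Illustrative sketch of the loop detection above (not executed here;
# mechanize.build_opener and the URL are assumptions for illustration):
#
#   import mechanize
#   opener = mechanize.build_opener(HTTPRedirectHandler())
#   response = opener.open("http://example.com/redirecting-page")
#   # More than max_repeats visits to one URL, or more than
#   # max_redirections hops in total, raises HTTPError with inf_msg.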


# XXX would self.reset() work, instead of raising this exception?
class EndOfHeadError(Exception): pass
class AbstractHeadParser:
    # only these elements are allowed in or before HEAD of document
    head_elems = ("html", "head",
                  "title", "base",
                  "script", "style", "meta", "link", "object")
    _entitydefs = htmlentitydefs.name2codepoint
    _encoding = DEFAULT_ENCODING

    def __init__(self):
        self.http_equiv = []

    def start_meta(self, attrs):
        http_equiv = content = None
        for key, value in attrs:
            if key == "http-equiv":
                http_equiv = self.unescape_attr_if_required(value)
            elif key == "content":
                content = self.unescape_attr_if_required(value)
        if http_equiv is not None and content is not None:
            self.http_equiv.append((http_equiv, content))

    def end_head(self):
        raise EndOfHeadError()

    def handle_entityref(self, name):
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            escaped_attrs[key] = self.unescape_attr(val)
        return escaped_attrs

    def unknown_entityref(self, ref):
        self.handle_data("&%s;" % ref)

    def unknown_charref(self, ref):
        self.handle_data("&#%s;" % ref)


try:
    import HTMLParser
except ImportError:
    pass
else:
    class XHTMLCompatibleHeadParser(AbstractHeadParser,
                                    HTMLParser.HTMLParser):
        def __init__(self):
            HTMLParser.HTMLParser.__init__(self)
            AbstractHeadParser.__init__(self)

        def handle_starttag(self, tag, attrs):
            if tag not in self.head_elems:
                raise EndOfHeadError()
            try:
                method = getattr(self, 'start_' + tag)
            except AttributeError:
                try:
                    method = getattr(self, 'do_' + tag)
                except AttributeError:
                    pass  # unknown tag
                else:
                    method(attrs)
            else:
                method(attrs)

        def handle_endtag(self, tag):
            if tag not in self.head_elems:
                raise EndOfHeadError()
            try:
                method = getattr(self, 'end_' + tag)
            except AttributeError:
                pass  # unknown tag
            else:
                method()

        def unescape(self, name):
            # Use the entitydefs passed into constructor, not
            # HTMLParser.HTMLParser's entitydefs.
            return self.unescape_attr(name)

        def unescape_attr_if_required(self, name):
            return name  # HTMLParser.HTMLParser already did it

class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):

    def _not_called(self):
        assert False

    def __init__(self):
        sgmllib.SGMLParser.__init__(self)
        AbstractHeadParser.__init__(self)

    def handle_starttag(self, tag, method, attrs):
        if tag not in self.head_elems:
            raise EndOfHeadError()
        if tag == "meta":
            method(attrs)

    def unknown_starttag(self, tag, attrs):
        self.handle_starttag(tag, self._not_called, attrs)

    def handle_endtag(self, tag, method):
        if tag in self.head_elems:
            method()
        else:
            raise EndOfHeadError()

    def unescape_attr_if_required(self, name):
        return self.unescape_attr(name)

def parse_head(fileobj, parser):
    """Return a list of key, value pairs."""
    while 1:
        data = fileobj.read(CHUNK)
        try:
            parser.feed(data)
        except EndOfHeadError:
            break
        if len(data) != CHUNK:
            # this should only happen if there is no HTML body, or if
            # CHUNK is big
            break
    return parser.http_equiv
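
# Illustrative use of parse_head (a sketch, not executed here; StringIO is
# the module-level import above):
#
#   html = '<html><head><meta http-equiv="refresh" content="5"></head></html>'
#   parse_head(StringIO(html), HeadParser())   # -> [('refresh', '5')]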

class HTTPEquivProcessor(BaseHandler):
    """Append META HTTP-EQUIV headers to regular HTTP headers."""

    handler_order = 300  # before handlers that look at HTTP headers

    def __init__(self, head_parser_class=HeadParser,
                 i_want_broken_xhtml_support=False,
                 ):
        self.head_parser_class = head_parser_class
        self._allow_xhtml = i_want_broken_xhtml_support

    def http_response(self, request, response):
        if not hasattr(response, "seek"):
            response = response_seek_wrapper(response)
        http_message = response.info()
        url = response.geturl()
        ct_hdrs = http_message.getheaders("content-type")
        if is_html(ct_hdrs, url, self._allow_xhtml):
            try:
                try:
                    html_headers = parse_head(response,
                                              self.head_parser_class())
                finally:
                    response.seek(0)
            except (HTMLParser.HTMLParseError,
                    sgmllib.SGMLParseError):
                pass
            else:
                for hdr, val in html_headers:
                    # add a header
                    http_message.dict[hdr.lower()] = val
                    text = hdr + ": " + val
                    for line in text.split("\n"):
                        http_message.headers.append(line + "\n")
        return response

    https_response = http_response
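
# Sketch of the effect (illustrative only): after this processor runs, a
# document whose <head> contains
#   <meta http-equiv="Refresh" content="5; url=http://example.com/">
# yields response.info()["refresh"] == "5; url=http://example.com/", so
# later handlers (e.g. HTTPRefreshProcessor below) see it as if the server
# had sent a real Refresh header.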

class HTTPCookieProcessor(BaseHandler):
    """Handle HTTP cookies.

    Public attributes:

    cookiejar: CookieJar instance

    """
    def __init__(self, cookiejar=None):
        if cookiejar is None:
            cookiejar = CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
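
# Illustrative use (a sketch; installing the processor in an opener is
# outside this module):
#
#   jar = CookieJar()
#   processor = HTTPCookieProcessor(jar)
#   # ... open a URL through an opener that uses `processor` ...
#   for cookie in jar:            # jar now holds cookies the server set
#       print cookie.name, cookie.value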

try:
    import robotparser
except ImportError:
    pass
else:
    class MechanizeRobotFileParser(robotparser.RobotFileParser):

        def __init__(self, url='', opener=None):
            robotparser.RobotFileParser.__init__(self, url)
            self._opener = opener
            self._timeout = _sockettimeout._GLOBAL_DEFAULT_TIMEOUT

        def set_opener(self, opener=None):
            import _opener
            if opener is None:
                opener = _opener.OpenerDirector()
            self._opener = opener

        def set_timeout(self, timeout):
            self._timeout = timeout

        def read(self):
            """Reads the robots.txt URL and feeds it to the parser."""
            if self._opener is None:
                self.set_opener()
            req = Request(self.url, unverifiable=True, visit=False,
                          timeout=self._timeout)
            try:
                f = self._opener.open(req)
            except HTTPError, f:
                pass
            except (IOError, socket.error, OSError), exc:
                debug_robots("ignoring error opening %r: %s" %
                             (self.url, exc))
                return
            lines = []
            line = f.readline()
            while line:
                lines.append(line.strip())
                line = f.readline()
            status = f.code
            if status == 401 or status == 403:
                self.disallow_all = True
                debug_robots("disallow all")
            elif status >= 400:
                self.allow_all = True
                debug_robots("allow all")
            elif status == 200 and lines:
                debug_robots("parse lines")
                self.parse(lines)
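
    # Illustrative use (a sketch; the URL and user-agent are made up,
    # can_fetch comes from the robotparser base class):
    #
    #   rfp = MechanizeRobotFileParser("http://example.com/robots.txt")
    #   rfp.read()
    #   rfp.can_fetch("my-agent", "http://example.com/private/page.html")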

    class RobotExclusionError(urllib2.HTTPError):
        def __init__(self, request, *args):
            urllib2.HTTPError.__init__(self, *args)
            self.request = request

    class HTTPRobotRulesProcessor(BaseHandler):
        # before redirections, after everything else
        handler_order = 800

        try:
            from httplib import HTTPMessage
        except ImportError:
            from mimetools import Message
            http_response_class = Message
        else:
            http_response_class = HTTPMessage

        def __init__(self, rfp_class=MechanizeRobotFileParser):
            self.rfp_class = rfp_class
            self.rfp = None
            self._host = None

        def http_request(self, request):
            scheme = request.get_type()
            if scheme not in ["http", "https"]:
                # robots exclusion only applies to HTTP
                return request

            if request.get_selector() == "/robots.txt":
                # /robots.txt is always OK to fetch
                return request

            host = request.get_host()

            # robots.txt requests don't need to be allowed by robots.txt :-)
            origin_req = getattr(request, "_origin_req", None)
            if (origin_req is not None and
                origin_req.get_selector() == "/robots.txt" and
                origin_req.get_host() == host
                ):
                return request

            if host != self._host:
                self.rfp = self.rfp_class()
                try:
                    self.rfp.set_opener(self.parent)
                except AttributeError:
                    debug("%r instance does not support set_opener" %
                          self.rfp.__class__)
                self.rfp.set_url(scheme + "://" + host + "/robots.txt")
                self.rfp.set_timeout(request.timeout)
                self.rfp.read()
                self._host = host

            ua = request.get_header("User-agent", "")
            if self.rfp.can_fetch(ua, request.get_full_url()):
                return request
            else:
                # XXX This should really have raised URLError.  Too late now...
                msg = "request disallowed by robots.txt"
                raise RobotExclusionError(
                    request,
                    request.get_full_url(),
                    403, msg,
                    self.http_response_class(StringIO()), StringIO(msg))

        https_request = http_request
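
    # Sketch of the failure mode (illustrative; `opener` is assumed to be
    # an OpenerDirector using this handler): a fetch blocked by robots.txt
    # surfaces as RobotExclusionError, an HTTPError subclass with code 403:
    #
    #   try:
    #       opener.open("http://example.com/disallowed")
    #   except RobotExclusionError, exc:
    #       print exc.code  # 403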

class HTTPRefererProcessor(BaseHandler):
    """Add Referer header to requests.

    This only makes sense if you use each RefererProcessor for a single
    chain of requests only (so, for example, if you use a single
    HTTPRefererProcessor to fetch a series of URLs extracted from a single
    page, this will break).

    There's a proper implementation of this in mechanize.Browser.

    """
    def __init__(self):
        self.referer = None

    def http_request(self, request):
        if ((self.referer is not None) and
            not request.has_header("Referer")):
            request.add_unredirected_header("Referer", self.referer)
        return request

    def http_response(self, request, response):
        self.referer = response.geturl()
        return response

    https_request = http_request
    https_response = http_response
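
# Illustrative chain (a sketch; `opener` is assumed): with a single
# HTTPRefererProcessor installed, the second request in a chain carries
# the first response's URL as its Referer:
#
#   opener.open("http://example.com/a")   # no Referer sent
#   opener.open("http://example.com/b")   # Referer: http://example.com/a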


def clean_refresh_url(url):
    # e.g. Firefox 1.5 does (something like) this
    if ((url.startswith('"') and url.endswith('"')) or
        (url.startswith("'") and url.endswith("'"))):
        url = url[1:-1]
    return _rfc3986.clean_url(url, "latin-1")  # XXX encoding
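
# For example (matching the doctests of parse_refresh_header below):
#
#   clean_refresh_url("'http://example.com/'")  # -> 'http://example.com/'
#   clean_refresh_url("http://example.com/")    # -> 'http://example.com/'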

def parse_refresh_header(refresh):
    """
    >>> parse_refresh_header("1; url=http://example.com/")
    (1.0, 'http://example.com/')
    >>> parse_refresh_header("1; url='http://example.com/'")
    (1.0, 'http://example.com/')
    >>> parse_refresh_header("1")
    (1.0, None)
    >>> parse_refresh_header("blah")
    Traceback (most recent call last):
    ValueError: invalid literal for float(): blah

    """

    ii = refresh.find(";")
    if ii != -1:
        pause, newurl_spec = float(refresh[:ii]), refresh[ii + 1:]
        jj = newurl_spec.find("=")
        key = None
        if jj != -1:
            key, newurl = newurl_spec[:jj], newurl_spec[jj + 1:]
            newurl = clean_refresh_url(newurl)
        if key is None or key.strip().lower() != "url":
            raise ValueError()
    else:
        pause, newurl = float(refresh), None
    return pause, newurl

class HTTPRefreshProcessor(BaseHandler):
    """Perform HTTP Refresh redirections.

    Note that if a non-200 HTTP code has occurred (for example, a 30x
    redirect), this processor will do nothing.

    By default, only zero-time Refresh headers are redirected.  Use the
    max_time attribute / constructor argument to allow Refresh with longer
    pauses.  Use the honor_time attribute / constructor argument to control
    whether the requested pause is honoured (with a time.sleep()) or
    skipped in favour of immediate redirection.

    Public attributes:

    max_time: see above
    honor_time: see above

    """
    handler_order = 1000

    def __init__(self, max_time=0, honor_time=True):
        self.max_time = max_time
        self.honor_time = honor_time
        self._sleep = time.sleep

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code == 200 and hdrs.has_key("refresh"):
            refresh = hdrs.getheaders("refresh")[0]
            try:
                pause, newurl = parse_refresh_header(refresh)
            except ValueError:
                debug("bad Refresh header: %r" % refresh)
                return response

            if newurl is None:
                newurl = response.geturl()
            if (self.max_time is None) or (pause <= self.max_time):
                if pause > 1E-3 and self.honor_time:
                    self._sleep(pause)
                hdrs["location"] = newurl
                # hardcoded http is NOT a bug
                response = self.parent.error(
                    "http", request, response,
                    "refresh", msg, hdrs)
            else:
                debug("Refresh header ignored: %r" % refresh)

        return response

    https_response = http_response
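
# Illustrative configuration (a sketch): follow every Refresh, however
# long the requested pause, without actually sleeping:
#
#   processor = HTTPRefreshProcessor(max_time=None, honor_time=False)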

class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses.

    The purpose of this handler is to allow other response processors a
    look-in by removing the call to parent.error() from
    AbstractHTTPHandler.

    For non-200 error codes, this just passes the job on to the
    Handler.<proto>_error_<code> methods, via the OpenerDirector.error
    method.  Eventually, urllib2.HTTPDefaultErrorHandler will raise an
    HTTPError if no other handler handles the error.

    """
    handler_order = 1000  # after all other processors

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code != 200:
            # hardcoded http is NOT a bug
            response = self.parent.error(
                "http", request, response, code, msg, hdrs)

        return response

    https_response = http_response


class HTTPDefaultErrorHandler(BaseHandler):
    def http_error_default(self, req, fp, code, msg, hdrs):
        # why these error methods took the code, msg, headers args in the first
        # place rather than a response object, I don't know, but to avoid
        # multiple wrapping, we're discarding them

        if isinstance(fp, urllib2.HTTPError):
            response = fp
        else:
            response = urllib2.HTTPError(
                req.get_full_url(), code, msg, hdrs, fp)
        assert code == response.code
        assert msg == response.msg
        assert hdrs == response.hdrs
        raise response


class AbstractHTTPHandler(BaseHandler):

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        scheme, sel = urllib.splittype(request.get_selector())
        sel_host, sel_path = urllib.splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host_port = req.get_host()
        if not host_port:
            raise URLError('no host given')

        try:
            h = http_class(host_port, timeout=req.timeout)
        except TypeError:
            # Python < 2.6, no per-connection timeout support
            h = http_class(host_port)
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict(
            [(name.title(), val) for name, val in headers.items()])
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = create_readline_wrapper(r)

        resp = closeable_response(fp, r.msg, req.get_full_url(),
                                  r.status, r.reason)
        return resp
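
# Illustrative use of the response object documented in do_open above (a
# sketch; the handler is normally driven by an OpenerDirector, assumed
# here as `opener`, rather than called directly):
#
#   resp = opener.open("http://example.com/")
#   resp.code       # HTTP status, e.g. 200
#   resp.geturl()   # the URL actually fetched
#   resp.info()     # RFC 822-style header object
#   body = resp.read()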


class HTTPHandler(AbstractHTTPHandler):
    def http_open(self, req):
        return self.do_open(httplib.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_

if hasattr(httplib, 'HTTPS'):

    class HTTPSConnectionFactory:
        def __init__(self, key_file, cert_file):
            self._key_file = key_file
            self._cert_file = cert_file
        def __call__(self, hostport):
            return httplib.HTTPSConnection(
                hostport,
                key_file=self._key_file, cert_file=self._cert_file)

    class HTTPSHandler(AbstractHTTPHandler):
        def __init__(self, client_cert_manager=None):
            AbstractHTTPHandler.__init__(self)
            self.client_cert_manager = client_cert_manager

        def https_open(self, req):
            if self.client_cert_manager is not None:
                key_file, cert_file = self.client_cert_manager.find_key_cert(
                    req.get_full_url())
                conn_factory = HTTPSConnectionFactory(key_file, cert_file)
            else:
                conn_factory = httplib.HTTPSConnection
            return self.do_open(conn_factory, req)

        https_request = AbstractHTTPHandler.do_request_