| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 """Classes and functions for generic network communication over HTTP.""" |
| 6 |
| 7 import cookielib |
| 8 import cStringIO as StringIO |
| 9 import httplib |
| 10 import itertools |
| 11 import logging |
| 12 import math |
| 13 import os |
| 14 import random |
| 15 import socket |
| 16 import ssl |
| 17 import threading |
| 18 import time |
| 19 import urllib |
| 20 import urllib2 |
| 21 import urlparse |
| 22 |
| 23 from third_party.rietveld import upload |
| 24 |
| 25 # Hack out upload logging.info() |
| 26 upload.logging = logging.getLogger('upload') |
| 27 # Mac pylint choke on this line. |
| 28 upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103 |
| 29 |
| 30 |
| 31 # The name of the key to store the count of url attempts. |
| 32 COUNT_KEY = 'UrlOpenAttempt' |
| 33 |
| 34 # Default maximum number of attempts to trying opening a url before aborting. |
| 35 URL_OPEN_MAX_ATTEMPTS = 30 |
| 36 |
| 37 # Default timeout when retrying. |
| 38 URL_OPEN_TIMEOUT = 6*60. |
| 39 |
| 40 |
| 41 # Global (for now) map: server URL (http://example.com) -> HttpService instance. |
| 42 # Used by get_http_service to cache HttpService instances. |
| 43 _http_services = {} |
| 44 _http_services_lock = threading.Lock() |
| 45 |
| 46 |
| 47 class TimeoutError(IOError): |
| 48 """Timeout while reading HTTP response.""" |
| 49 |
| 50 def __init__(self, inner_exc=None): |
| 51 super(TimeoutError, self).__init__(str(inner_exc or 'Timeout')) |
| 52 self.inner_exc = inner_exc |
| 53 |
| 54 |
| 55 def url_open(url, **kwargs): |
| 56 """Attempts to open the given url multiple times. |
| 57 |
| 58 |data| can be either: |
| 59 -None for a GET request |
| 60 -str for pre-encoded data |
| 61 -list for data to be encoded |
| 62 -dict for data to be encoded (COUNT_KEY will be added in this case) |
| 63 |
| 64 Returns HttpResponse object, where the response may be read from, or None |
| 65 if it was unable to connect. |
| 66 """ |
| 67 urlhost, urlpath = split_server_request_url(url) |
| 68 service = get_http_service(urlhost) |
| 69 return service.request(urlpath, **kwargs) |
| 70 |
| 71 |
| 72 def url_read(url, **kwargs): |
| 73 """Attempts to open the given url multiple times and read all data from it. |
| 74 |
| 75 Accepts same arguments as url_open function. |
| 76 |
| 77 Returns all data read or None if it was unable to connect or read the data. |
| 78 """ |
| 79 response = url_open(url, **kwargs) |
| 80 if not response: |
| 81 return None |
| 82 try: |
| 83 return response.read() |
| 84 except TimeoutError: |
| 85 return None |
| 86 |
| 87 |
| 88 def split_server_request_url(url): |
| 89 """Splits the url into scheme+netloc and path+params+query+fragment.""" |
| 90 url_parts = list(urlparse.urlparse(url)) |
| 91 urlhost = '%s://%s' % (url_parts[0], url_parts[1]) |
| 92 urlpath = urlparse.urlunparse(['', ''] + url_parts[2:]) |
| 93 return urlhost, urlpath |
| 94 |
| 95 |
| 96 def get_http_service(urlhost): |
| 97 """Returns existing or creates new instance of HttpService that can send |
| 98 requests to given base urlhost. |
| 99 """ |
| 100 # Ensure consistency. |
| 101 urlhost = str(urlhost).lower().rstrip('/') |
| 102 with _http_services_lock: |
| 103 service = _http_services.get(urlhost) |
| 104 if not service: |
| 105 service = AppEngineService(urlhost) |
| 106 _http_services[urlhost] = service |
| 107 return service |
| 108 |
| 109 |
| 110 class HttpService(object): |
| 111 """Base class for a class that provides an API to HTTP based service: |
| 112 - Provides 'request' method. |
| 113 - Supports automatic request retries. |
| 114 - Supports persistent cookies. |
| 115 - Thread safe. |
| 116 """ |
| 117 |
| 118 # File to use to store all auth cookies. |
| 119 COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies') |
| 120 |
| 121 # CookieJar reused by all services + lock that protects its instantiation. |
| 122 _cookie_jar = None |
| 123 _cookie_jar_lock = threading.Lock() |
| 124 |
| 125 def __init__(self, urlhost): |
| 126 self.urlhost = urlhost |
| 127 self.cookie_jar = self.load_cookie_jar() |
| 128 self.opener = self.create_url_opener() |
| 129 |
| 130 def authenticate(self): # pylint: disable=R0201 |
| 131 """Called when HTTP server asks client to authenticate. |
| 132 Can be implemented in subclasses. |
| 133 """ |
| 134 return False |
| 135 |
| 136 @staticmethod |
| 137 def load_cookie_jar(): |
| 138 """Returns global CoookieJar object that stores cookies in the file.""" |
| 139 with HttpService._cookie_jar_lock: |
| 140 if HttpService._cookie_jar is not None: |
| 141 return HttpService._cookie_jar |
| 142 jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE) |
| 143 jar.load() |
| 144 HttpService._cookie_jar = jar |
| 145 return jar |
| 146 |
| 147 @staticmethod |
| 148 def save_cookie_jar(): |
| 149 """Called when cookie jar needs to be flushed to disk.""" |
| 150 with HttpService._cookie_jar_lock: |
| 151 if HttpService._cookie_jar is not None: |
| 152 HttpService._cookie_jar.save() |
| 153 |
| 154 def create_url_opener(self): # pylint: disable=R0201 |
| 155 """Returns OpenerDirector that will be used when sending requests. |
| 156 Can be reimplemented in subclasses.""" |
| 157 return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar)) |
| 158 |
| 159 def request(self, urlpath, data=None, content_type=None, **kwargs): |
| 160 """Attempts to open the given url multiple times. |
| 161 |
| 162 |urlpath| is relative to the server root, i.e. '/some/request?param=1'. |
| 163 |
| 164 |data| can be either: |
| 165 -None for a GET request |
| 166 -str for pre-encoded data |
| 167 -list for data to be encoded |
| 168 -dict for data to be encoded (COUNT_KEY will be added in this case) |
| 169 |
| 170 Returns a file-like object, where the response may be read from, or None |
| 171 if it was unable to connect. |
| 172 """ |
| 173 assert urlpath and urlpath[0] == '/' |
| 174 |
| 175 if isinstance(data, dict) and COUNT_KEY in data: |
| 176 logging.error('%s already existed in the data passed into UlrOpen. It ' |
| 177 'would be overwritten. Aborting UrlOpen', COUNT_KEY) |
| 178 return None |
| 179 |
| 180 method = 'GET' if data is None else 'POST' |
| 181 assert not ((method != 'POST') and content_type), ( |
| 182 'Can\'t use content_type on GET') |
| 183 |
| 184 def make_request(extra): |
| 185 """Returns a urllib2.Request instance for this specific retry.""" |
| 186 if isinstance(data, str) or data is None: |
| 187 payload = data |
| 188 else: |
| 189 if isinstance(data, dict): |
| 190 payload = data.items() |
| 191 else: |
| 192 payload = data[:] |
| 193 payload.extend(extra.iteritems()) |
| 194 payload = urllib.urlencode(payload) |
| 195 new_url = urlparse.urljoin(self.urlhost, urlpath[1:]) |
| 196 if isinstance(data, str) or data is None: |
| 197 # In these cases, add the extra parameter to the query part of the url. |
| 198 url_parts = list(urlparse.urlparse(new_url)) |
| 199 # Append the query parameter. |
| 200 if url_parts[4] and extra: |
| 201 url_parts[4] += '&' |
| 202 url_parts[4] += urllib.urlencode(extra) |
| 203 new_url = urlparse.urlunparse(url_parts) |
| 204 request = urllib2.Request(new_url, data=payload) |
| 205 if payload is not None: |
| 206 if content_type: |
| 207 request.add_header('Content-Type', content_type) |
| 208 request.add_header('Content-Length', len(payload)) |
| 209 return request |
| 210 |
| 211 return self._retry_loop(make_request, **kwargs) |
| 212 |
| 213 def _retry_loop( |
| 214 self, |
| 215 make_request, |
| 216 max_attempts=URL_OPEN_MAX_ATTEMPTS, |
| 217 retry_404=False, |
| 218 retry_50x=True, |
| 219 timeout=URL_OPEN_TIMEOUT, |
| 220 read_timeout=None): |
| 221 """Runs internal request-retry loop. |
| 222 |
| 223 - Optionally retries HTTP 404 and 50x. |
| 224 - Retries up to |max_attempts| times. If None or 0, there's no limit in the |
| 225 number of retries. |
| 226 - Retries up to |timeout| duration in seconds. If None or 0, there's no |
| 227 limit in the time taken to do retries. |
| 228 - If both |max_attempts| and |timeout| are None or 0, this functions retries |
| 229 indefinitely. |
| 230 |
| 231 If |read_timeout| is not None will configure underlying socket to |
| 232 raise TimeoutError exception whenever there's no response from the server |
| 233 for more than |read_timeout| seconds. It can happen during any read |
| 234 operation so once you pass non-None |read_timeout| be prepared to handle |
| 235 these exceptions in subsequent reads from the stream. |
| 236 """ |
| 237 authenticated = False |
| 238 last_error = None |
| 239 attempt = 0 |
| 240 start = self._now() |
| 241 for attempt in itertools.count(): |
| 242 if max_attempts and attempt >= max_attempts: |
| 243 # Too many attempts. |
| 244 break |
| 245 if timeout and (self._now() - start) >= timeout: |
| 246 # Retried for too long. |
| 247 break |
| 248 extra = {COUNT_KEY: attempt} if attempt else {} |
| 249 request = make_request(extra) |
| 250 try: |
| 251 url_response = self._url_open(request, timeout=read_timeout) |
| 252 logging.debug('url_open(%s) succeeded', request.get_full_url()) |
| 253 # Some tests mock url_open to return StringIO without 'headers'. |
| 254 return HttpResponse(url_response, request.get_full_url(), |
| 255 getattr(url_response, 'headers', {})) |
| 256 except urllib2.HTTPError as e: |
| 257 # Unauthorized. Ask to authenticate and then try again. |
| 258 if e.code in (401, 403): |
| 259 # Try to authenticate only once. If it doesn't help, then server does |
| 260 # not support app engine authentication. |
| 261 logging.error( |
| 262 'Authentication is required for %s on attempt %d.\n%s', |
| 263 request.get_full_url(), attempt, |
| 264 self._format_exception(e, verbose=True)) |
| 265 if not authenticated and self.authenticate(): |
| 266 authenticated = True |
| 267 # Do not sleep. |
| 268 continue |
| 269 # If authentication failed, return. |
| 270 logging.error( |
| 271 'Unable to authenticate to %s.\n%s', |
| 272 request.get_full_url(), self._format_exception(e, verbose=True)) |
| 273 return None |
| 274 |
| 275 if ((e.code < 500 and not (retry_404 and e.code == 404)) or |
| 276 (e.code >= 500 and not retry_50x)): |
| 277 # This HTTPError means we reached the server and there was a problem |
| 278 # with the request, so don't retry. |
| 279 logging.error( |
| 280 'Able to connect to %s but an exception was thrown.\n%s', |
| 281 request.get_full_url(), self._format_exception(e, verbose=True)) |
| 282 return None |
| 283 |
| 284 # The HTTPError was due to a server error, so retry the attempt. |
| 285 logging.warning('Able to connect to %s on attempt %d.\n%s', |
| 286 request.get_full_url(), attempt, |
| 287 self._format_exception(e)) |
| 288 last_error = e |
| 289 |
| 290 except (urllib2.URLError, httplib.HTTPException, |
| 291 socket.timeout, ssl.SSLError) as e: |
| 292 logging.warning('Unable to open url %s on attempt %d.\n%s', |
| 293 request.get_full_url(), attempt, |
| 294 self._format_exception(e)) |
| 295 last_error = e |
| 296 |
| 297 # Only sleep if we are going to try again. |
| 298 if max_attempts and attempt != max_attempts: |
| 299 remaining = None |
| 300 if timeout: |
| 301 remaining = timeout - (self._now() - start) |
| 302 if remaining <= 0: |
| 303 break |
| 304 self.sleep_before_retry(attempt, remaining) |
| 305 |
| 306 logging.error('Unable to open given url, %s, after %d attempts.\n%s', |
| 307 request.get_full_url(), max_attempts, |
| 308 self._format_exception(last_error, verbose=True)) |
| 309 return None |
| 310 |
| 311 def _url_open(self, request, timeout=None): |
| 312 """Low level method to execute urllib2.Request's. |
| 313 |
| 314 To be mocked in tests. |
| 315 """ |
| 316 if timeout is not None: |
| 317 return self.opener.open(request, timeout=timeout) |
| 318 else: |
| 319 # Leave original default value for |timeout|. It's nontrivial. |
| 320 return self.opener.open(request) |
| 321 |
| 322 @staticmethod |
| 323 def _now(): |
| 324 """To be mocked in tests.""" |
| 325 return time.time() |
| 326 |
| 327 @staticmethod |
| 328 def calculate_sleep_before_retry(attempt, max_duration): |
| 329 # Maximum sleeping time. We're hammering a cloud-distributed service, it'll |
| 330 # survive. |
| 331 MAX_SLEEP = 10. |
| 332 # random.random() returns [0.0, 1.0). Starts with relatively short waiting |
| 333 # time by starting with 1.5/2+1.5^-1 median offset. |
| 334 duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1)) |
| 335 assert duration > 0.1 |
| 336 duration = min(MAX_SLEEP, duration) |
| 337 if max_duration: |
| 338 duration = min(max_duration, duration) |
| 339 return duration |
| 340 |
| 341 @classmethod |
| 342 def sleep_before_retry(cls, attempt, max_duration): |
| 343 """Sleeps for some amount of time when retrying the request. |
| 344 |
| 345 To be mocked in tests. |
| 346 """ |
| 347 time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration)) |
| 348 |
| 349 @staticmethod |
| 350 def _format_exception(exc, verbose=False): |
| 351 """Given an instance of some exception raised by urlopen returns human |
| 352 readable piece of text with detailed information about the error. |
| 353 """ |
| 354 out = ['Exception: %s' % (exc,)] |
| 355 if verbose: |
| 356 if isinstance(exc, urllib2.HTTPError): |
| 357 out.append('-' * 10) |
| 358 if exc.hdrs: |
| 359 for header, value in exc.hdrs.items(): |
| 360 if not header.startswith('x-'): |
| 361 out.append('%s: %s' % (header.capitalize(), value)) |
| 362 out.append('') |
| 363 out.append(exc.read() or '<empty body>') |
| 364 out.append('-' * 10) |
| 365 return '\n'.join(out) |
| 366 |
| 367 |
| 368 class HttpResponse(object): |
| 369 """Response from HttpService.""" |
| 370 |
| 371 def __init__(self, stream, url, headers): |
| 372 self._stream = stream |
| 373 self._url = url |
| 374 self._headers = headers |
| 375 self._read = 0 |
| 376 |
| 377 @property |
| 378 def content_length(self): |
| 379 """Total length to the response or None if not known in advance.""" |
| 380 length = self._headers.get('Content-Length') |
| 381 return int(length) if length is not None else None |
| 382 |
| 383 def read(self, size=None): |
| 384 """Reads up to |size| bytes from the stream and returns them. |
| 385 |
| 386 If |size| is None reads all available bytes. |
| 387 |
| 388 Raises TimeoutError on read timeout. |
| 389 """ |
| 390 try: |
| 391 # cStringIO has a bug: stream.read(None) is not the same as stream.read(). |
| 392 data = self._stream.read() if size is None else self._stream.read(size) |
| 393 self._read += len(data) |
| 394 return data |
| 395 except (socket.timeout, ssl.SSLError) as e: |
| 396 logging.error('Timeout while reading from %s, read %d of %s: %s', |
| 397 self._url, self._read, self.content_length, e) |
| 398 raise TimeoutError(e) |
| 399 |
| 400 @classmethod |
| 401 def get_fake_response(cls, content, url): |
| 402 """Returns HttpResponse with predefined content, useful in tests.""" |
| 403 return cls(StringIO.StringIO(content), |
| 404 url, {'content-length': len(content)}) |
| 405 |
| 406 |
| 407 |
| 408 class AppEngineService(HttpService): |
| 409 """This class implements authentication support for |
| 410 an app engine based services. |
| 411 """ |
| 412 |
| 413 # This lock ensures that user won't be confused with multiple concurrent |
| 414 # login prompts. |
| 415 _auth_lock = threading.Lock() |
| 416 |
| 417 def __init__(self, urlhost, email=None, password=None): |
| 418 super(AppEngineService, self).__init__(urlhost) |
| 419 self.email = email |
| 420 self.password = password |
| 421 self._keyring = None |
| 422 |
| 423 def authenticate(self): |
| 424 """Authenticates in the app engine application. |
| 425 Returns True on success. |
| 426 """ |
| 427 if not upload: |
| 428 logging.error('\'upload\' module is missing, ' |
| 429 'app engine authentication is disabled.') |
| 430 return False |
| 431 cookie_jar = self.cookie_jar |
| 432 save_cookie_jar = self.save_cookie_jar |
| 433 # RPC server that uses AuthenticationSupport's cookie jar. |
| 434 class AuthServer(upload.AbstractRpcServer): |
| 435 def _GetOpener(self): |
| 436 # Authentication code needs to know about 302 response. |
| 437 # So make OpenerDirector without HTTPRedirectHandler. |
| 438 opener = urllib2.OpenerDirector() |
| 439 opener.add_handler(urllib2.ProxyHandler()) |
| 440 opener.add_handler(urllib2.UnknownHandler()) |
| 441 opener.add_handler(urllib2.HTTPHandler()) |
| 442 opener.add_handler(urllib2.HTTPDefaultErrorHandler()) |
| 443 opener.add_handler(urllib2.HTTPSHandler()) |
| 444 opener.add_handler(urllib2.HTTPErrorProcessor()) |
| 445 opener.add_handler(urllib2.HTTPCookieProcessor(cookie_jar)) |
| 446 return opener |
| 447 def PerformAuthentication(self): |
| 448 self._Authenticate() |
| 449 save_cookie_jar() |
| 450 return self.authenticated |
| 451 with AppEngineService._auth_lock: |
| 452 rpc_server = AuthServer(self.urlhost, self.get_credentials) |
| 453 return rpc_server.PerformAuthentication() |
| 454 |
| 455 def get_credentials(self): |
| 456 """Called during authentication process to get the credentials. |
| 457 May be called mutliple times if authentication fails. |
| 458 Returns tuple (email, password). |
| 459 """ |
| 460 # 'authenticate' calls this only if 'upload' is present. |
| 461 # Ensure other callers (if any) fail non-cryptically if 'upload' is missing. |
| 462 assert upload, '\'upload\' module is required for this to work' |
| 463 if self.email and self.password: |
| 464 return (self.email, self.password) |
| 465 if not self._keyring: |
| 466 self._keyring = upload.KeyringCreds(self.urlhost, |
| 467 self.urlhost, |
| 468 self.email) |
| 469 return self._keyring.GetUserCredentials() |
| 470 |
| 471 |
| 472 class ThreadSafeCookieJar(cookielib.MozillaCookieJar): |
| 473 """MozillaCookieJar with thread safe load and save.""" |
| 474 |
| 475 def load(self, filename=None, ignore_discard=False, ignore_expires=False): |
| 476 """Loads cookies from the file if it exists.""" |
| 477 filename = os.path.expanduser(filename or self.filename) |
| 478 with self._cookies_lock: |
| 479 if os.path.exists(filename): |
| 480 try: |
| 481 cookielib.MozillaCookieJar.load(self, filename, |
| 482 ignore_discard, |
| 483 ignore_expires) |
| 484 logging.debug('Loaded cookies from %s', filename) |
| 485 except (cookielib.LoadError, IOError): |
| 486 pass |
| 487 else: |
| 488 try: |
| 489 fd = os.open(filename, os.O_CREAT, 0600) |
| 490 os.close(fd) |
| 491 except OSError: |
| 492 logging.error('Failed to create %s', filename) |
| 493 try: |
| 494 os.chmod(filename, 0600) |
| 495 except OSError: |
| 496 logging.error('Failed to fix mode for %s', filename) |
| 497 |
| 498 def save(self, filename=None, ignore_discard=False, ignore_expires=False): |
| 499 """Saves cookies to the file, completely overwriting it.""" |
| 500 logging.debug('Saving cookies to %s', filename or self.filename) |
| 501 with self._cookies_lock: |
| 502 try: |
| 503 cookielib.MozillaCookieJar.save(self, filename, |
| 504 ignore_discard, |
| 505 ignore_expires) |
| 506 except OSError: |
| 507 logging.error('Failed to save %s', filename) |
| OLD | NEW |