| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Archives a set of files to a server.""" | 6 """Archives a set of files to a server.""" |
| 7 | 7 |
| 8 import binascii | 8 import binascii |
| 9 import cStringIO | 9 import cStringIO |
| 10 import hashlib | 10 import hashlib |
| 11 import itertools | 11 import itertools |
| 12 import logging | 12 import logging |
| 13 import optparse | 13 import optparse |
| 14 import os | 14 import os |
| 15 import sys | 15 import sys |
| 16 import time | 16 import time |
| 17 import urllib | 17 import urllib |
| 18 import zlib | 18 import zlib |
| 19 | 19 |
| 20 import run_isolated | 20 import run_isolated |
| 21 | 21 |
| 22 from utils import net |
| 22 from utils import threading_utils | 23 from utils import threading_utils |
| 23 from utils import tools | 24 from utils import tools |
| 24 | 25 |
| 25 | 26 |
| 26 # The minimum size of files to upload directly to the blobstore. | 27 # The minimum size of files to upload directly to the blobstore. |
| 27 MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024 | 28 MIN_SIZE_FOR_DIRECT_BLOBSTORE = 20 * 1024 |
| 28 | 29 |
| 29 # The number of files to check the isolate server per /contains query. | 30 # The number of files to check the isolate server per /contains query. |
| 30 # All files are sorted by likelihood of a change in the file content | 31 # All files are sorted by likelihood of a change in the file content |
| 31 # (currently file size is used to estimate this: larger the file -> larger the | 32 # (currently file size is used to estimate this: larger the file -> larger the |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 111 while True: | 112 while True: |
| 112 # Read in 1mb chunks. | 113 # Read in 1mb chunks. |
| 113 chunk = f.read(1024*1024) | 114 chunk = f.read(1024*1024) |
| 114 if not chunk: | 115 if not chunk: |
| 115 break | 116 break |
| 116 digest.update(chunk) | 117 digest.update(chunk) |
| 117 return digest.hexdigest() | 118 return digest.hexdigest() |
| 118 | 119 |
| 119 | 120 |
def url_read(url, **kwargs):
  """Reads |url| through net.url_read(), treating a missing response as fatal.

  Arguments:
    url: the url to read.
    **kwargs: forwarded unchanged to net.url_read().

  Returns:
    The value returned by net.url_read(), whenever it is not None.

  Raises:
    run_isolated.MappingError: net.url_read() returned None.
  """
  response = net.url_read(url, **kwargs)
  if response is not None:
    return response
  # Nothing came back from the server at all: assume it is down.
  raise run_isolated.MappingError('Unable to connect to server %s' % url)
| 127 | 128 |
| 128 | 129 |
def upload_hash_content_to_blobstore(
    generate_upload_url, data, hash_key, content):
  """Sends |content| straight to the blobstore through a freshly generated
  upload url.

  Arguments:
    generate_upload_url: The url to get the new upload url from.
    data: extra POST data.
    hash_key: sha1 of the uncompressed version of content.
    content: The contents to upload. Must fit in memory for now.

  Returns:
    The first non-None value returned by net.url_read() for an upload request.

  Raises:
    run_isolated.MappingError: no upload url could be generated, or every
        upload attempt returned None.
  """
  logging.debug('Generating url to directly upload file to blobstore')
  assert isinstance(hash_key, str), hash_key
  assert isinstance(content, str), (hash_key, content)
  # TODO(maruel): Support large files. This would require streaming support.
  content_type, body = encode_multipart_formdata(
      data, [('content', hash_key, content)])

  final_attempt = net.URL_OPEN_MAX_ATTEMPTS - 1
  for attempt in xrange(net.URL_OPEN_MAX_ATTEMPTS):
    # Generating the upload url is allowed to retry on HTTP 50x.
    upload_url = net.url_read(generate_upload_url, data=data)
    if not upload_url:
      raise run_isolated.MappingError(
          'Unable to connect to server %s' % generate_upload_url)

    # The upload itself is not retried on HTTP 50x: uploading "consumes" the
    # upload url, so each attempt of this loop asks for a new one instead.
    response = net.url_read(
        upload_url, data=body, content_type=content_type, retry_50x=False)
    if response is not None:
      return response
    if attempt == final_attempt:
      # No point in sleeping when there is no further attempt.
      break
    net.HttpService.sleep_before_retry(attempt, None)

  raise run_isolated.MappingError(
      'Unable to connect to server %s' % generate_upload_url)
| 163 | 164 |
| 164 | 165 |
| 165 class UploadRemote(run_isolated.Remote): | 166 class UploadRemote(run_isolated.Remote): |
| 166 def __init__(self, namespace, base_url, token): | 167 def __init__(self, namespace, base_url, token): |
| 167 self.namespace = str(namespace) | 168 self.namespace = str(namespace) |
| 168 self._token = token | 169 self._token = token |
| 169 super(UploadRemote, self).__init__(base_url) | 170 super(UploadRemote, self).__init__(base_url) |
| 170 | 171 |
| (...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 400 with tools.Profiler('Archive'): | 401 with tools.Profiler('Archive'): |
| 401 return upload_sha1_tree( | 402 return upload_sha1_tree( |
| 402 base_url=options.remote, | 403 base_url=options.remote, |
| 403 indir=os.getcwd(), | 404 indir=os.getcwd(), |
| 404 infiles=infiles, | 405 infiles=infiles, |
| 405 namespace=options.namespace) | 406 namespace=options.namespace) |
| 406 | 407 |
| 407 | 408 |
# Script entry point: run main() on the command-line arguments (program name
# excluded) and use its return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
| OLD | NEW |