OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """A library to assist automatically downloading files. |
| 7 |
| 8 This library is used by scripts that download tarballs, zipfiles, etc. as part |
| 9 of the build process. |
| 10 """ |
| 11 |
| 12 import hashlib |
| 13 import http_download |
| 14 import os.path |
| 15 import re |
| 16 import shutil |
| 17 import sys |
| 18 import time |
| 19 import urllib2 |
| 20 |
# Names of the stamp files written next to a download to record what was
# fetched: the source URL and its SHA1 hash, respectively.
SOURCE_STAMP = 'SOURCE_URL'
HASH_STAMP = 'SOURCE_SHA1'


# Designed to handle more general inputs than sys.platform because the platform
# name may come from the command line.
PLATFORM_COLLAPSE = {
    'windows': 'windows',
    'win32': 'windows',
    'cygwin': 'windows',
    'linux': 'linux',
    'linux2': 'linux',
    'linux3': 'linux',
    'darwin': 'mac',
    'mac': 'mac',
}

# Maps platform.machine()-style names onto canonical architecture families.
ARCH_COLLAPSE = {
    'i386' : 'x86',
    'i686' : 'x86',
    'x86_64': 'x86',
    'armv7l': 'arm',
}
| 44 |
| 45 |
class HashError(Exception):
  """Raised when a downloaded file's SHA1 does not match the expected value."""

  def __init__(self, download_url, expected_hash, actual_hash):
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    template = 'Got hash "%s" but expected hash "%s" for "%s"'
    return template % (self.actual_hash, self.expected_hash, self.download_url)
| 56 |
def PlatformName(name=None):
  """Collapse a platform name (default: sys.platform) to windows/linux/mac.

  Raises KeyError for platforms not listed in PLATFORM_COLLAPSE.
  """
  key = sys.platform if name is None else name
  return PLATFORM_COLLAPSE[key]
| 61 |
def ArchName(name=None):
  """Collapse a machine name to a canonical architecture family.

  When name is None, it is auto-detected via platform.machine(), except on
  Windows where 'i386' is assumed.  Raises KeyError for unknown machines.
  """
  if name is not None:
    return ARCH_COLLAPSE[name]
  if PlatformName() == 'windows':
    # TODO(pdox): Figure out how to auto-detect 32-bit vs 64-bit Windows.
    return ARCH_COLLAPSE['i386']
  import platform
  return ARCH_COLLAPSE[platform.machine()]
| 71 |
def EnsureFileCanBeWritten(filename):
  """Create the parent directory of |filename| if it does not already exist.

  Fix: for a bare filename with no directory component, os.path.dirname()
  returns '' and os.makedirs('') raises OSError; skip creation in that case.

  Arguments:
    filename: path whose parent directory should exist after this call.
  """
  directory = os.path.dirname(filename)
  if directory and not os.path.exists(directory):
    os.makedirs(directory)
| 76 |
| 77 |
def WriteData(filename, data):
  """Write |data| to |filename| in binary mode, creating parent directories.

  Fix: a 'with' block guarantees the handle is closed even if write() raises;
  the original left the file open on error.

  Arguments:
    filename: destination path.
    data: bytes to write.
  """
  EnsureFileCanBeWritten(filename)
  with open(filename, 'wb') as f:
    f.write(data)
| 83 |
| 84 |
def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  """Copy |stream| to |filename| in chunks of |chunk_size| bytes.

  Arguments:
    filename: destination file (parent directories are created as needed).
    stream: readable file-like object supplying the data.
    chunk_size: number of bytes to request per read.
    verbose: when True, emit a '.' per chunk and a final newline to stdout.
  """
  EnsureFileCanBeWritten(filename)
  dst = open(filename, 'wb')
  try:
    while True:
      chunk = stream.read(chunk_size)
      if not chunk:
        break
      dst.write(chunk)
      if verbose:
        # Progress indicator: one dot per chunk written.
        sys.stdout.write('.')
        sys.stdout.flush()
  finally:
    if verbose:
      sys.stdout.write('\n')
    dst.close()
| 102 |
| 103 |
def DoesStampMatch(stampfile, expected, index):
  """Compare one line of a stamp file against an expected value.

  Fix: a 'with' block guarantees the handle is closed even if read() raises;
  the original leaked the handle on a read error.

  Arguments:
    stampfile: path of the stamp file to read.
    expected: value the stamp line at |index| must equal for a match.
    index: newline-separated line number within the stamp to compare.
  Returns:
    "already up-to-date." if the indexed stamp line equals |expected|,
    "manual override." if the stamp contents start with 'manual',
    False if the stamp mismatches or the file cannot be read.
  """
  try:
    with open(stampfile, 'r') as f:
      stamp = f.read()
    if stamp.split('\n')[index] == expected:
      return "already up-to-date."
    elif stamp.startswith('manual'):
      return "manual override."
    return False
  except IOError:
    # Missing/unreadable stamp means we cannot prove freshness.
    return False
| 116 |
| 117 |
def WriteStamp(stampfile, data):
  """Write |data| as the new contents of |stampfile| (text mode).

  Fix: a 'with' block guarantees the handle is closed even if write() raises;
  the original left the file open on error.

  Arguments:
    stampfile: path of the stamp file to (over)write.
    data: stamp contents to record.
  """
  EnsureFileCanBeWritten(stampfile)
  with open(stampfile, 'w') as f:
    f.write(data)
| 123 |
| 124 |
def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  """Check whether the stamp file under |path| is fresh and matches.

  Arguments:
    path: directory holding the stamp file.
    stamp_name: filename of the stamp within |path|.
    stamp_contents: expected value for the stamp line at |index|.
    min_time: if set, the stamp's mtime must be strictly newer than this.
    index: line number within the stamp file to compare.
  Returns:
    A truthy status string if the stamp is current, otherwise False.
  """
  stampfile = os.path.join(path, stamp_name)

  # A stamp older than the minimum last-modified time cannot be current;
  # a missing stamp (OSError from stat) is treated the same way.
  if min_time:
    try:
      if os.stat(stampfile).st_mtime <= min_time:
        return False
    except OSError:
      return False

  return DoesStampMatch(stampfile, stamp_contents, index)
| 138 |
| 139 |
def WriteSourceStamp(path, url):
  """Record |url| in the SOURCE_URL stamp file under directory |path|."""
  WriteStamp(os.path.join(path, SOURCE_STAMP), url)
| 143 |
def WriteHashStamp(path, hash_val):
  """Record |hash_val| in the SOURCE_SHA1 stamp file under directory |path|."""
  WriteStamp(os.path.join(path, HASH_STAMP), hash_val)
| 147 |
| 148 |
def Retry(op, *args):
  """Invoke op(*args); on Windows, retry failures with exponential backoff.

  Windows seems to be prone to having commands that delete files or
  directories fail. We currently do not have a complete understanding why,
  and as a workaround we simply retry the command a few times.
  It appears that file locks are hanging around longer than they should. This
  may be a secondary effect of processes hanging around longer than they
  should. This may be because when we kill a browser sel_ldr does not exit
  immediately, etc.
  Virus checkers can also accidently prevent files from being deleted, but
  that shouldn't be a problem on the bots.
  """
  if sys.platform not in ('win32', 'cygwin'):
    # Non-Windows platforms get a single, unguarded attempt.
    op(*args)
    return

  failures = 0
  while True:
    try:
      op(*args)
      return
    except Exception:
      sys.stdout.write("FAILED: %s %s\n" % (op.__name__, repr(args)))
      failures += 1
      if failures >= 5:
        # Don't mask the exception.
        raise
      sys.stdout.write("RETRY: %s %s\n" % (op.__name__, repr(args)))
      time.sleep(pow(2, failures))
| 176 |
| 177 |
def MoveDirCleanly(src, dst):
  """Move directory |src| to |dst|, deleting anything already at |dst|."""
  RemoveDir(dst)
  MoveDir(src, dst)
| 181 |
| 182 |
def MoveDir(src, dst):
  """Move a directory tree, retrying on Windows (see Retry)."""
  Retry(shutil.move, src, dst)
| 185 |
| 186 |
def RemoveDir(path):
  """Recursively delete |path| if it exists, retrying on Windows."""
  if os.path.exists(path):
    Retry(shutil.rmtree, path)
| 190 |
| 191 |
def RemoveFile(path):
  """Delete the file at |path| if it exists, retrying on Windows."""
  if os.path.exists(path):
    Retry(os.unlink, path)
| 195 |
| 196 |
| 197 def _HashFileHandle(fh): |
| 198 """sha1 of a file like object. |
| 199 |
| 200 Arguments: |
| 201 fh: file handle like object to hash. |
| 202 Returns: |
| 203 sha1 as a string. |
| 204 """ |
| 205 hasher = hashlib.sha1() |
| 206 try: |
| 207 while True: |
| 208 data = fh.read(4096) |
| 209 if not data: |
| 210 break |
| 211 hasher.update(data) |
| 212 finally: |
| 213 fh.close() |
| 214 return hasher.hexdigest() |
| 215 |
| 216 |
def HashFile(filename):
  """sha1 a file on disk.

  Arguments:
    filename: filename to hash.
  Returns:
    sha1 hex digest as a string.
  """
  # _HashFileHandle closes the handle for us, even on error.
  return _HashFileHandle(open(filename, 'rb'))
| 227 |
| 228 |
def HashUrlByDownloading(url):
  """sha1 the data at an url.

  Arguments:
    url: url to download from.
  Returns:
    sha1 of the data at the url.
  Raises:
    Whatever urllib2.urlopen raises on a failed fetch (after logging the URL).
  """
  try:
    fh = urllib2.urlopen(url)
  except Exception:
    # Fix: narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit are
    # not intercepted; fetch errors are still logged and re-raised unchanged.
    sys.stderr.write("Failed fetching URL: %s\n" % url)
    raise
  return _HashFileHandle(fh)
| 243 |
| 244 |
# Attempts to get the SHA1 hash of a file given a URL by looking for
# an adjacent file with a ".sha1hash" suffix. This saves having to
# download a large tarball just to get its hash. Otherwise, we fall
# back to downloading the main file.
def HashUrl(url):
  """Return the SHA1 hash (hex string) of the resource at |url|.

  Arguments:
    url: url whose content hash is wanted.
  Returns:
    sha1 hex digest as a string.
  Raises:
    AssertionError: if the adjacent .sha1hash file is malformed.
    urllib2.HTTPError: for HTTP failures other than a 404 on the hash file.
  """
  hash_url = '%s.sha1hash' % url
  try:
    fh = urllib2.urlopen(hash_url)
    data = fh.read(100)
    fh.close()
  # Fix: 'except X, e' is a SyntaxError on Python 3; the 'as' form is
  # valid on Python 2.6+ as well.
  except urllib2.HTTPError as exn:
    if exn.code == 404:
      # No precomputed hash file; fall back to hashing the full download.
      return HashUrlByDownloading(url)
    raise
  else:
    # A valid hash file is exactly 40 lowercase hex digits plus an optional
    # trailing newline; raw string avoids an invalid '\n' escape warning.
    if not re.match(r'[0-9a-f]{40}\n?$', data):
      raise AssertionError('Bad SHA1 hash file: %r' % data)
    return data.strip()
| 263 |
| 264 |
def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
            hash_val=None, keep=False, verbose=False, stamp_index=0):
  """Synchronize a destination file with a URL.

  If the URL does not match the URL stamp, then we must re-download it.

  Arguments:
    url: the url to compare against and download from
    filename: the file to create on download
    stamp_dir: the directory containing the URL/hash stamp files to check
    min_time: if set, stamp files older than this are considered stale
    keep: if True, the file itself must already exist to be considered
        up to date
    hash_val: if set, the expected hash which must be matched
    verbose: prints out status as it runs
    stamp_index: index within the stamp file to check.
  Returns:
    True if the file is replaced
    False if the file is not replaced
  Exception:
    HashError: if the hash does not match
  """

  assert url and filename

  # If we are not keeping the tarball, or we already have it, we can
  # skip downloading it for this reason. If we are keeping it,
  # it must exist.
  if keep:
    tarball_ok = os.path.isfile(filename)
  else:
    tarball_ok = True

  # If we don't need the tarball and the stamp_file matches the url, then
  # we must be up to date. If the URL differs but the recorded hash matches
  # the one we'll insist the tarball has, then that's good enough too.
  # TODO(mcgrathr): Download the .sha1sum file first to compare with
  # the cached hash, in case --file-hash options weren't used.
  if tarball_ok and stamp_dir is not None:
    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
      if verbose:
        # Fix: single-argument print(...) is valid on both Python 2 and 3;
        # the original 'print x' statements are SyntaxErrors on Python 3.
        print('%s is already up to date.' % filename)
      return False
    if (hash_val is not None and
        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
      if verbose:
        print('%s is identical to the up to date file.' % filename)
      return False

  if verbose:
    print('Updating %s\n\tfrom %s.' % (filename, url))
  EnsureFileCanBeWritten(filename)
  http_download.HttpDownload(url, filename)

  if hash_val:
    tar_hash = HashFile(filename)
    if hash_val != tar_hash:
      raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
                      download_url=url)

  return True
OLD | NEW |