| Index: gstools.py
|
| diff --git a/gstools.py b/gstools.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..40d641596492e43b92835e9ab7974ad6b32d971c
|
| --- /dev/null
|
| +++ b/gstools.py
|
| @@ -0,0 +1,114 @@
|
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Utility functions common to the Google storage scripts."""
|
| +
|
| +import hashlib
|
| +import os
|
| +import re
|
| +import sys
|
| +
|
| +import subprocess2
|
| +
|
| +
|
| +class Gsutil(object):
|
| + """Call gsutil with some predefined settings."""
|
| + def __init__(self, path, boto_path=None, timeout=None):
|
| + if not os.path.exists(path):
|
| + raise OSError('GSUtil not found in %s' % path)
|
| + self.path = path
|
| + self.timeout = timeout
|
| + self.boto_path = boto_path
|
| +
|
| + def call(self, *args):
|
| + env = os.environ.copy()
|
| + if self.boto_path is not None:
|
| + env['AWS_CREDENTIAL_FILE'] = self.boto_path
|
| + return subprocess2.call((sys.executable, self.path) + args,
|
| + env=env,
|
| + timeout=self.timeout)
|
| +
|
| + def check_call(self, *args):
|
| + env = os.environ.copy()
|
| + if self.boto_path is not None:
|
| + env['AWS_CREDENTIAL_FILE'] = self.boto_path
|
| + ((out, err), code) = subprocess2.communicate(
|
| + (sys.executable, self.path) + args,
|
| + stdout=subprocess2.PIPE,
|
| + stderr=subprocess2.PIPE,
|
| + env=env,
|
| + timeout=self.timeout)
|
| +
|
| + # Parse output.
|
| + status_code_match = re.search('status=([0-9]+)', err)
|
| + if status_code_match:
|
| + return (int(status_code_match.group(1)), out, err)
|
| + elif ('You are attempting to access protected data with '
|
| + 'no configured credentials.' in err):
|
| + return (403, out, err)
|
| + elif 'No such object' in err:
|
| + return (404, out, err)
|
| + else:
|
| + return (code, out, err)
|
| +
|
| + def clone(self):
|
| + return Gsutil(self.path, self.boto_path, self.timeout)
|
| +
|
| +
|
| +def CheckBucketPermissions(bucket, gsutil):
|
| + if not bucket:
|
| + print >> sys.stderr, 'Missing bucket.'
|
| + return (None, 1)
|
| + base_url = 'gs://%s' % bucket
|
| +
|
| + # Check if we have permissions to the Google Storage bucket.
|
| + code, _, ls_err = gsutil.check_call('ls', base_url)
|
| + if code == 403:
|
| + code = gsutil.call('config')
|
| + if code != 0:
|
| + print >> sys.stderr, 'Error while authenticating to %s.' % base_url
|
| + elif code == 404:
|
| + print >> sys.stderr, '%s not found.' % base_url
|
| + elif code != 0:
|
| + print >> sys.stderr, ls_err
|
| + return (base_url, code)
|
| +
|
| +
|
| +def GetSHA1(filename):
|
| + sha1 = hashlib.sha1()
|
| + with open(filename, 'rb') as f:
|
| + while True:
|
| + # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
|
| + chunk = f.read(1024*1024)
|
| + if not chunk:
|
| + break
|
| + sha1.update(chunk)
|
| + return sha1.hexdigest()
|
| +
|
| +
|
| +def GetMD5(filename, lock):
|
| + md5_calculator = hashlib.md5()
|
| + with lock:
|
| + with open(filename, 'rb') as f:
|
| + while True:
|
| + chunk = f.read(1024*1024)
|
| + if not chunk:
|
| + break
|
| + md5_calculator.update(chunk)
|
| + return md5_calculator.hexdigest()
|
| +
|
| +
|
| +def GetMD5Cached(filename, lock):
|
| + """Don't calculate the MD5 if we can find a .md5 file."""
|
| + # See if we can find an existing MD5 sum stored in a file.
|
| + if os.path.exists('%s.md5' % filename):
|
| + with open('%s.md5' % filename) as f:
|
| + md5_match = re.search('([a-z0-9]{32})', f.read())
|
| + if md5_match:
|
| + return md5_match.group(1)
|
| + # No valid cached sum found; calculate it and store it for next time.
|
| + md5_hash = GetMD5(filename, lock)
|
| + with open('%s.md5' % filename, 'w') as f:
|
| + f.write(md5_hash)
|
| + return md5_hash
|
|
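For context, here is a minimal sketch of how a calling script might wire these helpers together, assuming gstools.py is importable from the caller's directory; the gsutil path, bucket name, file name, and timeout below are illustrative assumptions, not part of this change:

    import sys

    import gstools

    # Hypothetical values for illustration only; a real script would take
    # these from command-line flags.
    gsutil_path = '/path/to/third_party/gsutil/gsutil'
    bucket = 'chromium-example-bucket'

    gsutil = gstools.Gsutil(gsutil_path, boto_path=None, timeout=300)
    base_url, code = gstools.CheckBucketPermissions(bucket, gsutil)
    if code != 0:
      sys.exit(code)

    # Content-addressed upload: store the file under its SHA-1 so identical
    # content never has to be uploaded twice.
    sha1 = gstools.GetSHA1('big_binary.bin')
    code, _, err = gsutil.check_call('cp', 'big_binary.bin',
                                     '%s/%s' % (base_url, sha1))
    if code != 0:
      sys.stderr.write(err)

Note that check_call returns a (status, stdout, stderr) tuple rather than raising, so callers branch on the parsed status code, as CheckBucketPermissions does.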
|
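The lock argument to GetMD5 and GetMD5Cached lets callers serialize the large sequential reads done while hashing; a sketch of one presumed use, hashing from multiple worker threads (the threading setup and file names are hypothetical):

    import threading

    import gstools

    # Serialize the big sequential reads done while hashing.
    disk_lock = threading.Lock()
    results = {}

    def hash_worker(filenames):
      for name in filenames:
        # Reuses <name>.md5 when present, otherwise computes and caches it.
        results[name] = gstools.GetMD5Cached(name, disk_lock)

    workers = [threading.Thread(target=hash_worker, args=(chunk,))
               for chunk in (['a.bin'], ['b.bin'])]
    for w in workers:
      w.start()
    for w in workers:
      w.join()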