Index: chrome/common/extensions/docs/server2/new_github_file_system.py |
diff --git a/chrome/common/extensions/docs/server2/new_github_file_system.py b/chrome/common/extensions/docs/server2/new_github_file_system.py |
index 20619f1ff398d6cc254eb1f9799f317d0fd55c67..0fe023507c120ed1797c0e3225c80e2ce31ab3f4 100644 |
--- a/chrome/common/extensions/docs/server2/new_github_file_system.py |
+++ b/chrome/common/extensions/docs/server2/new_github_file_system.py |
@@ -123,91 +123,113 @@ class GithubFileSystem(FileSystem): |
def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): |
self._repo_key = '%s/%s' % (owner, repo) |
self._repo_url = '%s/%s/%s' % (base_url, owner, repo) |
- |
- self._blobstore = blobstore.AppEngineBlobstore() |
- # Lookup the chrome github api credentials. |
self._username, self._password = _LoadCredentials(object_store_creator) |
+ self._blobstore = blobstore.AppEngineBlobstore() |
self._fetcher = Fetcher(self._repo_url) |
- |
- # start_empty=False here to maintain the most recent stat across cron runs. |
- # Refresh() will always re-stat and use that to decide whether to download |
- # the zipball. |
- self._stat_cache = object_store_creator.Create( |
- GithubFileSystem, category='stat-cache', start_empty=False) |
- |
- # A Future to the github zip file. Normally this Future will resolve itself |
- # by querying blobstore for the blob; however, Refresh() may decide to |
- # override this with a new blob if it's out of date. |
- def resolve_from_blobstore(): |
- blob = self._blobstore.Get(self._repo_url, _GITHUB_REPOS_NAMESPACE) |
- return _GithubZipFile.Create(self._repo_key, blob) if blob else None |
- self._repo_zip = Future(delegate=Gettable(resolve_from_blobstore)) |
- |
- def _GetCachedVersion(self): |
- '''Returns the currently cached version of the repository. The version is a |
- 'sha' hash value. |
+ self._transient_stat_cache = object_store_creator.Create( |
+ GithubFileSystem, category='t-stat-cache') |
+ self._persistent_stat_cache = object_store_creator.Create( |
+ GithubFileSystem, category='p-stat-cache', start_empty=False) |
+ |
+ # Created lazily in |_EnsureRepoZip|. |
+ self._repo_zip = None |
+ |
+ def _EnsureRepoZip(self): |
+    '''Initializes |self._repo_zip| if it hasn't already been (i.e. if |
Yoyo Zhou, 2013/11/22 22:52:02: nit: if it
not at google - send to devlin, 2013/11/22 23:01:58: Done.
+ _EnsureRepoZip has never been called before). In that case |self._repo_zip| |
+ will be set to a Future of _GithubZipFile and the fetch process started, |
+ whether that be from a blobstore or if necessary all the way from GitHub. |
''' |
- return self._stat_cache.Get(self._repo_key).Get() |
+ if self._repo_zip is not None: |
+ return |
- def _SetCachedVersion(self, version): |
- '''Sets the currently cached version of the repository. The version is a |
- 'sha' hash value. |
- ''' |
- self._stat_cache.Set(self._repo_key, version) |
+ repo_key, repo_url, username, password = ( |
+ self._repo_key, self._repo_url, self._username, self._password) |
- def _FetchLiveVersion(self): |
+ def fetch_from_blobstore(): |
+ '''Returns a Future which resolves to the _GithubZipFile for this repo |
+ fetched from blobstore. |
+ ''' |
+ blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE) |
+ if blob is None: |
+ return FileSystemError.RaiseInFuture( |
+ 'No blob for %s found in datastore' % repo_key) |
+ |
+ repo_zip = _GithubZipFile.Create(repo_key, blob) |
+ if repo_zip is None: |
+ return FileSystemError.RaiseInFuture( |
+ 'Blob for %s was corrupted in blobstore!?' % repo_key) |
+ |
+ return Future(value=repo_zip) |
+ |
+ def fetch_from_github(version): |
+ '''Returns a Future which resolves to the _GithubZipFile for this repo |
+ fetched new from GitHub, then writes it to blobstore and |version| to the |
+ stat caches. |
+ ''' |
+ github_future = self._fetcher.FetchAsync( |
+ 'zipball', username=username, password=password) |
+ def resolve(): |
+ try: |
+ blob = github_future.Get().content |
+ except urlfetch.DownloadError: |
+ raise FileSystemError('Failed to download repo %s file from %s' % |
+ (repo_key, repo_url)) |
+ |
+ repo_zip = _GithubZipFile.Create(repo_key, blob) |
+ if repo_zip is None: |
+ raise FileSystemError('Blob for %s was fetched corrupted from %s' % |
+ (repo_key, repo_url)) |
+ |
+ self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) |
+          self._transient_stat_cache.Set(repo_key, version) |
Yoyo Zhou, 2013/11/22 22:52:02: We should do away with this and replace it with a
not at google - send to devlin, 2013/11/22 23:01:58: Done, though I called it "up to date" since that's
+ self._persistent_stat_cache.Set(repo_key, version) |
+ return repo_zip |
+ return Future(delegate=Gettable(resolve)) |
+ |
+ # To decide whether we need to re-stat, and from there whether to re-fetch, |
+ # make use of ObjectStore's start-empty configuration. If |
+ # |object_store_creator| is configured to start empty then our creator |
+ # wants to refresh (e.g. running a cron), so fetch the live stat from |
+ # GitHub. If the stat hasn't changed since last time then no reason to |
+ # re-fetch from GitHub, just take from blobstore. |
+ |
+ if self._transient_stat_cache.Get(repo_key).Get() is None: |
+ # This is either a cron or an instance where a cron has never been run. |
+ persistent_version = self._persistent_stat_cache.Get(repo_key).Get() |
+ live_version = self._FetchLiveVersion(username, password) |
+ if persistent_version != live_version: |
+ # Note: branch intentionally triggered if |persistent_version| is None. |
+ logging.info('%s has changed, fetching from GitHub.' % repo_url) |
+ self._repo_zip = fetch_from_github(live_version) |
+ else: |
+ # Already up to date. Fetch from blobstore. No need to update the |
+ # transient version here since it'll already be set for instances, and |
+ # it'll never be set for crons. |
+ logging.info('%s is up to date.' % repo_url) |
+ self._repo_zip = fetch_from_blobstore() |
+ else: |
+ # Instance where cron has been run. It should be in blobstore. |
+ self._repo_zip = fetch_from_blobstore() |
+ |
+ assert self._repo_zip is not None |
+ |
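For readers skimming the review, a condensed restatement of the branch above (illustrative only; the function and names below are stand-ins, not code from this patch):

def choose_fetch_source(transient_sha, persistent_sha, fetch_live_sha):
  # The transient stat cache starts empty on cron runs, so a miss means
  # "re-stat against GitHub"; a hit means a refresh already ran on this
  # instance's version of the caches.
  if transient_sha is not None:
    return 'blobstore'
  live_sha = fetch_live_sha()
  if persistent_sha != live_sha:  # also true when persistent_sha is None
    return 'github'               # re-download zipball, update both caches
  return 'blobstore'              # stat unchanged since the last refresh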
+ def _FetchLiveVersion(self, username, password): |
'''Fetches the current repository version from github.com and returns it. |
The version is a 'sha' hash value. |
''' |
# TODO(kalman): Do this asynchronously (use FetchAsync). |
result = self._fetcher.Fetch( |
- 'commits/HEAD', username=self._username, password=self._password) |
+ 'commits/HEAD', username=username, password=password) |
try: |
- return json.loads(result.content)['commit']['tree']['sha'] |
+ return json.loads(result.content)['sha'] |
except (KeyError, ValueError): |
raise FileSystemError('Error parsing JSON from repo %s: %s' % |
(self._repo_url, traceback.format_exc())) |
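The parsing change above switches the stat value from the tree sha to the commit sha. A minimal sketch of the difference, using an abridged, assumed shape for GitHub's commits/HEAD response (the payload below is illustrative, not taken from this patch):

import json

# Assumed, abridged commits/HEAD payload: the top-level 'sha' is the commit
# hash, while 'commit.tree.sha' is the tree hash the old code read.
content = json.dumps({
    'sha': 'commit-sha',
    'commit': {'tree': {'sha': 'tree-sha'}},
})

assert json.loads(content)['sha'] == 'commit-sha'                  # new path
assert json.loads(content)['commit']['tree']['sha'] == 'tree-sha'  # old path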
def Refresh(self): |
- '''Compares the cached and live stat versions to see if the cached |
- repository is out of date. If it is, an async fetch is started and a |
- Future is returned. When this Future is evaluated, the fetch will be |
- completed and the results cached. |
- |
- If no update is needed, None will be returned. |
- ''' |
- version = self._FetchLiveVersion() |
- if version == self._GetCachedVersion(): |
- logging.info('%s is up to date.' % self._repo_url) |
- # By default this Future will load the blob from datastore. |
- return self._repo_zip |
- |
- logging.info('%s has changed. Re-fetching.' % self._repo_url) |
- fetch = self._fetcher.FetchAsync( |
- 'zipball', username=self._username, password=self._password) |
- |
- def resolve(): |
- '''Completes |fetch| and stores the results in blobstore. |
- ''' |
- repo_zip_url = self._repo_url + '/zipball' |
- try: |
- blob = fetch.Get().content |
- except urlfetch.DownloadError: |
- raise FileSystemError( |
- '%s: Failed to download zip file from repository %s' % repo_zip_url) |
- |
- repo_zip = _GithubZipFile.Create(self._repo_key, blob) |
- if repo_zip is None: |
- raise FileSystemError('%s: failed to create zip' % repo_zip_url) |
- |
- # Success. Update blobstore and the version. |
- self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) |
- self._SetCachedVersion(version) |
- return repo_zip |
- |
- self._repo_zip = Future(delegate=Gettable(resolve)) |
- return self._repo_zip |
+ return self.ReadSingle('') |
def Read(self, paths, binary=False): |
     '''Returns a dictionary mapping |paths| to the contents of the file at each |
@@ -216,10 +238,9 @@ class GithubFileSystem(FileSystem): |
|binary| is ignored. |
''' |
+ self._EnsureRepoZip() |
def resolve(): |
repo_zip = self._repo_zip.Get() |
- if repo_zip is None: |
- raise FileNotFoundError('"%s" has not been Refreshed' % self._repo_key) |
reads = {} |
for path in paths: |
if path not in repo_zip.Paths(): |
@@ -229,8 +250,6 @@ class GithubFileSystem(FileSystem): |
else: |
reads[path] = repo_zip.Read(path) |
return reads |
- |
- # Delay reading until after self._repo_zip has finished fetching. |
return Future(delegate=Gettable(resolve)) |
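The deferred-read plumbing used throughout, Future(value=...) and Future(delegate=Gettable(fn)), comes from the docs server's future module. Below is a self-contained stand-in sketch of that pattern, assuming only the construction forms visible in this patch (these classes are simplified substitutes, not the real implementations):

class Gettable(object):
  '''Stand-in: wraps a callable whose result is produced on demand.'''
  def __init__(self, fn):
    self._fn = fn

  def Get(self):
    return self._fn()


class Future(object):
  '''Stand-in: resolves to a fixed value or to a delegate's Get().'''
  def __init__(self, value=None, delegate=None):
    self._value = value
    self._delegate = delegate

  def Get(self):
    if self._delegate is not None:
      return self._delegate.Get()  # deferred work runs here, on resolution
    return self._value


def resolve():
  return 'zipball bytes'  # stands in for the blobstore/GitHub fetch

repo_zip = Future(delegate=Gettable(resolve))  # nothing fetched yet
print(repo_zip.Get())                          # fetch happens on first Get()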
def Stat(self, path): |
@@ -244,17 +263,16 @@ class GithubFileSystem(FileSystem): |
Because the repository will only be downloaded once per server version, all |
stat versions are always 0. |
''' |
- repo_zip = self._repo_zip.Get() |
- if repo_zip is None: |
- raise FileNotFoundError('"%s" has never been Refreshed' % self._repo_key) |
+ self._EnsureRepoZip() |
+    repo_zip = self._repo_zip.Get()  # if only Stat returned a Future... |
Yoyo Zhou, 2013/11/22 22:52:02: Is this a TODO?
not at google - send to devlin, 2013/11/22 23:01:58: Internal dialogue. Deleted.
if path not in repo_zip.Paths(): |
raise FileNotFoundError('"%s" does not contain file "%s"' % |
(self._repo_key, path)) |
- version = self._GetCachedVersion() |
- assert version, ('There was a zipball in datastore; there should be a ' |
- 'version cached for it') |
+ version = self._persistent_stat_cache.Get(self._repo_key).Get() |
+ assert version is not None, ('There was a zipball in datastore; there ' |
+ 'should be a version cached for it') |
stat_info = StatInfo(version) |
if path == '' or path.endswith('/'): |