Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(72)

Side by Side Diff: chrome/common/extensions/docs/server2/new_github_file_system.py

Issue 82433002: Docserver: Further refactoring to the new GithubFileSystem to make it update (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 # Copyright 2013 The Chromium Authors. All rights reserved. 1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import logging 6 import logging
7 from cStringIO import StringIO 7 from cStringIO import StringIO
8 import posixpath 8 import posixpath
9 import sys 9 import sys
10 import traceback 10 import traceback
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 return GithubFileSystem( 116 return GithubFileSystem(
117 path if path is not None else 'test_data/github_file_system', 117 path if path is not None else 'test_data/github_file_system',
118 'test_owner', 118 'test_owner',
119 repo, 119 repo,
120 object_store_creator or ObjectStoreCreator.ForTest(), 120 object_store_creator or ObjectStoreCreator.ForTest(),
121 fake_fetcher) 121 fake_fetcher)
122 122
123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): 123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher):
124 self._repo_key = '%s/%s' % (owner, repo) 124 self._repo_key = '%s/%s' % (owner, repo)
125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) 125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo)
126 self._username, self._password = _LoadCredentials(object_store_creator)
127 self._blobstore = blobstore.AppEngineBlobstore()
128 self._fetcher = Fetcher(self._repo_url)
129 self._transient_stat_cache = object_store_creator.Create(
130 GithubFileSystem, category='t-stat-cache')
131 self._persistent_stat_cache = object_store_creator.Create(
132 GithubFileSystem, category='p-stat-cache', start_empty=False)
126 133
127 self._blobstore = blobstore.AppEngineBlobstore() 134 # Created lazily in |_EnsureRepoZip|.
128 # Lookup the chrome github api credentials. 135 self._repo_zip = None
129 self._username, self._password = _LoadCredentials(object_store_creator)
130 self._fetcher = Fetcher(self._repo_url)
131 136
132 # start_empty=False here to maintain the most recent stat across cron runs. 137 def _EnsureRepoZip(self):
133 # Refresh() will always re-stat and use that to decide whether to download 138 '''Initializes |self._repo_zip| if hasn't already been (i.e. if
Yoyo Zhou 2013/11/22 22:52:02 nit: if it
not at google - send to devlin 2013/11/22 23:01:58 Done.
134 # the zipball. 139 _EnsureRepoZip has never been called before). In that case |self._repo_zip|
135 self._stat_cache = object_store_creator.Create( 140 will be set to a Future of _GithubZipFile and the fetch process started,
136 GithubFileSystem, category='stat-cache', start_empty=False) 141 whether that be from a blobstore or if necessary all the way from GitHub.
142 '''
143 if self._repo_zip is not None:
144 return
137 145
138 # A Future to the github zip file. Normally this Future will resolve itself 146 repo_key, repo_url, username, password = (
139 # by querying blobstore for the blob; however, Refresh() may decide to 147 self._repo_key, self._repo_url, self._username, self._password)
140 # override this with a new blob if it's out of date.
141 def resolve_from_blobstore():
142 blob = self._blobstore.Get(self._repo_url, _GITHUB_REPOS_NAMESPACE)
143 return _GithubZipFile.Create(self._repo_key, blob) if blob else None
144 self._repo_zip = Future(delegate=Gettable(resolve_from_blobstore))
145 148
146 def _GetCachedVersion(self): 149 def fetch_from_blobstore():
147 '''Returns the currently cached version of the repository. The version is a 150 '''Returns a Future which resolves to the _GithubZipFile for this repo
148 'sha' hash value. 151 fetched from blobstore.
149 ''' 152 '''
150 return self._stat_cache.Get(self._repo_key).Get() 153 blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
154 if blob is None:
155 return FileSystemError.RaiseInFuture(
156 'No blob for %s found in datastore' % repo_key)
151 157
152 def _SetCachedVersion(self, version): 158 repo_zip = _GithubZipFile.Create(repo_key, blob)
153 '''Sets the currently cached version of the repository. The version is a 159 if repo_zip is None:
154 'sha' hash value. 160 return FileSystemError.RaiseInFuture(
155 ''' 161 'Blob for %s was corrupted in blobstore!?' % repo_key)
156 self._stat_cache.Set(self._repo_key, version)
157 162
158 def _FetchLiveVersion(self): 163 return Future(value=repo_zip)
164
165 def fetch_from_github(version):
166 '''Returns a Future which resolves to the _GithubZipFile for this repo
167 fetched new from GitHub, then writes it to blobstore and |version| to the
168 stat caches.
169 '''
170 github_future = self._fetcher.FetchAsync(
171 'zipball', username=username, password=password)
172 def resolve():
173 try:
174 blob = github_future.Get().content
175 except urlfetch.DownloadError:
176 raise FileSystemError('Failed to download repo %s file from %s' %
177 (repo_key, repo_url))
178
179 repo_zip = _GithubZipFile.Create(repo_key, blob)
180 if repo_zip is None:
181 raise FileSystemError('Blob for %s was fetched corrupted from %s' %
182 (repo_key, repo_url))
183
184 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE)
185 self._transient_stat_cache.Set(repo_key, version)
Yoyo Zhou 2013/11/22 22:52:02 We should do away with this and replace it with a
not at google - send to devlin 2013/11/22 23:01:58 Done, though I called it "up to date" since that's
186 self._persistent_stat_cache.Set(repo_key, version)
187 return repo_zip
188 return Future(delegate=Gettable(resolve))
189
190 # To decide whether we need to re-stat, and from there whether to re-fetch,
191 # make use of ObjectStore's start-empty configuration. If
192 # |object_store_creator| is configured to start empty then our creator
193 # wants to refresh (e.g. running a cron), so fetch the live stat from
194 # GitHub. If the stat hasn't changed since last time then no reason to
195 # re-fetch from GitHub, just take from blobstore.
196
197 if self._transient_stat_cache.Get(repo_key).Get() is None:
198 # This is either a cron or an instance where a cron has never been run.
199 persistent_version = self._persistent_stat_cache.Get(repo_key).Get()
200 live_version = self._FetchLiveVersion(username, password)
201 if persistent_version != live_version:
202 # Note: branch intentionally triggered if |persistent_version| is None.
203 logging.info('%s has changed, fetching from GitHub.' % repo_url)
204 self._repo_zip = fetch_from_github(live_version)
205 else:
206 # Already up to date. Fetch from blobstore. No need to update the
207 # transient version here since it'll already be set for instances, and
208 # it'll never be set for crons.
209 logging.info('%s is up to date.' % repo_url)
210 self._repo_zip = fetch_from_blobstore()
211 else:
212 # Instance where cron has been run. It should be in blobstore.
213 self._repo_zip = fetch_from_blobstore()
214
215 assert self._repo_zip is not None
216
217 def _FetchLiveVersion(self, username, password):
159 '''Fetches the current repository version from github.com and returns it. 218 '''Fetches the current repository version from github.com and returns it.
160 The version is a 'sha' hash value. 219 The version is a 'sha' hash value.
161 ''' 220 '''
162 # TODO(kalman): Do this asynchronously (use FetchAsync). 221 # TODO(kalman): Do this asynchronously (use FetchAsync).
163 result = self._fetcher.Fetch( 222 result = self._fetcher.Fetch(
164 'commits/HEAD', username=self._username, password=self._password) 223 'commits/HEAD', username=username, password=password)
165 224
166 try: 225 try:
167 return json.loads(result.content)['commit']['tree']['sha'] 226 return json.loads(result.content)['sha']
168 except (KeyError, ValueError): 227 except (KeyError, ValueError):
169 raise FileSystemError('Error parsing JSON from repo %s: %s' % 228 raise FileSystemError('Error parsing JSON from repo %s: %s' %
170 (self._repo_url, traceback.format_exc())) 229 (self._repo_url, traceback.format_exc()))
171 230
172 def Refresh(self): 231 def Refresh(self):
173 '''Compares the cached and live stat versions to see if the cached 232 return self.ReadSingle('')
174 repository is out of date. If it is, an async fetch is started and a
175 Future is returned. When this Future is evaluated, the fetch will be
176 completed and the results cached.
177
178 If no update is needed, None will be returned.
179 '''
180 version = self._FetchLiveVersion()
181 if version == self._GetCachedVersion():
182 logging.info('%s is up to date.' % self._repo_url)
183 # By default this Future will load the blob from datastore.
184 return self._repo_zip
185
186 logging.info('%s has changed. Re-fetching.' % self._repo_url)
187 fetch = self._fetcher.FetchAsync(
188 'zipball', username=self._username, password=self._password)
189
190 def resolve():
191 '''Completes |fetch| and stores the results in blobstore.
192 '''
193 repo_zip_url = self._repo_url + '/zipball'
194 try:
195 blob = fetch.Get().content
196 except urlfetch.DownloadError:
197 raise FileSystemError(
198 '%s: Failed to download zip file from repository %s' % repo_zip_url)
199
200 repo_zip = _GithubZipFile.Create(self._repo_key, blob)
201 if repo_zip is None:
202 raise FileSystemError('%s: failed to create zip' % repo_zip_url)
203
204 # Success. Update blobstore and the version.
205 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE)
206 self._SetCachedVersion(version)
207 return repo_zip
208
209 self._repo_zip = Future(delegate=Gettable(resolve))
210 return self._repo_zip
211 233
212 def Read(self, paths, binary=False): 234 def Read(self, paths, binary=False):
213 '''Returns a directory mapping |paths| to the contents of the file at each 235 '''Returns a directory mapping |paths| to the contents of the file at each
214 path. If path ends with a '/', it is treated as a directory and is mapped to 236 path. If path ends with a '/', it is treated as a directory and is mapped to
215 a list of filenames in that directory. 237 a list of filenames in that directory.
216 238
217 |binary| is ignored. 239 |binary| is ignored.
218 ''' 240 '''
241 self._EnsureRepoZip()
219 def resolve(): 242 def resolve():
220 repo_zip = self._repo_zip.Get() 243 repo_zip = self._repo_zip.Get()
221 if repo_zip is None:
222 raise FileNotFoundError('"%s" has not been Refreshed' % self._repo_key)
223 reads = {} 244 reads = {}
224 for path in paths: 245 for path in paths:
225 if path not in repo_zip.Paths(): 246 if path not in repo_zip.Paths():
226 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) 247 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path))
227 if path == '' or path.endswith('/'): 248 if path == '' or path.endswith('/'):
228 reads[path] = repo_zip.List(path) 249 reads[path] = repo_zip.List(path)
229 else: 250 else:
230 reads[path] = repo_zip.Read(path) 251 reads[path] = repo_zip.Read(path)
231 return reads 252 return reads
232
233 # Delay reading until after self._repo_zip has finished fetching.
234 return Future(delegate=Gettable(resolve)) 253 return Future(delegate=Gettable(resolve))
235 254
236 def Stat(self, path): 255 def Stat(self, path):
237 '''Stats |path| returning its version as as StatInfo object. If |path| ends 256 '''Stats |path| returning its version as as StatInfo object. If |path| ends
238 with a '/', it is assumed to be a directory and the StatInfo object returned 257 with a '/', it is assumed to be a directory and the StatInfo object returned
239 includes child_versions for all paths in the directory. 258 includes child_versions for all paths in the directory.
240 259
241 File paths do not include the name of the zip file, which is arbitrary and 260 File paths do not include the name of the zip file, which is arbitrary and
242 useless to consumers. 261 useless to consumers.
243 262
244 Because the repository will only be downloaded once per server version, all 263 Because the repository will only be downloaded once per server version, all
245 stat versions are always 0. 264 stat versions are always 0.
246 ''' 265 '''
247 repo_zip = self._repo_zip.Get() 266 self._EnsureRepoZip()
248 if repo_zip is None: 267 repo_zip = self._repo_zip.Get() # if only Stat returned a Future...
Yoyo Zhou 2013/11/22 22:52:02 Is this a TODO?
not at google - send to devlin 2013/11/22 23:01:58 Internal dialogue. Deleted.
249 raise FileNotFoundError('"%s" has never been Refreshed' % self._repo_key)
250 268
251 if path not in repo_zip.Paths(): 269 if path not in repo_zip.Paths():
252 raise FileNotFoundError('"%s" does not contain file "%s"' % 270 raise FileNotFoundError('"%s" does not contain file "%s"' %
253 (self._repo_key, path)) 271 (self._repo_key, path))
254 272
255 version = self._GetCachedVersion() 273 version = self._persistent_stat_cache.Get(self._repo_key).Get()
256 assert version, ('There was a zipball in datastore; there should be a ' 274 assert version is not None, ('There was a zipball in datastore; there '
257 'version cached for it') 275 'should be a version cached for it')
258 276
259 stat_info = StatInfo(version) 277 stat_info = StatInfo(version)
260 if path == '' or path.endswith('/'): 278 if path == '' or path.endswith('/'):
261 stat_info.child_versions = dict((p, StatInfo(version)) 279 stat_info.child_versions = dict((p, StatInfo(version))
262 for p in repo_zip.List(path)) 280 for p in repo_zip.List(path))
263 return stat_info 281 return stat_info
264 282
265 def GetIdentity(self): 283 def GetIdentity(self):
266 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) 284 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key)
267 285
268 def __repr__(self): 286 def __repr__(self):
269 return '%s(key=%s, url=%s)' % (type(self).__name__, 287 return '%s(key=%s, url=%s)' % (type(self).__name__,
270 self._repo_key, 288 self._repo_key,
271 self._repo_url) 289 self._repo_url)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698