Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(172)

Side by Side Diff: chrome/common/extensions/docs/server2/new_github_file_system.py

Issue 82433002: Docserver: Further refactoring to the new GithubFileSystem to make it update (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase, fix test data Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 # Copyright 2013 The Chromium Authors. All rights reserved. 1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import json 5 import json
6 import logging 6 import logging
7 from cStringIO import StringIO 7 from cStringIO import StringIO
8 import posixpath 8 import posixpath
9 import sys 9 import sys
10 import traceback 10 import traceback
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 return GithubFileSystem( 116 return GithubFileSystem(
117 path if path is not None else 'test_data/github_file_system', 117 path if path is not None else 'test_data/github_file_system',
118 'test_owner', 118 'test_owner',
119 repo, 119 repo,
120 object_store_creator or ObjectStoreCreator.ForTest(), 120 object_store_creator or ObjectStoreCreator.ForTest(),
121 fake_fetcher) 121 fake_fetcher)
122 122
123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): 123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher):
124 self._repo_key = '%s/%s' % (owner, repo) 124 self._repo_key = '%s/%s' % (owner, repo)
125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) 125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo)
126 126 self._username, self._password = _LoadCredentials(object_store_creator)
127 self._blobstore = blobstore.AppEngineBlobstore() 127 self._blobstore = blobstore.AppEngineBlobstore()
128 # Lookup the chrome github api credentials.
129 self._username, self._password = _LoadCredentials(object_store_creator)
130 self._fetcher = Fetcher(self._repo_url) 128 self._fetcher = Fetcher(self._repo_url)
131 129 # Stores whether the github is up-to-date. This will either be True or
132 # start_empty=False here to maintain the most recent stat across cron runs. 130 # empty, the emptiness most likely due to this being a cron run.
133 # Refresh() will always re-stat and use that to decide whether to download 131 self._up_to_date_cache = object_store_creator.Create(
134 # the zipball. 132 GithubFileSystem, category='up-to-date')
133 # Caches the zip file's stat. Overrides start_empty=False and use
134 # |self._up_to_date_cache| to determine whether we need to refresh.
135 self._stat_cache = object_store_creator.Create( 135 self._stat_cache = object_store_creator.Create(
136 GithubFileSystem, category='stat-cache', start_empty=False) 136 GithubFileSystem, category='stat-cache', start_empty=False)
137 137
138 # A Future to the github zip file. Normally this Future will resolve itself 138 # Created lazily in |_EnsureRepoZip|.
139 # by querying blobstore for the blob; however, Refresh() may decide to 139 self._repo_zip = None
140 # override this with a new blob if it's out of date.
141 def resolve_from_blobstore():
142 blob = self._blobstore.Get(self._repo_url, _GITHUB_REPOS_NAMESPACE)
143 return _GithubZipFile.Create(self._repo_key, blob) if blob else None
144 self._repo_zip = Future(delegate=Gettable(resolve_from_blobstore))
145 140
146 def _GetCachedVersion(self): 141 def _EnsureRepoZip(self):
147 '''Returns the currently cached version of the repository. The version is a 142 '''Initializes |self._repo_zip| if it hasn't already been (i.e. if
148 'sha' hash value. 143 _EnsureRepoZip has never been called before). In that case |self._repo_zip|
144 will be set to a Future of _GithubZipFile and the fetch process started,
145 whether that be from a blobstore or if necessary all the way from GitHub.
149 ''' 146 '''
150 return self._stat_cache.Get(self._repo_key).Get() 147 if self._repo_zip is not None:
148 return
151 149
152 def _SetCachedVersion(self, version): 150 repo_key, repo_url, username, password = (
153 '''Sets the currently cached version of the repository. The version is a 151 self._repo_key, self._repo_url, self._username, self._password)
154 'sha' hash value.
155 '''
156 self._stat_cache.Set(self._repo_key, version)
157 152
158 def _FetchLiveVersion(self): 153 def fetch_from_blobstore():
154 '''Returns a Future which resolves to the _GithubZipFile for this repo
155 fetched from blobstore.
156 '''
157 blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
158 if blob is None:
159 return FileSystemError.RaiseInFuture(
160 'No blob for %s found in datastore' % repo_key)
161
162 repo_zip = _GithubZipFile.Create(repo_key, blob)
163 if repo_zip is None:
164 return FileSystemError.RaiseInFuture(
165 'Blob for %s was corrupted in blobstore!?' % repo_key)
166
167 return Future(value=repo_zip)
168
169 def fetch_from_github(version):
170 '''Returns a Future which resolves to the _GithubZipFile for this repo
171 fetched new from GitHub, then writes it to blobstore and |version| to the
172 stat caches.
173 '''
174 github_future = self._fetcher.FetchAsync(
175 'zipball', username=username, password=password)
176 def resolve():
177 try:
178 blob = github_future.Get().content
179 except urlfetch.DownloadError:
180 raise FileSystemError('Failed to download repo %s file from %s' %
181 (repo_key, repo_url))
182
183 repo_zip = _GithubZipFile.Create(repo_key, blob)
184 if repo_zip is None:
185 raise FileSystemError('Blob for %s was fetched corrupted from %s' %
186 (repo_key, repo_url))
187
188 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE)
189 self._up_to_date_cache.Set(repo_key, True)
190 self._stat_cache.Set(repo_key, version)
191 return repo_zip
192 return Future(delegate=Gettable(resolve))
193
194 # To decide whether we need to re-stat, and from there whether to re-fetch,
195 # make use of ObjectStore's start-empty configuration. If
196 # |object_store_creator| is configured to start empty then our creator
197 # wants to refresh (e.g. running a cron), so fetch the live stat from
198 # GitHub. If the stat hasn't changed since last time then no reason to
199 # re-fetch from GitHub, just take from blobstore.
200
201 if self._up_to_date_cache.Get(repo_key).Get() is None:
202 # This is either a cron or an instance where a cron has never been run.
203 cached_version = self._stat_cache.Get(repo_key).Get()
204 live_version = self._FetchLiveVersion(username, password)
205 if cached_version != live_version:
206 # Note: branch intentionally triggered if |cached_version| is None.
207 logging.info('%s has changed, fetching from GitHub.' % repo_url)
208 self._repo_zip = fetch_from_github(live_version)
209 else:
210 # Already up to date. Fetch from blobstore. No need to set up-to-date
211 # to True here since it'll already be set for instances, and it'll
212 # never be set for crons.
213 logging.info('%s is up to date.' % repo_url)
214 self._repo_zip = fetch_from_blobstore()
215 else:
216 # Instance where cron has been run. It should be in blobstore.
217 self._repo_zip = fetch_from_blobstore()
218
219 assert self._repo_zip is not None
220
221 def _FetchLiveVersion(self, username, password):
159 '''Fetches the current repository version from github.com and returns it. 222 '''Fetches the current repository version from github.com and returns it.
160 The version is a 'sha' hash value. 223 The version is a 'sha' hash value.
161 ''' 224 '''
162 # TODO(kalman): Do this asynchronously (use FetchAsync). 225 # TODO(kalman): Do this asynchronously (use FetchAsync).
163 result = self._fetcher.Fetch( 226 result = self._fetcher.Fetch(
164 'commits/HEAD', username=self._username, password=self._password) 227 'commits/HEAD', username=username, password=password)
165 228
166 try: 229 try:
167 return json.loads(result.content)['commit']['tree']['sha'] 230 return json.loads(result.content)['sha']
168 except (KeyError, ValueError): 231 except (KeyError, ValueError):
169 raise FileSystemError('Error parsing JSON from repo %s: %s' % 232 raise FileSystemError('Error parsing JSON from repo %s: %s' %
170 (self._repo_url, traceback.format_exc())) 233 (self._repo_url, traceback.format_exc()))
171 234
172 def Refresh(self): 235 def Refresh(self):
173 '''Compares the cached and live stat versions to see if the cached 236 return self.ReadSingle('')
174 repository is out of date. If it is, an async fetch is started and a
175 Future is returned. When this Future is evaluated, the fetch will be
176 completed and the results cached.
177
178 If no update is needed, None will be returned.
179 '''
180 version = self._FetchLiveVersion()
181 if version == self._GetCachedVersion():
182 logging.info('%s is up to date.' % self._repo_url)
183 # By default this Future will load the blob from datastore.
184 return self._repo_zip
185
186 logging.info('%s has changed. Re-fetching.' % self._repo_url)
187 fetch = self._fetcher.FetchAsync(
188 'zipball', username=self._username, password=self._password)
189
190 def resolve():
191 '''Completes |fetch| and stores the results in blobstore.
192 '''
193 repo_zip_url = self._repo_url + '/zipball'
194 try:
195 blob = fetch.Get().content
196 except urlfetch.DownloadError:
197 raise FileSystemError(
198 '%s: Failed to download zip file from repository %s' % repo_zip_url)
199
200 repo_zip = _GithubZipFile.Create(self._repo_key, blob)
201 if repo_zip is None:
202 raise FileSystemError('%s: failed to create zip' % repo_zip_url)
203
204 # Success. Update blobstore and the version.
205 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE)
206 self._SetCachedVersion(version)
207 return repo_zip
208
209 self._repo_zip = Future(delegate=Gettable(resolve))
210 return self._repo_zip
211 237
212 def Read(self, paths, binary=False): 238 def Read(self, paths, binary=False):
213 '''Returns a directory mapping |paths| to the contents of the file at each 239 '''Returns a directory mapping |paths| to the contents of the file at each
214 path. If path ends with a '/', it is treated as a directory and is mapped to 240 path. If path ends with a '/', it is treated as a directory and is mapped to
215 a list of filenames in that directory. 241 a list of filenames in that directory.
216 242
217 |binary| is ignored. 243 |binary| is ignored.
218 ''' 244 '''
245 self._EnsureRepoZip()
219 def resolve(): 246 def resolve():
220 repo_zip = self._repo_zip.Get() 247 repo_zip = self._repo_zip.Get()
221 if repo_zip is None:
222 raise FileNotFoundError('"%s" has not been Refreshed' % self._repo_key)
223 reads = {} 248 reads = {}
224 for path in paths: 249 for path in paths:
225 if path not in repo_zip.Paths(): 250 if path not in repo_zip.Paths():
226 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) 251 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path))
227 if path == '' or path.endswith('/'): 252 if path == '' or path.endswith('/'):
228 reads[path] = repo_zip.List(path) 253 reads[path] = repo_zip.List(path)
229 else: 254 else:
230 reads[path] = repo_zip.Read(path) 255 reads[path] = repo_zip.Read(path)
231 return reads 256 return reads
232
233 # Delay reading until after self._repo_zip has finished fetching.
234 return Future(delegate=Gettable(resolve)) 257 return Future(delegate=Gettable(resolve))
235 258
236 def Stat(self, path): 259 def Stat(self, path):
237 '''Stats |path| returning its version as as StatInfo object. If |path| ends 260 '''Stats |path| returning its version as as StatInfo object. If |path| ends
238 with a '/', it is assumed to be a directory and the StatInfo object returned 261 with a '/', it is assumed to be a directory and the StatInfo object returned
239 includes child_versions for all paths in the directory. 262 includes child_versions for all paths in the directory.
240 263
241 File paths do not include the name of the zip file, which is arbitrary and 264 File paths do not include the name of the zip file, which is arbitrary and
242 useless to consumers. 265 useless to consumers.
243 266
244 Because the repository will only be downloaded once per server version, all 267 Because the repository will only be downloaded once per server version, all
245 stat versions are always 0. 268 stat versions are always 0.
246 ''' 269 '''
270 self._EnsureRepoZip()
247 repo_zip = self._repo_zip.Get() 271 repo_zip = self._repo_zip.Get()
248 if repo_zip is None:
249 raise FileNotFoundError('"%s" has never been Refreshed' % self._repo_key)
250 272
251 if path not in repo_zip.Paths(): 273 if path not in repo_zip.Paths():
252 raise FileNotFoundError('"%s" does not contain file "%s"' % 274 raise FileNotFoundError('"%s" does not contain file "%s"' %
253 (self._repo_key, path)) 275 (self._repo_key, path))
254 276
255 version = self._GetCachedVersion() 277 version = self._stat_cache.Get(self._repo_key).Get()
256 assert version, ('There was a zipball in datastore; there should be a ' 278 assert version is not None, ('There was a zipball in datastore; there '
257 'version cached for it') 279 'should be a version cached for it')
258 280
259 stat_info = StatInfo(version) 281 stat_info = StatInfo(version)
260 if path == '' or path.endswith('/'): 282 if path == '' or path.endswith('/'):
261 stat_info.child_versions = dict((p, StatInfo(version)) 283 stat_info.child_versions = dict((p, StatInfo(version))
262 for p in repo_zip.List(path)) 284 for p in repo_zip.List(path))
263 return stat_info 285 return stat_info
264 286
265 def GetIdentity(self): 287 def GetIdentity(self):
266 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) 288 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key)
267 289
268 def __repr__(self): 290 def __repr__(self):
269 return '%s(key=%s, url=%s)' % (type(self).__name__, 291 return '%s(key=%s, url=%s)' % (type(self).__name__,
270 self._repo_key, 292 self._repo_key,
271 self._repo_url) 293 self._repo_url)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698