OLD | NEW |
---|---|
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import json | 5 import json |
6 import logging | 6 import logging |
7 from cStringIO import StringIO | 7 from cStringIO import StringIO |
8 import posixpath | 8 import posixpath |
9 import sys | 9 import sys |
10 import traceback | 10 import traceback |
(...skipping 105 matching lines...) | |
116 return GithubFileSystem( | 116 return GithubFileSystem( |
117 path if path is not None else 'test_data/github_file_system', | 117 path if path is not None else 'test_data/github_file_system', |
118 'test_owner', | 118 'test_owner', |
119 repo, | 119 repo, |
120 object_store_creator or ObjectStoreCreator.ForTest(), | 120 object_store_creator or ObjectStoreCreator.ForTest(), |
121 fake_fetcher) | 121 fake_fetcher) |
122 | 122 |
123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): | 123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): |
124 self._repo_key = '%s/%s' % (owner, repo) | 124 self._repo_key = '%s/%s' % (owner, repo) |
125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) | 125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) |
126 self._username, self._password = _LoadCredentials(object_store_creator) | |
127 self._blobstore = blobstore.AppEngineBlobstore() | |
128 self._fetcher = Fetcher(self._repo_url) | |
129 self._transient_stat_cache = object_store_creator.Create( | |
130 GithubFileSystem, category='t-stat-cache') | |
131 self._persistent_stat_cache = object_store_creator.Create( | |
132 GithubFileSystem, category='p-stat-cache', start_empty=False) | |
126 | 133 |
127 self._blobstore = blobstore.AppEngineBlobstore() | 134 # Created lazily in |_EnsureRepoZip|. |
128 # Lookup the chrome github api credentials. | 135 self._repo_zip = None |
129 self._username, self._password = _LoadCredentials(object_store_creator) | |
130 self._fetcher = Fetcher(self._repo_url) | |
131 | 136 |
132 # start_empty=False here to maintain the most recent stat across cron runs. | 137 def _EnsureRepoZip(self): |
133 # Refresh() will always re-stat and use that to decide whether to download | 138 '''Initializes |self._repo_zip| if hasn't already been (i.e. if |
Yoyo Zhou (2013/11/22 22:52:02): nit: if it
not at google - send to devlin (2013/11/22 23:01:58): Done.
| |
134 # the zipball. | 139 _EnsureRepoZip has never been called before). In that case |self._repo_zip| |
135 self._stat_cache = object_store_creator.Create( | 140 will be set to a Future of _GithubZipFile and the fetch process started, |
136 GithubFileSystem, category='stat-cache', start_empty=False) | 141 whether that be from a blobstore or if necessary all the way from GitHub. |
142 ''' | |
143 if self._repo_zip is not None: | |
144 return | |
137 | 145 |
138 # A Future to the github zip file. Normally this Future will resolve itself | 146 repo_key, repo_url, username, password = ( |
139 # by querying blobstore for the blob; however, Refresh() may decide to | 147 self._repo_key, self._repo_url, self._username, self._password) |
140 # override this with a new blob if it's out of date. | |
141 def resolve_from_blobstore(): | |
142 blob = self._blobstore.Get(self._repo_url, _GITHUB_REPOS_NAMESPACE) | |
143 return _GithubZipFile.Create(self._repo_key, blob) if blob else None | |
144 self._repo_zip = Future(delegate=Gettable(resolve_from_blobstore)) | |
145 | 148 |
146 def _GetCachedVersion(self): | 149 def fetch_from_blobstore(): |
147 '''Returns the currently cached version of the repository. The version is a | 150 '''Returns a Future which resolves to the _GithubZipFile for this repo |
148 'sha' hash value. | 151 fetched from blobstore. |
149 ''' | 152 ''' |
150 return self._stat_cache.Get(self._repo_key).Get() | 153 blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE) |
154 if blob is None: | |
155 return FileSystemError.RaiseInFuture( | |
156 'No blob for %s found in datastore' % repo_key) | |
151 | 157 |
152 def _SetCachedVersion(self, version): | 158 repo_zip = _GithubZipFile.Create(repo_key, blob) |
153 '''Sets the currently cached version of the repository. The version is a | 159 if repo_zip is None: |
154 'sha' hash value. | 160 return FileSystemError.RaiseInFuture( |
155 ''' | 161 'Blob for %s was corrupted in blobstore!?' % repo_key) |
156 self._stat_cache.Set(self._repo_key, version) | |
157 | 162 |
158 def _FetchLiveVersion(self): | 163 return Future(value=repo_zip) |
164 | |
165 def fetch_from_github(version): | |
166 '''Returns a Future which resolves to the _GithubZipFile for this repo | |
167 fetched new from GitHub, then writes it to blobstore and |version| to the | |
168 stat caches. | |
169 ''' | |
170 github_future = self._fetcher.FetchAsync( | |
171 'zipball', username=username, password=password) | |
172 def resolve(): | |
173 try: | |
174 blob = github_future.Get().content | |
175 except urlfetch.DownloadError: | |
176 raise FileSystemError('Failed to download repo %s file from %s' % | |
177 (repo_key, repo_url)) | |
178 | |
179 repo_zip = _GithubZipFile.Create(repo_key, blob) | |
180 if repo_zip is None: | |
181 raise FileSystemError('Blob for %s was fetched corrupted from %s' % | |
182 (repo_key, repo_url)) | |
183 | |
184 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) | |
185 self._transient_stat_cache.Set(repo_key, version) | |
Yoyo Zhou (2013/11/22 22:52:02): We should do away with this and replace it with a
not at google - send to devlin (2013/11/22 23:01:58): Done, though I called it "up to date" since that's
| |
186 self._persistent_stat_cache.Set(repo_key, version) | |
187 return repo_zip | |
188 return Future(delegate=Gettable(resolve)) | |
189 | |
190 # To decide whether we need to re-stat, and from there whether to re-fetch, | |
191 # make use of ObjectStore's start-empty configuration. If | |
192 # |object_store_creator| is configured to start empty then our creator | |
193 # wants to refresh (e.g. running a cron), so fetch the live stat from | |
194 # GitHub. If the stat hasn't changed since last time then no reason to | |
195 # re-fetch from GitHub, just take from blobstore. | |
196 | |
197 if self._transient_stat_cache.Get(repo_key).Get() is None: | |
198 # This is either a cron or an instance where a cron has never been run. | |
199 persistent_version = self._persistent_stat_cache.Get(repo_key).Get() | |
200 live_version = self._FetchLiveVersion(username, password) | |
201 if persistent_version != live_version: | |
202 # Note: branch intentionally triggered if |persistent_version| is None. | |
203 logging.info('%s has changed, fetching from GitHub.' % repo_url) | |
204 self._repo_zip = fetch_from_github(live_version) | |
205 else: | |
206 # Already up to date. Fetch from blobstore. No need to update the | |
207 # transient version here since it'll already be set for instances, and | |
208 # it'll never be set for crons. | |
209 logging.info('%s is up to date.' % repo_url) | |
210 self._repo_zip = fetch_from_blobstore() | |
211 else: | |
212 # Instance where cron has been run. It should be in blobstore. | |
213 self._repo_zip = fetch_from_blobstore() | |
214 | |
215 assert self._repo_zip is not None | |
216 | |
217 def _FetchLiveVersion(self, username, password): | |
159 '''Fetches the current repository version from github.com and returns it. | 218 '''Fetches the current repository version from github.com and returns it. |
160 The version is a 'sha' hash value. | 219 The version is a 'sha' hash value. |
161 ''' | 220 ''' |
162 # TODO(kalman): Do this asynchronously (use FetchAsync). | 221 # TODO(kalman): Do this asynchronously (use FetchAsync). |
163 result = self._fetcher.Fetch( | 222 result = self._fetcher.Fetch( |
164 'commits/HEAD', username=self._username, password=self._password) | 223 'commits/HEAD', username=username, password=password) |
165 | 224 |
166 try: | 225 try: |
167 return json.loads(result.content)['commit']['tree']['sha'] | 226 return json.loads(result.content)['sha'] |
168 except (KeyError, ValueError): | 227 except (KeyError, ValueError): |
169 raise FileSystemError('Error parsing JSON from repo %s: %s' % | 228 raise FileSystemError('Error parsing JSON from repo %s: %s' % |
170 (self._repo_url, traceback.format_exc())) | 229 (self._repo_url, traceback.format_exc())) |
171 | 230 |
172 def Refresh(self): | 231 def Refresh(self): |
173 '''Compares the cached and live stat versions to see if the cached | 232 return self.ReadSingle('') |
174 repository is out of date. If it is, an async fetch is started and a | |
175 Future is returned. When this Future is evaluated, the fetch will be | |
176 completed and the results cached. | |
177 | |
178 If no update is needed, None will be returned. | |
179 ''' | |
180 version = self._FetchLiveVersion() | |
181 if version == self._GetCachedVersion(): | |
182 logging.info('%s is up to date.' % self._repo_url) | |
183 # By default this Future will load the blob from datastore. | |
184 return self._repo_zip | |
185 | |
186 logging.info('%s has changed. Re-fetching.' % self._repo_url) | |
187 fetch = self._fetcher.FetchAsync( | |
188 'zipball', username=self._username, password=self._password) | |
189 | |
190 def resolve(): | |
191 '''Completes |fetch| and stores the results in blobstore. | |
192 ''' | |
193 repo_zip_url = self._repo_url + '/zipball' | |
194 try: | |
195 blob = fetch.Get().content | |
196 except urlfetch.DownloadError: | |
197 raise FileSystemError( | |
198 '%s: Failed to download zip file from repository %s' % repo_zip_url) | |
199 | |
200 repo_zip = _GithubZipFile.Create(self._repo_key, blob) | |
201 if repo_zip is None: | |
202 raise FileSystemError('%s: failed to create zip' % repo_zip_url) | |
203 | |
204 # Success. Update blobstore and the version. | |
205 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) | |
206 self._SetCachedVersion(version) | |
207 return repo_zip | |
208 | |
209 self._repo_zip = Future(delegate=Gettable(resolve)) | |
210 return self._repo_zip | |
211 | 233 |
212 def Read(self, paths, binary=False): | 234 def Read(self, paths, binary=False): |
213 '''Returns a dictionary mapping |paths| to the contents of the file at each | 235 '''Returns a dictionary mapping |paths| to the contents of the file at each |
214 path. If path ends with a '/', it is treated as a directory and is mapped to | 236 path. If path ends with a '/', it is treated as a directory and is mapped to |
215 a list of filenames in that directory. | 237 a list of filenames in that directory. |
216 | 238 |
217 |binary| is ignored. | 239 |binary| is ignored. |
218 ''' | 240 ''' |
241 self._EnsureRepoZip() | |
219 def resolve(): | 242 def resolve(): |
220 repo_zip = self._repo_zip.Get() | 243 repo_zip = self._repo_zip.Get() |
221 if repo_zip is None: | |
222 raise FileNotFoundError('"%s" has not been Refreshed' % self._repo_key) | |
223 reads = {} | 244 reads = {} |
224 for path in paths: | 245 for path in paths: |
225 if path not in repo_zip.Paths(): | 246 if path not in repo_zip.Paths(): |
226 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) | 247 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) |
227 if path == '' or path.endswith('/'): | 248 if path == '' or path.endswith('/'): |
228 reads[path] = repo_zip.List(path) | 249 reads[path] = repo_zip.List(path) |
229 else: | 250 else: |
230 reads[path] = repo_zip.Read(path) | 251 reads[path] = repo_zip.Read(path) |
231 return reads | 252 return reads |
232 | |
233 # Delay reading until after self._repo_zip has finished fetching. | |
234 return Future(delegate=Gettable(resolve)) | 253 return Future(delegate=Gettable(resolve)) |
235 | 254 |
236 def Stat(self, path): | 255 def Stat(self, path): |
237 '''Stats |path| returning its version as a StatInfo object. If |path| ends | 256 '''Stats |path| returning its version as a StatInfo object. If |path| ends |
238 with a '/', it is assumed to be a directory and the StatInfo object returned | 257 with a '/', it is assumed to be a directory and the StatInfo object returned |
239 includes child_versions for all paths in the directory. | 258 includes child_versions for all paths in the directory. |
240 | 259 |
241 File paths do not include the name of the zip file, which is arbitrary and | 260 File paths do not include the name of the zip file, which is arbitrary and |
242 useless to consumers. | 261 useless to consumers. |
243 | 262 |
244 Because the repository will only be downloaded once per server version, all | 263 Because the repository will only be downloaded once per server version, all |
245 stat versions are always 0. | 264 stat versions are always 0. |
246 ''' | 265 ''' |
247 repo_zip = self._repo_zip.Get() | 266 self._EnsureRepoZip() |
248 if repo_zip is None: | 267 repo_zip = self._repo_zip.Get() # if only Stat returned a Future... |
Yoyo Zhou (2013/11/22 22:52:02): Is this a TODO?
not at google - send to devlin (2013/11/22 23:01:58): Internal dialogue. Deleted.
| |
249 raise FileNotFoundError('"%s" has never been Refreshed' % self._repo_key) | |
250 | 268 |
251 if path not in repo_zip.Paths(): | 269 if path not in repo_zip.Paths(): |
252 raise FileNotFoundError('"%s" does not contain file "%s"' % | 270 raise FileNotFoundError('"%s" does not contain file "%s"' % |
253 (self._repo_key, path)) | 271 (self._repo_key, path)) |
254 | 272 |
255 version = self._GetCachedVersion() | 273 version = self._persistent_stat_cache.Get(self._repo_key).Get() |
256 assert version, ('There was a zipball in datastore; there should be a ' | 274 assert version is not None, ('There was a zipball in datastore; there ' |
257 'version cached for it') | 275 'should be a version cached for it') |
258 | 276 |
259 stat_info = StatInfo(version) | 277 stat_info = StatInfo(version) |
260 if path == '' or path.endswith('/'): | 278 if path == '' or path.endswith('/'): |
261 stat_info.child_versions = dict((p, StatInfo(version)) | 279 stat_info.child_versions = dict((p, StatInfo(version)) |
262 for p in repo_zip.List(path)) | 280 for p in repo_zip.List(path)) |
263 return stat_info | 281 return stat_info |
264 | 282 |
265 def GetIdentity(self): | 283 def GetIdentity(self): |
266 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) | 284 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) |
267 | 285 |
268 def __repr__(self): | 286 def __repr__(self): |
269 return '%s(key=%s, url=%s)' % (type(self).__name__, | 287 return '%s(key=%s, url=%s)' % (type(self).__name__, |
270 self._repo_key, | 288 self._repo_key, |
271 self._repo_url) | 289 self._repo_url) |