OLD | NEW |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import json | 5 import json |
6 import logging | 6 import logging |
7 from cStringIO import StringIO | 7 from cStringIO import StringIO |
8 import posixpath | 8 import posixpath |
9 import sys | 9 import sys |
10 import traceback | 10 import traceback |
(...skipping 105 matching lines...)
116 return GithubFileSystem( | 116 return GithubFileSystem( |
117 path if path is not None else 'test_data/github_file_system', | 117 path if path is not None else 'test_data/github_file_system', |
118 'test_owner', | 118 'test_owner', |
119 repo, | 119 repo, |
120 object_store_creator or ObjectStoreCreator.ForTest(), | 120 object_store_creator or ObjectStoreCreator.ForTest(), |
121 fake_fetcher) | 121 fake_fetcher) |
122 | 122 |
123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): | 123 def __init__(self, base_url, owner, repo, object_store_creator, Fetcher): |
124 self._repo_key = '%s/%s' % (owner, repo) | 124 self._repo_key = '%s/%s' % (owner, repo) |
125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) | 125 self._repo_url = '%s/%s/%s' % (base_url, owner, repo) |
126 | 126 self._username, self._password = _LoadCredentials(object_store_creator) |
127 self._blobstore = blobstore.AppEngineBlobstore() | 127 self._blobstore = blobstore.AppEngineBlobstore() |
128 # Lookup the chrome github api credentials. | |
129 self._username, self._password = _LoadCredentials(object_store_creator) | |
130 self._fetcher = Fetcher(self._repo_url) | 128 self._fetcher = Fetcher(self._repo_url) |
131 | 129 # Stores whether the cached GitHub data is up-to-date. This will either be |
132 # start_empty=False here to maintain the most recent stat across cron runs. | 130 # True or empty; if empty, this is most likely a cron run. |
133 # Refresh() will always re-stat and use that to decide whether to download | 131 self._up_to_date_cache = object_store_creator.Create( |
134 # the zipball. | 132 GithubFileSystem, category='up-to-date') |
| 133 # Caches the zip file's stat. Overrides start_empty to False and uses |
| 134 # |self._up_to_date_cache| to determine whether we need to refresh. |
135 self._stat_cache = object_store_creator.Create( | 135 self._stat_cache = object_store_creator.Create( |
136 GithubFileSystem, category='stat-cache', start_empty=False) | 136 GithubFileSystem, category='stat-cache', start_empty=False) |
137 | 137 |
138 # A Future to the github zip file. Normally this Future will resolve itself | 138 # Created lazily in |_EnsureRepoZip|. |
139 # by querying blobstore for the blob; however, Refresh() may decide to | 139 self._repo_zip = None |
140 # override this with a new blob if it's out of date. | |
141 def resolve_from_blobstore(): | |
142 blob = self._blobstore.Get(self._repo_url, _GITHUB_REPOS_NAMESPACE) | |
143 return _GithubZipFile.Create(self._repo_key, blob) if blob else None | |
144 self._repo_zip = Future(delegate=Gettable(resolve_from_blobstore)) | |
145 | 140 |
146 def _GetCachedVersion(self): | 141 def _EnsureRepoZip(self): |
147 '''Returns the currently cached version of the repository. The version is a | 142 '''Initializes |self._repo_zip| if it hasn't already been (i.e. if |
148 'sha' hash value. | 143 _EnsureRepoZip has never been called before). In that case |self._repo_zip| |
| 144 will be set to a Future of _GithubZipFile and the fetch process started, |
| 145 whether from blobstore or, if necessary, all the way from GitHub. |
149 ''' | 146 ''' |
150 return self._stat_cache.Get(self._repo_key).Get() | 147 if self._repo_zip is not None: |
| 148 return |
151 | 149 |
152 def _SetCachedVersion(self, version): | 150 repo_key, repo_url, username, password = ( |
153 '''Sets the currently cached version of the repository. The version is a | 151 self._repo_key, self._repo_url, self._username, self._password) |
154 'sha' hash value. | |
155 ''' | |
156 self._stat_cache.Set(self._repo_key, version) | |
157 | 152 |
158 def _FetchLiveVersion(self): | 153 def fetch_from_blobstore(): |
| 154 '''Returns a Future which resolves to the _GithubZipFile for this repo |
| 155 fetched from blobstore. |
| 156 ''' |
| 157 blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE) |
| 158 if blob is None: |
| 159 return FileSystemError.RaiseInFuture( |
| 160 'No blob for %s found in datastore' % repo_key) |
| 161 |
| 162 repo_zip = _GithubZipFile.Create(repo_key, blob) |
| 163 if repo_zip is None: |
| 164 return FileSystemError.RaiseInFuture( |
| 165 'Blob for %s was corrupted in blobstore!?' % repo_key) |
| 166 |
| 167 return Future(value=repo_zip) |
| 168 |
| 169 def fetch_from_github(version): |
| 170 '''Returns a Future which resolves to the _GithubZipFile for this repo |
| 171 fetched new from GitHub, then writes it to blobstore and |version| to the |
| 172 stat caches. |
| 173 ''' |
| 174 github_future = self._fetcher.FetchAsync( |
| 175 'zipball', username=username, password=password) |
| 176 def resolve(): |
| 177 try: |
| 178 blob = github_future.Get().content |
| 179 except urlfetch.DownloadError: |
| 180 raise FileSystemError('Failed to download repo %s file from %s' % |
| 181 (repo_key, repo_url)) |
| 182 |
| 183 repo_zip = _GithubZipFile.Create(repo_key, blob) |
| 184 if repo_zip is None: |
| 185 raise FileSystemError('Blob for %s was fetched corrupted from %s' % |
| 186 (repo_key, repo_url)) |
| 187 |
| 188 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) |
| 189 self._up_to_date_cache.Set(repo_key, True) |
| 190 self._stat_cache.Set(repo_key, version) |
| 191 return repo_zip |
| 192 return Future(delegate=Gettable(resolve)) |
| 193 |
| 194 # To decide whether we need to re-stat, and from there whether to re-fetch, |
| 195 # make use of ObjectStore's start-empty configuration. If |
| 196 # |object_store_creator| is configured to start empty then our creator |
| 197 # wants to refresh (e.g. running a cron), so fetch the live stat from |
| 198 # GitHub. If the stat hasn't changed since last time there is no reason |
| 199 # to re-fetch from GitHub; just take the blob from blobstore. |
| 200 |
| 201 if self._up_to_date_cache.Get(repo_key).Get() is None: |
| 202 # This is either a cron or an instance where a cron has never been run. |
| 203 cached_version = self._stat_cache.Get(repo_key).Get() |
| 204 live_version = self._FetchLiveVersion(username, password) |
| 205 if cached_version != live_version: |
| 206 # Note: branch intentionally triggered if |cached_version| is None. |
| 207 logging.info('%s has changed, fetching from GitHub.' % repo_url) |
| 208 self._repo_zip = fetch_from_github(live_version) |
| 209 else: |
| 210 # Already up to date. Fetch from blobstore. No need to set up-to-date |
| 211 # to True here since it'll already be set for instances, and it'll |
| 212 # never be set for crons. |
| 213 logging.info('%s is up to date.' % repo_url) |
| 214 self._repo_zip = fetch_from_blobstore() |
| 215 else: |
| 216 # Instance where cron has been run. It should be in blobstore. |
| 217 self._repo_zip = fetch_from_blobstore() |
| 218 |
| 219 assert self._repo_zip is not None |
| 220 |
| 221 def _FetchLiveVersion(self, username, password): |
159 '''Fetches the current repository version from github.com and returns it. | 222 '''Fetches the current repository version from github.com and returns it. |
160 The version is a 'sha' hash value. | 223 The version is a 'sha' hash value. |
161 ''' | 224 ''' |
162 # TODO(kalman): Do this asynchronously (use FetchAsync). | 225 # TODO(kalman): Do this asynchronously (use FetchAsync). |
163 result = self._fetcher.Fetch( | 226 result = self._fetcher.Fetch( |
164 'commits/HEAD', username=self._username, password=self._password) | 227 'commits/HEAD', username=username, password=password) |
165 | 228 |
166 try: | 229 try: |
167 return json.loads(result.content)['commit']['tree']['sha'] | 230 return json.loads(result.content)['sha'] |
168 except (KeyError, ValueError): | 231 except (KeyError, ValueError): |
169 raise FileSystemError('Error parsing JSON from repo %s: %s' % | 232 raise FileSystemError('Error parsing JSON from repo %s: %s' % |
170 (self._repo_url, traceback.format_exc())) | 233 (self._repo_url, traceback.format_exc())) |
171 | 234 |
172 def Refresh(self): | 235 def Refresh(self): |
173 '''Compares the cached and live stat versions to see if the cached | 236 return self.ReadSingle('') |
174 repository is out of date. If it is, an async fetch is started and a | |
175 Future is returned. When this Future is evaluated, the fetch will be | |
176 completed and the results cached. | |
177 | |
178 If no update is needed, None will be returned. | |
179 ''' | |
180 version = self._FetchLiveVersion() | |
181 if version == self._GetCachedVersion(): | |
182 logging.info('%s is up to date.' % self._repo_url) | |
183 # By default this Future will load the blob from datastore. | |
184 return self._repo_zip | |
185 | |
186 logging.info('%s has changed. Re-fetching.' % self._repo_url) | |
187 fetch = self._fetcher.FetchAsync( | |
188 'zipball', username=self._username, password=self._password) | |
189 | |
190 def resolve(): | |
191 '''Completes |fetch| and stores the results in blobstore. | |
192 ''' | |
193 repo_zip_url = self._repo_url + '/zipball' | |
194 try: | |
195 blob = fetch.Get().content | |
196 except urlfetch.DownloadError: | |
197 raise FileSystemError( | |
198 '%s: Failed to download zip file from repository %s' % repo_zip_url) | |
199 | |
200 repo_zip = _GithubZipFile.Create(self._repo_key, blob) | |
201 if repo_zip is None: | |
202 raise FileSystemError('%s: failed to create zip' % repo_zip_url) | |
203 | |
204 # Success. Update blobstore and the version. | |
205 self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE) | |
206 self._SetCachedVersion(version) | |
207 return repo_zip | |
208 | |
209 self._repo_zip = Future(delegate=Gettable(resolve)) | |
210 return self._repo_zip | |
211 | 237 |
212 def Read(self, paths, binary=False): | 238 def Read(self, paths, binary=False): |
213 '''Returns a dictionary mapping |paths| to the contents of the file at each | 239 '''Returns a dictionary mapping |paths| to the contents of the file at each |
214 path. If path ends with a '/', it is treated as a directory and is mapped to | 240 path. If path ends with a '/', it is treated as a directory and is mapped to |
215 a list of filenames in that directory. | 241 a list of filenames in that directory. |
216 | 242 |
217 |binary| is ignored. | 243 |binary| is ignored. |
218 ''' | 244 ''' |
| 245 self._EnsureRepoZip() |
219 def resolve(): | 246 def resolve(): |
220 repo_zip = self._repo_zip.Get() | 247 repo_zip = self._repo_zip.Get() |
221 if repo_zip is None: | |
222 raise FileNotFoundError('"%s" has not been Refreshed' % self._repo_key) | |
223 reads = {} | 248 reads = {} |
224 for path in paths: | 249 for path in paths: |
225 if path not in repo_zip.Paths(): | 250 if path not in repo_zip.Paths(): |
226 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) | 251 raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path)) |
227 if path == '' or path.endswith('/'): | 252 if path == '' or path.endswith('/'): |
228 reads[path] = repo_zip.List(path) | 253 reads[path] = repo_zip.List(path) |
229 else: | 254 else: |
230 reads[path] = repo_zip.Read(path) | 255 reads[path] = repo_zip.Read(path) |
231 return reads | 256 return reads |
232 | |
233 # Delay reading until after self._repo_zip has finished fetching. | |
234 return Future(delegate=Gettable(resolve)) | 257 return Future(delegate=Gettable(resolve)) |
235 | 258 |
236 def Stat(self, path): | 259 def Stat(self, path): |
237 '''Stats |path|, returning its version as a StatInfo object. If |path| ends | 260 '''Stats |path|, returning its version as a StatInfo object. If |path| ends |
238 with a '/', it is assumed to be a directory and the StatInfo object returned | 261 with a '/', it is assumed to be a directory and the StatInfo object returned |
239 includes child_versions for all paths in the directory. | 262 includes child_versions for all paths in the directory. |
240 | 263 |
241 File paths do not include the name of the zip file, which is arbitrary and | 264 File paths do not include the name of the zip file, which is arbitrary and |
242 useless to consumers. | 265 useless to consumers. |
243 | 266 |
244 Because the repository will only be downloaded once per server version, all | 267 Because the repository will only be downloaded once per server version, all |
245 stat versions are always 0. | 268 stat versions are always 0. |
246 ''' | 269 ''' |
| 270 self._EnsureRepoZip() |
247 repo_zip = self._repo_zip.Get() | 271 repo_zip = self._repo_zip.Get() |
248 if repo_zip is None: | |
249 raise FileNotFoundError('"%s" has never been Refreshed' % self._repo_key) | |
250 | 272 |
251 if path not in repo_zip.Paths(): | 273 if path not in repo_zip.Paths(): |
252 raise FileNotFoundError('"%s" does not contain file "%s"' % | 274 raise FileNotFoundError('"%s" does not contain file "%s"' % |
253 (self._repo_key, path)) | 275 (self._repo_key, path)) |
254 | 276 |
255 version = self._GetCachedVersion() | 277 version = self._stat_cache.Get(self._repo_key).Get() |
256 assert version, ('There was a zipball in datastore; there should be a ' | 278 assert version is not None, ('There was a zipball in datastore; there ' |
257 'version cached for it') | 279 'should be a version cached for it') |
258 | 280 |
259 stat_info = StatInfo(version) | 281 stat_info = StatInfo(version) |
260 if path == '' or path.endswith('/'): | 282 if path == '' or path.endswith('/'): |
261 stat_info.child_versions = dict((p, StatInfo(version)) | 283 stat_info.child_versions = dict((p, StatInfo(version)) |
262 for p in repo_zip.List(path)) | 284 for p in repo_zip.List(path)) |
263 return stat_info | 285 return stat_info |
264 | 286 |
265 def GetIdentity(self): | 287 def GetIdentity(self): |
266 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) | 288 return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key) |
267 | 289 |
268 def __repr__(self): | 290 def __repr__(self): |
269 return '%s(key=%s, url=%s)' % (type(self).__name__, | 291 return '%s(key=%s, url=%s)' % (type(self).__name__, |
270 self._repo_key, | 292 self._repo_key, |
271 self._repo_url) | 293 self._repo_url) |
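
The new code leans on the docs server's lazily-resolved Future(delegate=Gettable(...)) pattern: nothing is fetched until Get() is called, which is why Read() only builds the Future and defers the actual zip lookup. A minimal stand-in sketch of that pattern, purely for illustration; these are not the real Future/Gettable implementations from the codebase.

# Stand-ins illustrating the lazy Future(delegate=Gettable(...)) pattern used
# above; invented for illustration, not the real docs-server classes.
class Gettable(object):
  def __init__(self, fn):
    # Expose the callable as Get() so a Future can delegate to it.
    self.Get = fn

class Future(object):
  def __init__(self, value=None, delegate=None):
    self._value = value
    self._delegate = delegate

  def Get(self):
    # Resolve at most once, and only when a caller actually asks.
    if self._delegate is not None:
      self._value = self._delegate.Get()
      self._delegate = None
    return self._value

# Example: the work inside resolve() runs only on the first Get() call.
def resolve():
  return 'expensive zip fetch happens here'

lazy = Future(delegate=Gettable(resolve))
print lazy.Get()  # resolves now
print lazy.Get()  # returns the already-resolved value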
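
To summarize the refresh decision made in _EnsureRepoZip, here is a self-contained sketch of the same branch structure. The dicts and callables (ensure_repo_zip, fetch_live_version, fetch_zipball, etc.) are invented stand-ins for the object stores, blobstore and fetcher used in the patch; this is an illustration of the control flow, not the patch itself.

# Sketch of the refresh decision in _EnsureRepoZip, using invented stand-ins
# (plain dicts and callables) rather than the real caches/blobstore/fetcher.
def ensure_repo_zip(up_to_date_cache, stat_cache, blob_store, repo_key,
                    fetch_live_version, fetch_zipball):
  if up_to_date_cache.get(repo_key) is None:
    # Either a cron run (the up-to-date cache starts empty) or an instance
    # where a cron has never run: re-stat against GitHub.
    cached_version = stat_cache.get(repo_key)
    live_version = fetch_live_version()
    if cached_version != live_version:
      # Out of date (or never fetched): download the zipball from GitHub,
      # then record the blob, the up-to-date flag and the new version.
      blob = fetch_zipball()
      blob_store[repo_key] = blob
      up_to_date_cache[repo_key] = True
      stat_cache[repo_key] = live_version
      return blob
    # Stat unchanged; the copy already in blobstore is still good.
  return blob_store[repo_key]

# Example run: the first call downloads, the second is served from "blobstore".
up_to_date, stats, blobs = {}, {}, {}
get_sha = lambda: 'abc123'
get_zip = lambda: 'zip-bytes'
print ensure_repo_zip(up_to_date, stats, blobs, 'owner/repo', get_sha, get_zip)
print ensure_repo_zip(up_to_date, stats, blobs, 'owner/repo', get_sha, get_zip)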