Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1008)

Side by Side Diff: third_party/gsutil/boto/cloudsearch/search.py

Issue 12042069: Scripts to download files from google storage based on sha1 sums (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Removed gsutil/tests and gsutil/docs Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
2 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
3 # All Rights Reserved
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23 #
24 from math import ceil
25 import time
26 import json
27 import boto
28 import requests
29
30
class SearchServiceException(Exception):
    """Raised when the search service reports an error for a query."""
33
34
class CommitMismatchError(Exception):
    """Error type for a commit mismatch.

    NOTE(review): nothing in the search code of this module raises it;
    presumably it is used by other cloudsearch code -- confirm.
    """
37
38
class SearchResults(object):
    """One page of results returned by the search service.

    Built from the decoded response, plus the ``query`` that produced it
    and the ``search_service`` callable used to fetch further pages.
    Iterating over the object yields the documents of this page and
    ``len()`` returns how many documents the page holds.
    """

    def __init__(self, **attrs):
        """Unpack the response fields into attributes.

        :type attrs: dict
        :param attrs: Must contain ``info`` (with ``rid``, ``cpu-time-ms``
            and ``time-ms``), ``hits`` (with ``found``, ``hit`` and
            ``start``), ``rank``, ``match-expr``, ``query`` and
            ``search_service``.

        :raises KeyError: If any of the keys listed above is missing.
        """
        info = attrs['info']
        hits = attrs['hits']

        self.rid = info['rid']
        # self.doc_coverage_pct = attrs['info']['doc-coverage-pct']
        self.cpu_time_ms = info['cpu-time-ms']
        self.time_ms = info['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Force true division: with Python 2 integer operands ``/`` floors,
        # which turned ceil() into a no-op and dropped the partial last page.
        self.num_pages_needed = int(
            ceil(float(self.hits) / self.query.real_size))

    def __len__(self):
        """Return the number of documents on this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the documents on this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances ``query.start`` by one page and increments ``query.page``
        on the shared query object before re-running the search.

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: A cloudsearch SearchResults object

        :raises StopIteration: If the next offset lies at or beyond the
            total number of hits, i.e. there is no further page.
        """
        next_start = self.query.start + self.query.real_size
        # Compare offsets rather than page numbers: the page counter is
        # zero-based, so ``page <= num_pages_needed`` kept requesting pages
        # past the end of the result set.
        if next_start >= self.hits:
            raise StopIteration

        self.query.start = next_start
        self.query.page += 1
        return self.search_service(self.query)
74
75
class Query(object):
    """Parameters of a single search request.

    Holds the search options as attributes and converts them into the
    request-parameter dictionary with :meth:`to_params`.
    """

    # Upper bound on the page size sent in one request; a requested size of
    # 0 or anything larger is clamped to this value (see ``update_size``).
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):
        """Store the search options, replacing omitted ones with empties.

        :param q: Value sent as the ``q`` parameter.
        :param bq: Value sent as the ``bq`` parameter.
        :param rank: List of strings, comma-joined into ``rank``.
        :param return_fields: List of strings, comma-joined into
            ``return-fields``.
        :param size: Requested page size; see :meth:`update_size`.
        :param start: Offset sent as the ``start`` parameter.
        :param facet: List of strings, comma-joined into ``facet``.
        :param facet_constraints: Dict; key ``K`` is sent as
            ``facet-K-constraints``.
        :param facet_sort: Dict; key ``K`` is sent as ``facet-K-sort``.
        :param facet_top_n: Dict; key ``K`` is sent as ``facet-K-top-n``.
        :param t: Dict; key ``K`` is sent as ``t-K``.
        """
        self.q = q
        self.bq = bq
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or []
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Set the requested size and derive the per-request page size.

        ``size`` keeps the value asked for; ``real_size`` is what is sent
        to the service: ``RESULTS_PER_PAGE`` when ``new_size`` is 0 or
        exceeds that limit, otherwise ``new_size`` itself.

        :type new_size: int
        :param new_size: Requested number of results.
        """
        self.size = new_size
        use_maximum = (self.size > Query.RESULTS_PER_PAGE or self.size == 0)
        self.real_size = Query.RESULTS_PER_PAGE if use_maximum else self.size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        ``start`` and ``size`` are always present; every other parameter is
        included only when the corresponding attribute is non-empty.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        if self.q:
            params['q'] = self.q

        if self.bq:
            params['bq'] = self.bq

        if self.rank:
            params['rank'] = ','.join(self.rank)

        if self.return_fields:
            params['return-fields'] = ','.join(self.return_fields)

        if self.facet:
            params['facet'] = ','.join(self.facet)

        # Options that expand to one request parameter per dictionary key.
        per_key_options = (
            ('facet-%s-constraints', self.facet_constraints),
            ('facet-%s-sort', self.facet_sort),
            ('facet-%s-top-n', self.facet_top_n),
            ('t-%s', self.t),
        )
        for name_template, mapping in per_key_options:
            if not mapping:
                continue
            # items() rather than iteritems(): the latter does not exist on
            # Python 3, while items() behaves the same on both versions here.
            for key, value in mapping.items():
                params[name_template % key] = value

        return params
142
143
class SearchConnection(object):
    """Client for the search endpoint of a CloudSearch domain.

    Instances are callable: ``connection(query)`` runs the query and
    returns a :class:`boto.cloudsearch.search.SearchResults`.
    """

    def __init__(self, domain=None, endpoint=None):
        """Remember the domain and resolve the endpoint host.

        :param domain: Object exposing ``search_service_endpoint``; only
            consulted when ``endpoint`` is not given.

        :type endpoint: str
        :param endpoint: Host name of the search service. When empty, the
            domain's ``search_service_endpoint`` is used instead.
        """
        self.domain = domain
        self.endpoint = endpoint
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Create a Query from the given search options.

        Takes the same arguments as :meth:`search` and forwards them
        unchanged to the ``Query`` constructor.

        :rtype: :class:`boto.cloudsearch.search.Query`
        :return: The query object, not yet executed
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Query Cloudsearch

        Builds a query from the arguments and executes it immediately.

        :param q: Value sent as the ``q`` request parameter.

        :param bq: Value sent as the ``bq`` request parameter.

        :type rank: list
        :param rank: Strings, comma-joined into the ``rank`` parameter.

        :type return_fields: list
        :param return_fields: Strings, comma-joined into ``return-fields``.

        :type size: int
        :param size: Requested number of results; 0 or a value above
            ``Query.RESULTS_PER_PAGE`` is sent as that maximum.

        :type start: int
        :param start: Offset of the first result to return.

        :type facet: list
        :param facet: Strings, comma-joined into the ``facet`` parameter.

        :type facet_constraints: dict
        :param facet_constraints: Key ``K`` is sent as
            ``facet-K-constraints``.

        :type facet_sort: dict
        :param facet_sort: Key ``K`` is sent as ``facet-K-sort``.

        :type facet_top_n: dict
        :param facet_top_n: Key ``K`` is sent as ``facet-K-top-n``.

        :type t: dict
        :param t: Key ``K`` is sent as ``t-K``.

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: A cloudsearch SearchResults object
        """
        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A fully specified Query instance

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: A cloudsearch SearchResults object

        :raises SearchServiceException: If the response carries an
            ``error`` key together with a message of severity ``fatal``,
            or an ``error`` key without any ``messages``.
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        data = json.loads(r.content)
        # SearchResults needs these two to be able to fetch further pages.
        data['query'] = query
        data['search_service'] = self

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                        "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                % (params), query)

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield successive result pages, advancing ``query.start`` each time."""
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # Decide from the offset and the reported total instead of a
            # page counter: the old ``page <= num_pages_needed`` test issued
            # a redundant request whenever the page count was exact. An
            # empty page also ends the walk, so an overstated total cannot
            # trigger further pointless requests.
            if len(results) == 0 or query.start >= results.hits:
                break

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        The query is modified in place: its size is set to ``per_page``
        and its ``start`` offset moves forward as pages are consumed.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A fully specified Query instance

        :type per_page: int
        :param per_page: Number of docs in each SearchResults object.

        :rtype: generator
        :return: Generator containing
            :class:`boto.cloudsearch.search.SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner. The query's ``start`` offset is advanced in place as
        pages are consumed.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A fully specified Query instance

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        The search is run with a page size of 1 to keep the response
        small; the query's original size is restored afterwards so the
        caller's query is not left permanently shrunk.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A fully specified Query instance

        :rtype: int
        :return: Total number of hits for query
        """
        requested_size = query.size
        query.update_size(1)
        try:
            return self(query).hits
        finally:
            query.update_size(requested_size)
296
297
298
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698