OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ |
| 2 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. |
| 3 # All Rights Reserved |
| 4 # |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a |
| 6 # copy of this software and associated documentation files (the |
| 7 # "Software"), to deal in the Software without restriction, including |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- |
| 11 # lowing conditions: |
| 12 # |
| 13 # The above copyright notice and this permission notice shall be included |
| 14 # in all copies or substantial portions of the Software. |
| 15 # |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 22 # IN THE SOFTWARE. |
| 23 # |
| 24 from math import ceil |
| 25 import time |
| 26 import json |
| 27 import boto |
| 28 import requests |
| 29 |
| 30 |
class SearchServiceException(Exception):
    """Error reported by the search service while processing a request.

    Raised by ``SearchConnection.__call__`` when the decoded response
    carries an ``error`` entry; the exception arguments are a formatted
    message followed by the query object that triggered it.
    """
| 33 |
| 34 |
class CommitMismatchError(Exception):
    """Signals a mismatch detected while committing.

    NOTE(review): nothing in the search code visible here raises this;
    presumably it belongs to the document-upload side -- confirm where
    it is used.
    """
| 37 |
| 38 |
class SearchResults(object):
    """One page of results decoded from a search response.

    Instances are built from the keyword-expanded response dictionary,
    which must contain the ``info``, ``hits``, ``rank`` and ``match-expr``
    entries of the response plus the ``query`` and ``search_service``
    entries describing how the page was obtained.

    ``len(results)`` is the number of entries on this page and iterating
    yields those entries (the contents of ``hits['hit']``).  ``hits`` is
    the total number of matches reported in ``hits['found']``.
    """

    def __init__(self, **attrs):
        """Copy the response fields onto the instance.

        :raises KeyError: if an expected entry is missing from ``attrs``
        :raises ZeroDivisionError: if ``query.real_size`` is zero
        """
        info = attrs['info']
        hits = attrs['hits']

        self.rid = info['rid']
        # self.doc_coverage_pct = attrs['info']['doc-coverage-pct']
        self.cpu_time_ms = info['cpu-time-ms']
        self.time_ms = info['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Force true division before rounding up: with two ints Python 2
        # floors the quotient first, which under-counts the pages whenever
        # the total is not an exact multiple of the page size.
        self.num_pages_needed = int(
            ceil(float(self.hits) / self.query.real_size))

    def __len__(self):
        """Return the number of entries on this page."""
        return len(self.docs)

    def __iter__(self):
        """Iterate over the entries on this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances ``query.start`` by one page and increments ``query.page``
        on the shared query object before re-issuing the search.

        :rtype: :class:`SearchResults`
        :return: A cloudsearch SearchResults object
        :raises StopIteration: when the next offset would lie at or beyond
            the total number of hits, i.e. there is no further page
        """
        following_start = self.query.start + self.query.real_size
        # Decide from the actual offsets rather than a page counter so that
        # no request is issued for a page that is known to be empty.
        if following_start >= self.hits:
            raise StopIteration

        self.query.start = following_start
        self.query.page += 1
        return self.search_service(self.query)
| 74 |
| 75 |
class Query(object):
    """Holds the parameters of one search request.

    ``size`` is the page size the caller asked for; ``real_size`` is the
    page size actually sent, capped at :attr:`RESULTS_PER_PAGE`.  A
    requested size of ``0`` also selects :attr:`RESULTS_PER_PAGE`.
    """

    # Largest page size that is ever sent in a single request.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):
        """Store the search parameters.

        :param q: value sent as the ``q`` parameter, if truthy
        :param bq: value sent as the ``bq`` parameter, if truthy
        :param rank: list of strings, comma-joined into ``rank``
        :param return_fields: list of strings, comma-joined into
            ``return-fields``
        :param size: requested page size (see :meth:`update_size`)
        :param start: offset sent as the ``start`` parameter
        :param facet: list of strings, comma-joined into ``facet``
        :param facet_constraints: dict sent as ``facet-<key>-constraints``
        :param facet_sort: dict sent as ``facet-<key>-sort``
        :param facet_top_n: dict sent as ``facet-<key>-top-n``
        :param t: dict sent as ``t-<key>``
        """
        self.q = q
        self.bq = bq
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or []
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Set the requested page size and derive the size actually sent.

        :param new_size: requested number of results per page; ``0`` or
            anything above :attr:`RESULTS_PER_PAGE` is sent as
            :attr:`RESULTS_PER_PAGE`
        """
        self.size = new_size
        if new_size == 0 or new_size > Query.RESULTS_PER_PAGE:
            self.real_size = Query.RESULTS_PER_PAGE
        else:
            self.real_size = new_size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        Only ``start`` and ``size`` are always present; every other
        parameter is added only when its attribute is truthy.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        if self.q:
            params['q'] = self.q

        if self.bq:
            params['bq'] = self.bq

        # List-valued options are sent as one comma-separated string.
        joined = (
            ('rank', self.rank),
            ('return-fields', self.return_fields),
            ('facet', self.facet),
        )
        for name, values in joined:
            if values:
                params[name] = ','.join(values)

        # Dict-valued options expand to one parameter per key.  ``items()``
        # is used instead of ``iteritems()`` so this runs on both Python 2
        # and Python 3.
        per_key = (
            ('facet-%s-constraints', self.facet_constraints),
            ('facet-%s-sort', self.facet_sort),
            ('facet-%s-top-n', self.facet_top_n),
            ('t-%s', self.t),
        )
        for template, mapping in per_key:
            if mapping:
                for key, value in mapping.items():
                    params[template % key] = value

        return params
| 142 |
| 143 |
class SearchConnection(object):
    """Issues search requests against a search endpoint.

    The endpoint is either given directly or read from
    ``domain.search_service_endpoint``.  Calling the instance with a
    :class:`Query` performs the HTTP request and returns a
    :class:`SearchResults`.
    """

    def __init__(self, domain=None, endpoint=None):
        """Remember the domain and resolve the endpoint host.

        :param domain: object exposing ``search_service_endpoint``; only
            consulted when ``endpoint`` is falsy
        :param endpoint: host name of the search endpoint
        """
        self.domain = domain
        self.endpoint = endpoint
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Create a :class:`Query` from the given search parameters.

        The arguments are passed through unchanged; see :meth:`search`
        for their meaning.

        :rtype: :class:`Query`
        :return: a new query object
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Query Cloudsearch

        :type q: string
        :param q: Sent as the ``q`` request parameter when truthy

        :type bq: string
        :param bq: Sent as the ``bq`` request parameter when truthy

        :type rank: list of strings
        :param rank: Comma-joined into the ``rank`` request parameter

        :type return_fields: list of strings
        :param return_fields: Comma-joined into the ``return-fields``
            request parameter

        :type size: int
        :param size: Requested number of results per page; ``0`` or any
            value above ``Query.RESULTS_PER_PAGE`` is sent as
            ``Query.RESULTS_PER_PAGE``

        :type start: int
        :param start: Offset sent as the ``start`` request parameter

        :type facet: list of strings
        :param facet: Comma-joined into the ``facet`` request parameter

        :type facet_constraints: dict
        :param facet_constraints: Each item is sent as
            ``facet-<key>-constraints``

        :type facet_sort: dict
        :param facet_sort: Each item is sent as ``facet-<key>-sort``

        :type facet_top_n: dict
        :param facet_top_n: Each item is sent as ``facet-<key>-top-n``

        :type t: dict
        :param t: Each item is sent as ``t-<key>``

        :rtype: :class:`SearchResults`
        :return: A cloudsearch SearchResults object
        """
        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :rtype: :class:`SearchResults`
        :return: A cloudsearch SearchResults object

        :raises SearchServiceException: if the response contains an
            ``error`` entry together with a message of severity
            ``fatal``, or an ``error`` entry and no ``messages`` at all
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        data = json.loads(r.content)
        # SearchResults keeps these so a page can request its successor.
        data['query'] = query
        data['search_service'] = self

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException("Error processing search %s "
                        "=> %s" % (params, m['message']), query)
            # NOTE(review): an error whose messages contain no 'fatal'
            # entry falls through to SearchResults below -- confirm that
            # this is intended.
        elif 'error' in data:
            raise SearchServiceException("Unknown error processing search %s"
                % (params), query)

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield consecutive result pages, advancing ``query.start`` in place.

        The first page is always requested.  Paging stops as soon as the
        next offset reaches the total hit count reported by the latest
        page, so no request is made for a page known to be empty.
        """
        while True:
            results = self(query)
            yield results
            query.start += query.real_size
            # A non-positive page size could never reach the end; stop
            # after the first page instead of looping forever.
            if query.real_size <= 0 or query.start >= results.hits:
                break

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        ``query`` is modified: its size is set to ``per_page`` and its
        ``start`` offset is advanced as pages are consumed.

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :type per_page: int
        :param per_page: Number of docs in each SearchResults object.

        :rtype: generator
        :return: Generator containing :class:`SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        ``query.start`` is advanced as pages are consumed.

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        ``query`` is modified: its size is set to 1 so that the request
        transfers as little as possible.

        :type query: :class:`Query`
        :param query: A fully specified Query instance

        :rtype: int
        :return: Total number of hits for query
        """
        query.update_size(1)
        return self(query).hits
| 296 |
| 297 |
| 298 |
OLD | NEW |