// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package memory

import (
	"bytes"
	"fmt"
	"sort"
	"strings"

	ds "github.com/luci/gae/service/datastore"
	"github.com/luci/gae/service/datastore/serialize"
)

// reducedQuery contains only the pieces of the query necessary to iterate for
// results.
//   - deduplication is applied externally
//   - projection / keysonly / entity retrieval is done externally
type reducedQuery struct {
	ns   string
	kind string

	// eqFilters indicate the set of all prefix constraints which need to be
	// fulfilled in the composite query. All of these will translate into prefix
	// bytes for SOME index.
	eqFilters map[string]stringSet

	// suffixFormat is the PRECISE listing of the suffix columns that ALL indexes
	// in the multi query will have.
	//
	// suffixFormat ALWAYS includes the inequality filter (if any) as the 0th
	// element.
	// suffixFormat ALWAYS includes any additional projections (in ascending
	// order) after all user defined sort orders.
	// suffixFormat ALWAYS has __key__ as the last column.
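	//
	// For example (illustrative only), a query with an inequality filter on B
	// and a sort order of (B, C) would end up with a suffixFormat of roughly
	// [B, C, __key__].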
	suffixFormat []ds.IndexColumn

	// limits of the inequality and/or full sort order. This is ONLY a suffix,
	// and it will be appended to the prefix during iteration.
	start []byte
	end   []byte

	// metadata describing the total number of columns that this query requires
	// to execute perfectly.
	numCols int
}

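// IndexDefinitionSortable is an index definition, annotated with its actual
// equality-prefix columns and the collection (if any) which backs it.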
type IndexDefinitionSortable struct {
	// eqFilts is the list of ACTUAL prefix columns. Note that it may contain
	// redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, because
	// (tag=1, tag=2) is a perfectly valid query).
	eqFilts []ds.IndexColumn
	coll    *memCollection
}

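// hasAncestor returns true iff the index's equality prefix begins with the
// special __ancestor__ column.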
func (i *IndexDefinitionSortable) hasAncestor() bool {
	return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__"
}

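// numEqHits counts how many of this index's equality-prefix properties still
// have values available in c.constraints.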
func (i *IndexDefinitionSortable) numEqHits(c *constraints) int {
	ret := 0
	for _, filt := range i.eqFilts {
		if _, ok := c.constraints[filt.Property]; ok {
			ret++
		}
	}
	return ret
}

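// IndexDefinitionSortableSlice is a sortable slice of IndexDefinitionSortable;
// it sorts so that indexes with fewer equality-prefix columns come first.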
type IndexDefinitionSortableSlice []IndexDefinitionSortable

func (s IndexDefinitionSortableSlice) Len() int { return len(s) }
func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
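// Less sorts indexes with no backing collection first, then by ascending
// number of equality-prefix columns, breaking ties column-by-column by
// direction (ASCENDING before DESCENDING) and then by property name.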
func (s IndexDefinitionSortableSlice) Less(i, j int) bool {
	a, b := s[i], s[j]
	if a.coll == nil && b.coll != nil {
		return true
	} else if a.coll != nil && b.coll == nil {
		return false
	}

	cmp := len(a.eqFilts) - len(b.eqFilts)
	if cmp < 0 {
		return true
	} else if cmp > 0 {
		return false
	}
	for k, col := range a.eqFilts {
		ocol := b.eqFilts[k]
		if col.Direction == ds.ASCENDING && ocol.Direction == ds.DESCENDING {
			return true
		} else if col.Direction == ds.DESCENDING && ocol.Direction == ds.ASCENDING {
			return false
		}
		if col.Property < ocol.Property {
			return true
		} else if col.Property > ocol.Property {
			return false
		}
	}
	return false
}

// maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.
// It's only added if it could be useful in servicing q; otherwise this function
// is a noop.
//
// This returns true iff the proposed index is OK and depletes missingTerms to
// empty.
//
// If the proposed index is PERFECT (e.g. contains enough columns to cover all
// equality filters, and also has the correct suffix), idxs will be replaced
// with JUST that index, and this will return true.
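//
// For instance (illustrative values only): with eqFilters {A: {1, 2}, B: {3}},
// suffixFormat [C, __key__] and numCols == 5, an index whose full sort order
// is [A, A, B, C, __key__] would be a perfect match.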
func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) bool {
	// Kindless queries are handled elsewhere.
	if id.Kind != q.kind {
		impossible(
			fmt.Errorf("maybeAddDefinition given index with wrong kind %q v %q", id.Kind, q.kind))
	}

	// If we're an ancestor query, and the index is compound, but doesn't include
	// an Ancestor field, it doesn't work. Builtin indexes can be used for
	// ancestor queries (and have !Ancestor), assuming that it's only equality
	// filters (plus inequality on __key__), or a single inequality.
	if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {
		impossible(
			fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q))
	}

	// GetFullSortOrder adds __ancestor__ if necessary.
	sortBy := id.GetFullSortOrder()

	// If the index has fewer fields than we need for the suffix, it can't
	// possibly help.
	if len(sortBy) < len(q.suffixFormat) {
		return false
	}

	numEqFilts := len(sortBy) - len(q.suffixFormat)
	// make sure the orders are precisely the same
	for i, sb := range sortBy[numEqFilts:] {
		if q.suffixFormat[i] != sb {
			return false
		}
	}

	if id.Builtin() && numEqFilts == 0 {
		if len(q.eqFilters) > 1 || (len(q.eqFilters) == 1 && q.eqFilters["__ancestor__"] == nil) {
			return false
		}
	}

	// Make sure the equalities section doesn't contain any properties we don't
	// want in our query.
	//
	// numByProp && totalEqFilts will be used to see if this is a perfect match
	// later.
	numByProp := make(map[string]int, len(q.eqFilters))
	totalEqFilts := 0

	eqFilts := sortBy[:numEqFilts]
	for _, p := range eqFilts {
		if _, ok := q.eqFilters[p.Property]; !ok {
			return false
		}
		numByProp[p.Property]++
		totalEqFilts++
	}

	// ok, we can actually use this

	// Grab the collection for convenience later. We don't want to invalidate this
	// index's potential just because the collection doesn't exist. If it's
	// a builtin and it doesn't exist, it still needs to be one of the 'possible'
	// indexes... it just means that the user's query will end up with no results.
	coll := s.GetCollection(
		fmt.Sprintf("idx:%s:%s", q.ns, serialize.ToBytes(*id.PrepForIdxTable())))

	// First, see if it's a perfect match. If it is, then our search is over.
	//
	// A perfect match contains ALL the equality filter columns (or more, since
	// we can use residuals to fill in the extras).
	toAdd := IndexDefinitionSortable{coll: coll}
	toAdd.eqFilts = eqFilts
	for _, sb := range toAdd.eqFilts {
		delete(missingTerms, sb.Property)
	}

	perfect := false
	if len(sortBy) == q.numCols {
		perfect = true
		for k, num := range numByProp {
			if num < len(q.eqFilters[k]) {
				perfect = false
				break
			}
		}
	}
	if perfect {
		*idxs = IndexDefinitionSortableSlice{toAdd}
	} else {
		*idxs = append(*idxs, toAdd)
	}
	return len(missingTerms) == 0
}

// getRelevantIndicies retrieves the relevant indexes which could be used to
// service q. It returns nil if it's not possible to service q with the current
// indexes.
func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableSlice, error) {
	missingTerms := map[string]struct{}{}
	for k := range q.eqFilters {
		if k == "__ancestor__" {
			// ancestor is not a prefix which can be satisfied by a single index. It
			// must be satisfied by ALL indexes (and has special logic for this in
			// the addDefinition logic)
			continue
		}
		missingTerms[k] = struct{}{}
	}
	idxs := IndexDefinitionSortableSlice{}

	// First we add builtins
	// add
	//   idx:KIND
	if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
		Kind: q.kind,
	}) {
		return idxs, nil
	}

	// add
	//   idx:KIND:prop
	//   idx:KIND:-prop
	props := map[string]struct{}{}
	for prop := range q.eqFilters {
		props[prop] = struct{}{}
	}
	for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] {
		props[col.Property] = struct{}{}
	}
	for prop := range props {
		if strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__") {
			continue
		}
		if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
			Kind: q.kind,
			SortBy: []ds.IndexColumn{
				{Property: prop},
			},
		}) {
			return idxs, nil
		}
		if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
			Kind: q.kind,
			SortBy: []ds.IndexColumn{
				{Property: prop, Direction: ds.DESCENDING},
			},
		}) {
			return idxs, nil
		}
	}

	// Try adding all compound indexes whose suffix matches.
	suffix := &ds.IndexDefinition{
		Kind:     q.kind,
		Ancestor: q.eqFilters["__ancestor__"] != nil,
		SortBy:   q.suffixFormat,
	}
	walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool {
		// keep walking until we find a perfect index.
		return !idxs.maybeAddDefinition(q, s, missingTerms, def)
	})

	// this query is impossible to fulfil with the current indexes. Not all of
	// the terms (equality + projection) are satisfied.
	if len(missingTerms) > 0 || len(idxs) == 0 {
		remains := &ds.IndexDefinition{
			Kind:     q.kind,
			Ancestor: q.eqFilters["__ancestor__"] != nil,
		}
		terms := make([]string, 0, len(missingTerms))
		for mt := range missingTerms {
			terms = append(terms, mt)
		}
		if serializationDeterministic {
			sort.Strings(terms)
		}
		for _, term := range terms {
			remains.SortBy = append(remains.SortBy, ds.IndexColumn{Property: term})
		}
		remains.SortBy = append(remains.SortBy, q.suffixFormat...)
		last := remains.SortBy[len(remains.SortBy)-1]
		if last.Direction == ds.ASCENDING {
			// this removes the __key__ column, since it's implicit.
			remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]
		}
		if remains.Builtin() {
			impossible(
				fmt.Errorf("recommended missing index would be a builtin: %s", remains))
		}
		return nil, fmt.Errorf(
			"Your indexes are insufficient! Try adding:\n  %s", remains)
	}

	return idxs, nil
}

// generate generates a single iterDefinition for the given index.
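//
// The prefix is built by peeling one constraint value for each of the index's
// equality-prefix columns (inverted for DESCENDING columns). If the query is
// an ancestor query but the index has no explicit __ancestor__ column, the
// (chopped) ancestor key is folded into the prefix and into start/end.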
func generate(q *reducedQuery, idx *IndexDefinitionSortable, c *constraints) *iterDefinition {
	def := &iterDefinition{
		c:     idx.coll,
		start: q.start,
		end:   q.end,
	}
	toJoin := make([][]byte, 0, len(idx.eqFilts))
	for _, sb := range idx.eqFilts {
		val := c.peel(sb.Property)
		if sb.Direction == ds.DESCENDING {
			val = invert(val)
		}
		toJoin = append(toJoin, val)
	}
	def.prefix = bjoin(toJoin...)
	def.prefixLen = len(def.prefix)

	if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() {
		// The query requires an ancestor, but the index doesn't explicitly have it
		// as part of the prefix (otherwise it would have been the first eqFilt
		// above). This happens when it's a builtin index, or if it's the primary
		// index (for a kindless query), or if it's the Kind index (for a filterless
		// query).
		//
		// builtin indexes are:
		//   Kind/__key__
		//   Kind/Prop/__key__
		//   Kind/Prop/-__key__
		if len(q.suffixFormat) > 2 || q.suffixFormat[len(q.suffixFormat)-1].Property != "__key__" {
			// This should never happen. One of the previous validators would have
			// selected a different index. But just in case.
			impossible(fmt.Errorf("cannot supply an implicit ancestor for %#v", idx))
		}

		// This silly construction gets the __ancestor__ value, because it's a
		// map[string]struct{} instead of a [][]byte{} (otherwise we'd just get
		// the value at the 0th index).
		anc := ""
		for k := range q.eqFilters["__ancestor__"] {
			anc = k
			break
		}

		// Intentionally do NOT update prefixLen. This allows multiIterator to
		// correctly include the entire key in the shared iterator suffix, instead
		// of just the remainder.

		// chop the terminal null byte off the q.ancestor key... we can accept
		// anything which is a descendant or an exact match. Removing the last byte
		// from the key (the terminating null) allows this trick to work. Otherwise
		// it would be a closed range of EXACTLY this key.
		chopped := []byte(anc[:len(anc)-1])
		if q.suffixFormat[0].Direction == ds.DESCENDING {
			chopped = invert(chopped)
		}
		def.prefix = bjoin(def.prefix, chopped)

		// Update start and end, since we know that if they contain anything, they
		// contain values for the __key__ field.
		if def.start != nil {
			offset := 0
			if len(q.suffixFormat) > 1 {
				chunks, _ := parseSuffix(q.ns, q.suffixFormat, def.start, 1)
				offset = len(chunks[0])
			}
			if !bytes.HasPrefix(def.start[offset:], chopped) {
				// again, shouldn't happen, but if it does, we want to know about it.
				impossible(fmt.Errorf(
					"start suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",
					def.start, chopped))
			}
			def.start = def.start[:offset+len(chopped)]
		}
		if def.end != nil {
			offset := 0
			if len(q.suffixFormat) > 1 {
				chunks, _ := parseSuffix(q.ns, q.suffixFormat, def.end, 1)
				offset = len(chunks[0])
			}
			if !bytes.HasPrefix(def.end[offset:], chopped) {
				impossible(fmt.Errorf(
					"end suffix for implied ancestor doesn't start with ancestor! end:%v ancestor:%v",
					def.end, chopped))
			}
			def.end = def.end[:offset+len(chopped)]
		}
	}

	return def
}

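// constraints tracks the equality-filter values for a query: the values which
// still need to be consumed (constraints), the full original set of values
// (original), and a per-property counter used to cycle through residual
// values (residualMapping).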
type constraints struct {
	constraints     map[string][][]byte
	original        map[string][][]byte
	residualMapping map[string]int
}

// peel picks a constraint value for the property. It then removes this value
// from constraints (possibly removing the entire row from constraints if it
// was the last value). If the value wasn't available in constraints, it picks
// the value from residuals.
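//
// For example (illustrative), with constraints {A: [x, y]}, successive calls
// of peel("A") return x, then y (deleting the row once it's exhausted), and
// any further calls cycle through the original values as residuals.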
func (c *constraints) peel(prop string) []byte {
	ret := []byte(nil)
	if vals, ok := c.constraints[prop]; ok {
		ret = vals[0]
		if len(vals) == 1 {
			delete(c.constraints, prop)
		} else {
			c.constraints[prop] = vals[1:]
		}
	} else {
		row := c.original[prop]
		idx := c.residualMapping[prop]
		c.residualMapping[prop]++
		ret = row[idx%len(row)]
	}
	return ret
}

func (c *constraints) empty() bool {
	return len(c.constraints) == 0
}

// calculateConstraints produces a mapping of all equality filters to the values
// that they're constrained to. It also calculates residuals, which are an
// arbitrary value for filling index prefixes which have more equality fields
// than are necessary. The value doesn't matter, as long as it's an equality
// constraint in the original query.
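//
// For example (illustrative), a query with eqFilters
// {A: {x, y}, __ancestor__: {k}} yields original == {A: [x, y],
// __ancestor__: [k]} but constraints == {A: [x, y]}, since __ancestor__ is
// only ever consumed as a residual.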
func calculateConstraints(q *reducedQuery) *constraints {
	ret := &constraints{
		original:        make(map[string][][]byte, len(q.eqFilters)),
		constraints:     make(map[string][][]byte, len(q.eqFilters)),
		residualMapping: make(map[string]int),
	}
	for prop, vals := range q.eqFilters {
		bvals := make([][]byte, 0, len(vals))
		for val := range vals {
			bvals = append(bvals, []byte(val))
		}
		ret.original[prop] = bvals
		if prop == "__ancestor__" {
			// exclude __ancestor__ from the constraints.
			//
			// This is because it's handled specially during index proposal and
			// generation. Ancestor is used by ALL indexes, and so its residual value
			// in ret.original above will be sufficient.
			continue
		}
		ret.constraints[prop] = bvals
	}
	return ret
}

// getIndexes returns a set of iterator definitions. Iterating over these
// will result in matching suffixes.
func getIndexes(q *reducedQuery, s *memStore) ([]*iterDefinition, error) {
	relevantIdxs := IndexDefinitionSortableSlice(nil)
	if q.kind == "" {
		if coll := s.GetCollection("ents:" + q.ns); coll != nil {
			relevantIdxs = IndexDefinitionSortableSlice{{coll: coll}}
		}
	} else {
		err := error(nil)
		relevantIdxs, err = getRelevantIndicies(q, s)
		if err != nil {
			return nil, err
		}
	}
	if len(relevantIdxs) == 0 {
		return nil, errQueryDone
	}

	// This sorts it so that relevantIdxs goes less filters -> more filters. We
	// traverse this list backwards, however, so we traverse it in more filters ->
	// less filters order.
	sort.Sort(relevantIdxs)

	constraints := calculateConstraints(q)

	ret := []*iterDefinition{}
	for !constraints.empty() || len(ret) == 0 {
		bestIdx := (*IndexDefinitionSortable)(nil)
		if len(ret) == 0 {
			// if ret is empty, take the biggest relevantIdx. It's guaranteed to have
			// the greatest number of equality filters of any index in the list, and
			// we know that every equality filter will be pulled from constraints and
			// not residual.
			//
			// This also takes care of the case when the query has no equality filters,
			// in which case relevantIdxs will actually only contain one index anyway
			// :)
			bestIdx = &relevantIdxs[len(relevantIdxs)-1]
			if bestIdx.coll == nil {
				return nil, errQueryDone
			}
		} else {
			// If ret's not empty, then we need to find the best index we can. The
			// best index will be the one with the most matching equality columns.
			// Since relevantIdxs is sorted primarily by the number of equality
			// columns, we walk down the list until the number of possible columns is
			// worse than our best-so-far.
			//
			// Traversing the list backwards goes from more filters -> less filters,
			// but also allows us to remove items from the list as we iterate over it.
			bestNumEqHits := 0
			for i := len(relevantIdxs) - 1; i >= 0; i-- {
				idx := &relevantIdxs[i]
				if len(idx.eqFilts) < bestNumEqHits {
					// if the number of filters drops below our best hit, it's never going
					// to get better than that. This index might be helpful on a later
					// loop though, so don't remove it.
					break
				}
				numHits := 0
				if idx.coll != nil {
					numHits = idx.numEqHits(constraints)
				}
				if numHits > bestNumEqHits {
					bestNumEqHits = numHits
					bestIdx = idx
				} else if numHits == 0 {
					// This index will never become useful again, so remove it.
					relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...)
				}
			}
		}
		if bestIdx == nil {
			// something is really wrong here... if relevantIdxs is !nil, then we
			// should always be able to make progress in this loop.
			impossible(fmt.Errorf("deadlock: cannot fulfil query?"))
		}
		ret = append(ret, generate(q, bestIdx, constraints))
	}

	return ret, nil
}