Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(441)

Side by Side Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator
Patch Set: minor fixes Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package memory
6
7 import (
8 "bytes"
9 "fmt"
10 "sort"
11 "strings"
12
13 ds "github.com/luci/gae/service/datastore"
14 "github.com/luci/gae/service/datastore/serialize"
15 )
16
17 // reducedQuery contains only the pieces of the query necessary to iterate for
18 // results.
19 // deduplication is applied externally
20 // projection / keysonly / entity retrieval is done externally
21 type reducedQuery struct {
22 ns string
23 kind string
24
25 // eqFilters indicate the set of all prefix constraints which need to be
26 // fulfilled in the composite query. All of these will translate into pr efix
27 // bytes for SOME index.
28 eqFilters map[string]map[string]struct{}
dnj (Google) 2015/08/28 17:54:22 nit: Consider defining a type for "map[string]stru
iannucci 2015/08/28 19:48:55 done, though it doesn't really do too much.
29
30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes
31 // in the multi query will have.
32 //
33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h
34 // element
35 // suffixFormat ALWAYS includes any additional projections (in ascending
36 // order) after all user defined sort orders
37 // suffixFormat ALWAYS has __key__ as the last column
38 suffixFormat []ds.IndexColumn
39
40 // limits of the inequality and/or full sort order. This is ONLY a suffi x,
41 // and it will be appended to the prefix during iteration.
42 start []byte
43 end []byte
44
45 // metadata describing the total number of columns that this query requi res to
46 // execute perfectly.
47 numCols int
48 }
49
50 type IndexDefinitionSortable struct {
51 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n
52 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase
53 // (tag=1, tag=2) is a perfectly valid query).
54 eqFilts []ds.IndexColumn
55 coll *memCollection
56 }
57
58 func (i *IndexDefinitionSortable) hasAncestor() bool {
59 return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__"
60 }
61
62 func (i *IndexDefinitionSortable) numEqHits(c *constraints) int {
63 ret := 0
64 for _, filt := range i.eqFilts {
65 if _, ok := c.constraints[filt.Property]; ok {
66 ret++
67 }
68 }
69 return ret
70 }
71
72 type IndexDefinitionSortableSlice []IndexDefinitionSortable
73
74 func (s IndexDefinitionSortableSlice) Len() int { return len(s) }
75 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
76 func (s IndexDefinitionSortableSlice) Less(i, j int) bool {
77 a, b := s[i], s[j]
78 if a.coll == nil && b.coll != nil {
79 return true
80 } else if a.coll != nil && b.coll == nil {
81 return false
82 }
83
84 cmp := len(a.eqFilts) - len(b.eqFilts)
85 if cmp < 0 {
86 return true
87 } else if cmp > 0 {
88 return false
89 }
90 for k, col := range a.eqFilts {
91 ocol := b.eqFilts[k]
92 if col.Direction == ds.ASCENDING && ocol.Direction == ds.DESCEND ING {
93 return true
94 } else if col.Direction == ds.DESCENDING && ocol.Direction == ds .DESCENDING {
dnj (Google) 2015/08/28 17:54:21 Why "false" if we're both descending? Did you mean
iannucci 2015/08/28 19:48:55 oops. Done.
95 return false
96 }
97 if col.Property < ocol.Property {
98 return true
99 } else if col.Property > ocol.Property {
100 return false
101 }
102 }
103 return false
104 }
105
106 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.
107 // It's only added if it could be useful in servicing q, otherwise this function
108 // is a noop.
109 //
110 // This returns true iff the proposed index is OK and depletes missingTerms to
111 // empty.
112 //
113 // If the proposed index is PERFECT (e.g. contains enough columns to cover all
114 // equality filters, and also has the correct suffix), idxs will be replaced
115 // with JUST that index, and this will return true.
116 func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) bool {
117 // Kindless queries are handled elsewhere.
118 if id.Kind != q.kind {
119 impossible(
120 fmt.Errorf("maybeAddDefinition given index with wrong ki nd %q v %q", id.Kind, q.kind))
121 }
122
123 // If we're an ancestor query, and the index is compound, but doesn't in clude
124 // an Ancestor field, it doesn't work. Builtin indicies can be used for
125 // ancestor queries (and have !Ancestor), assuming that it's only equali ty
126 // filters (plus inequality on __key__), or a single inequality.
127 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {
128 impossible(
129 fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q))
130 }
131
132 // add __ancestor__ if necessary
133 sortBy := id.GetFullSortOrder()
134
135 // If the index has fewer fields than we need for the suffix, it can't
136 // possibly help.
137 if len(sortBy) < len(q.suffixFormat) {
138 return false
139 }
140
141 numEqFilts := len(sortBy) - len(q.suffixFormat)
142 // make sure the orders are precisely the same
143 for i, sb := range sortBy[numEqFilts:] {
144 if q.suffixFormat[i] != sb {
145 return false
146 }
147 }
148
149 if id.Builtin() && numEqFilts == 0 {
150 requireSomeEqFilter := len(q.eqFilters) > 1
dnj (Google) 2015/08/28 17:54:21 if len(q.eqFilters) > 1 || (len(q.eqFilters) == 1
iannucci 2015/08/28 19:48:56 done
151 if !requireSomeEqFilter && len(q.eqFilters) == 1 && q.eqFilters[ "__ancestor__"] == nil {
152 requireSomeEqFilter = true
153 }
154 if requireSomeEqFilter {
155 return false
156 }
157 }
158
159 // Make sure the equalities section doesn't contain any properties we do n't
160 // want in our query.
161 //
162 // numByProp && totalEqFilts will be used to see if this is a perfect ma tch
163 // later.
164 numByProp := make(map[string]int, len(q.eqFilters))
165 totalEqFilts := 0
166
167 eqFilts := sortBy[:numEqFilts]
168 for _, p := range eqFilts {
169 if _, ok := q.eqFilters[p.Property]; !ok {
170 return false
171 }
172 numByProp[p.Property]++
173 totalEqFilts++
174 }
175
176 // See if we actually have this index for the correct namespace
177 coll := s.GetCollection(
dnj (Google) 2015/08/28 17:54:21 Need to test if "coll" is nil?
iannucci 2015/08/28 19:48:55 nope. Clarified in comment.
178 fmt.Sprintf("idx:%s:%s", q.ns, serialize.ToBytes(*id.PrepForIdxT able())))
179
180 // ok, we can actually use this
181
182 // First, see if it's a perfect match. If it is, then our search is over .
183 //
184 // A perfect match contains ALL the equality filter columns (or more, si nce
185 // we can use residuals to fill in the extras).
186 toAdd := IndexDefinitionSortable{coll: coll}
187 toAdd.eqFilts = eqFilts
188 for _, sb := range toAdd.eqFilts {
189 delete(missingTerms, sb.Property)
190 }
191
192 perfect := false
193 if len(sortBy) == q.numCols {
194 perfect = true
195 for k, num := range numByProp {
196 if num < len(q.eqFilters[k]) {
197 perfect = false
198 break
199 }
200 }
201 }
202 if perfect {
203 *idxs = IndexDefinitionSortableSlice{toAdd}
204 } else {
205 *idxs = append(*idxs, toAdd)
206 }
207 return len(missingTerms) == 0
208 }
209
210 // getRelevantIndicies retrieves the relevant indices which could be used to
dnj (Google) 2015/08/28 17:54:22 Indexes?
iannucci 2015/08/28 19:48:56 done
211 // service q. It returns nil if it's not possible to service q with the current
212 // indicies.
213 func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableS lice, error) {
214 missingTerms := map[string]struct{}{}
215 for k := range q.eqFilters {
216 if k == "__ancestor__" {
217 // ancestor is not a prefix which can be satisfied by a single index. It
218 // must be satisfied by ALL indices (and has special log ic for this in
219 // the addDefinition logic)
220 continue
221 }
222 missingTerms[k] = struct{}{}
223 }
224 idxs := IndexDefinitionSortableSlice{}
225
226 // First we add builtins
227 // add
228 // idx:KIND
229 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
230 Kind: q.kind,
231 }) {
232 return idxs, nil
233 }
234
235 // add
236 // idx:KIND:prop
237 // idx:KIND:-prop
238 props := map[string]struct{}{}
239 for prop := range q.eqFilters {
240 props[prop] = struct{}{}
241 }
242 for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] {
243 props[col.Property] = struct{}{}
244 }
245 for prop := range props {
246 if strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__" ) {
247 continue
248 }
249 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti on{
250 Kind: q.kind,
251 SortBy: []ds.IndexColumn{
252 {Property: prop},
253 },
254 }) {
255 return idxs, nil
256 }
257 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti on{
258 Kind: q.kind,
259 SortBy: []ds.IndexColumn{
260 {Property: prop, Direction: ds.DESCENDING},
261 },
262 }) {
263 return idxs, nil
264 }
265 }
266
267 // Try adding all compound indicies whose suffix matches.
268 suffix := &ds.IndexDefinition{
269 Kind: q.kind,
270 Ancestor: q.eqFilters["__ancestor__"] != nil,
271 SortBy: q.suffixFormat,
272 }
273 walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool {
274 // keep walking until we find a perfect index.
275 return !idxs.maybeAddDefinition(q, s, missingTerms, def)
276 })
277
278 // this query is impossible to fulfil with the current indicies. Not all the
279 // terms (equality + projection) are satisfied.
280 if len(missingTerms) < 0 || len(idxs) == 0 {
281 remains := &ds.IndexDefinition{
282 Kind: q.kind,
283 Ancestor: q.eqFilters["__ancestor__"] != nil,
284 }
285 terms := make([]string, 0, len(missingTerms))
286 for mt := range missingTerms {
287 terms = append(terms, mt)
288 }
289 if serializationDeterministic {
290 sort.Strings(terms)
291 }
292 for _, term := range terms {
293 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term})
294 }
295 remains.SortBy = append(remains.SortBy, q.suffixFormat...)
296 last := remains.SortBy[len(remains.SortBy)-1]
297 if last.Direction == ds.ASCENDING {
298 // this is implied
299 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]
dnj (Google) 2015/08/28 17:54:21 Can just assign to "last".
iannucci 2015/08/28 19:48:55 That would have no effect? Did you mean something
dnj 2015/08/28 19:57:53 Oh nope, just overlooked the colon. Nevermind.
300 }
301 if remains.Builtin() {
302 impossible(
303 fmt.Errorf("recommended missing index would be a builtin: %s", remains))
304 }
305 return nil, fmt.Errorf(
306 "Your indicies are insufficient! Try adding:\n %s", rem ains)
dnj (Google) 2015/08/28 17:54:22 indexes! Also not sure this is the best way to ret
iannucci 2015/08/28 19:48:55 yes, I was planning on changing that in a later CL
307 }
308
309 return idxs, nil
310 }
311
312 // generate generates a single iterDefinition for the given index.
313 func generate(q *reducedQuery, idx *IndexDefinitionSortable, c *constraints) *it erDefinition {
314 def := &iterDefinition{
315 c: idx.coll,
316 start: q.start,
317 end: q.end,
318 }
319 toJoin := [][]byte{}
dnj (Google) 2015/08/28 17:54:22 Can pre-allocate capacity to len(idx.eqFilts)
iannucci 2015/08/28 19:48:56 Done
320 for _, sb := range idx.eqFilts {
321 val := c.peel(sb.Property)
322 if sb.Direction == ds.DESCENDING {
323 val = invert(val)
324 }
325 toJoin = append(toJoin, val)
326 }
327 def.prefix = bjoin(toJoin...)
328 def.prefixLen = len(def.prefix)
329
330 if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() {
331 // The query requires an ancestor, but the index doesn't explici tly have it
332 // as part of the prefix (otherwise it would have been the first eqFilt
333 // above). This happens when it's a builtin index, or if it's th e primary
334 // index (for a kindless query), or if it's the Kind index (for a filterless
335 // query).
336 //
337 // builtin indexes are:
338 // Kind/__key__
339 // Kind/Prop/__key__
340 // Kind/Prop/-__key__
341 if len(q.suffixFormat) > 2 || q.suffixFormat[len(q.suffixFormat) -1].Property != "__key__" {
342 // This should never happen. One of the previous validat ors would have
343 // selected a different index. But just in case.
344 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", idx))
345 }
346
347 // chop the terminal null byte off the q.ancestor key... we can accept
dnj (Google) 2015/08/28 17:54:22 You don't do this here.
iannucci 2015/08/28 19:48:55 oops. moved comment.
348 // anything which is a descendant or an exact match.
349
350 // This silly construction gets the __ancestor__ value, because it's a
351 // map[string]struct{} instead of a [][]byte{} (otherwise we'd j ust get
352 // the value at the 0th index).
353 anc := ""
354 for k := range q.eqFilters["__ancestor__"] {
355 anc = k
356 break
357 }
358
359 // Intentionally do NOT update prefixLen. This allows multiItera tor to
360 // correctly include the entire key in the shared iterator suffi x, instead
361 // of just the remainder.
362
363 // Removing the last byte from the key (the terminating null) al lows this
364 // trick to work. Otherwise it would be a closed range of EXACTL Y this key.
365 chopped := []byte(anc[:len(anc)-1])
366 if q.suffixFormat[0].Direction == ds.DESCENDING {
367 chopped = invert(chopped)
368 }
369 def.prefix = bjoin(def.prefix, chopped)
370
371 // Update start and end, since we know that if they contain anyt hing, they
372 // contain values for the __key__ field.
373 if def.start != nil {
374 offset := 0
375 if len(q.suffixFormat) > 1 {
376 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d ef.start, 1)
377 offset = len(chunks[0])
378 }
379 if !bytes.HasPrefix(def.start[offset:], chopped) {
380 // again, shouldn't happen, but if it does, we w ant to know about it.
381 impossible(fmt.Errorf(
382 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v",
383 def.start, chopped))
384 }
385 def.start = def.start[:offset+len(chopped)]
386 }
387 if def.end != nil {
388 offset := 0
389 if len(q.suffixFormat) > 1 {
390 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d ef.end, 1)
391 offset = len(chunks[0])
392 }
393 if !bytes.HasPrefix(def.end[offset:], chopped) {
394 impossible(fmt.Errorf(
395 "end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",
dnj (Google) 2015/08/28 17:54:22 end:
iannucci 2015/08/28 19:48:56 done
396 def.end, chopped))
397 }
398 def.end = def.end[:offset+len(chopped)]
399 }
400 }
401
402 return def
403 }
404
405 type constraints struct {
406 constraints map[string][][]byte
407 original map[string][][]byte
408 residualMapping map[string]int
409 }
410
411 // peel picks a constraint value for the property. It then removes this value
412 // from constraints (possibly removing the entire row from constraints if it
413 // was the last value). If the value wasn't available in constraints, it picks
414 // the value from residuals.
415 func (c *constraints) peel(prop string) []byte {
416 ret := []byte(nil)
417 if vals, ok := c.constraints[prop]; ok {
418 ret = vals[0]
419 if len(vals) == 1 {
420 delete(c.constraints, prop)
421 } else {
422 c.constraints[prop] = vals[1:]
423 }
424 } else {
425 row := c.original[prop]
426 idx := c.residualMapping[prop]
427 c.residualMapping[prop]++
428 ret = row[idx%len(row)]
dnj (Google) 2015/08/28 17:54:21 Please document why this is done (pseudorandom pad
iannucci 2015/08/28 19:48:56 It's not random. A PRNG would be slower and serve
429 }
430 return ret
431 }
432
433 func (c *constraints) empty() bool {
434 return len(c.constraints) == 0
435 }
436
437 // calculateConstraints produces a mapping of all equality filters to the values
438 // that they're constrained to. It also calculates residuals, which are an
439 // arbitrary value for filling index prefixes which have more equality fields
440 // than are necessary. The value doesn't matter, as long as its an equality
441 // constraint in the original query.
442 func calculateConstraints(q *reducedQuery) *constraints {
443 ret := &constraints{
444 original: make(map[string][][]byte, len(q.eqFilters)),
445 constraints: make(map[string][][]byte, len(q.eqFilters)),
446 residualMapping: make(map[string]int),
447 }
448 for prop, vals := range q.eqFilters {
449 bvals := make([][]byte, 0, len(vals))
450 for val := range vals {
451 bvals = append(bvals, []byte(val))
452 }
453 ret.original[prop] = bvals
454 if prop == "__ancestor__" {
455 // exclude __ancestor__ from the constraints.
456 //
457 // This is because it's handled specially during index p roposal and
458 // generation. Ancestor is used by ALL indices, and so i ts residual value
459 // in ret.original above will be sufficient.
460 continue
461 }
462 ret.constraints[prop] = bvals
463 }
464 return ret
465 }
466
467 // getIndicies returns a set of iterator definitions. Iterating over these
468 // will result in matching suffixes.
469 func getIndicies(q *reducedQuery, s *memStore) ([]*iterDefinition, error) {
dnj (Google) 2015/08/28 17:54:22 Indexes
iannucci 2015/08/28 19:48:55 You know that both spellings are correct, right?
dnj 2015/08/28 19:57:53 Yes, but you've been generally consistent using "i
470 relevantIdxs := IndexDefinitionSortableSlice(nil)
471 if q.kind == "" {
472 if coll := s.GetCollection("ents:" + q.ns); coll != nil {
473 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} }
474 }
475 } else {
476 err := error(nil)
477 relevantIdxs, err = getRelevantIndicies(q, s)
478 if err != nil {
479 return nil, err
480 }
481 }
482 if len(relevantIdxs) == 0 {
483 return nil, errQueryDone
484 }
485
486 // This sorts it so that relevantIdxs goes less filters -> more filters. We
487 // traverse this list backwards, however, so we traverse it in more filt ers ->
488 // less filters order.
489 sort.Sort(relevantIdxs)
490
491 // TODO(riannucci): When filling from residual, choose pseudorandom elem ents
dnj (Google) 2015/08/28 17:54:22 You do this with the aforementioned modulus, so no
iannucci 2015/08/28 19:48:56 yes
492 // from the ori ginal constraints. This will allow the use
493 // of large ind exes with good hitrates.
494 constraints := calculateConstraints(q)
495
496 ret := []*iterDefinition{}
497 for !constraints.empty() || len(ret) == 0 {
498 bestIdx := (*IndexDefinitionSortable)(nil)
499 if len(ret) == 0 {
500 // if ret is empty, take the biggest relevantIdx. It's g uaranteed to have
501 // the greatest number of equality filters of any index in the list, and
502 // we know that every equality filter will be pulled fro m constraints and
503 // not residual.
504 //
505 // This also takes care of the case when the query has n o equality filters,
506 // in which case relevantIdxs will actually only contain one index anyway
507 // :)
508 bestIdx = &relevantIdxs[len(relevantIdxs)-1]
509 if bestIdx.coll == nil {
510 return nil, errQueryDone
511 }
512 } else {
513 // If ret's not empty, then we need to find the best ind ex we can. The
514 // best index will be the one with the most matching equ ality columns.
515 // Since relevantIdxs is sorted primarially by the numbe r of equality
516 // columns, we walk down the list until the number of po ssible columns is
517 // worse than our best-so-far.
518 //
519 // Traversing the list backwards goes from more filters -> less filters,
520 // but also allows us to remove items from the list as w e iterate over it.
521 bestNumEqHits := 0
522 for i := len(relevantIdxs) - 1; i >= 0; i-- {
523 idx := &relevantIdxs[i]
524 if len(idx.eqFilts) < bestNumEqHits {
525 // if the number of filters drops below our best hit, it's never going
526 // to get better than that. This index m ight be helpful on a later
527 // loop though, so don't remove it.
528 break
529 }
530 numHits := 0
531 if idx.coll != nil {
532 numHits = idx.numEqHits(constraints)
533 }
534 if numHits > bestNumEqHits {
535 bestNumEqHits = numHits
536 bestIdx = idx
537 } else if numHits == 0 {
538 // This index will never become useful a gain, so remove it.
539 relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...)
540 }
541 }
542 }
543 if bestIdx == nil {
544 // something is really wrong here... if relevantIdxs is !nil, then we
545 // should always be able to make progress in this loop.
546 impossible(fmt.Errorf("deadlock: cannot fulfil query?"))
547 }
548 ret = append(ret, generate(q, bestIdx, constraints))
549 }
550
551 return ret, nil
552 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698