Index: impl/memory/datastore_index_selection.go |
diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go |
new file mode 100644 |
index 0000000000000000000000000000000000000000..03db477be7cbaeae478f0ecf3dd559622949640e |
--- /dev/null |
+++ b/impl/memory/datastore_index_selection.go |
@@ -0,0 +1,301 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+package memory |
+ |
+import ( |
+ "bytes" |
+ "sort" |
+ |
+ ds "github.com/luci/gae/service/datastore" |
+ "github.com/luci/gae/service/datastore/serialize" |
+ "github.com/luci/gkvlite" |
+) |
+ |
+// reducedQuery contains only the pertinent details from a properly checked |
+// query. |
+type reducedQuery struct { |
iannucci
2015/08/20 10:32:13
this structure is the key to this CL; all the code
|
+ ns string |
+ kind string |
+ ancestor []byte |
+ |
+ eqFilters map[string]map[string]struct{} |
+ // suffixFormat ALWAYS includes the inequality filter as the 0th element |
+ // suffixFormat ALWAYS includes any projections (in ascending suffixFormat) after all |
+ // user defined sort orders |
+ suffixFormat []ds.IndexColumn |
+ |
+ low []byte |
+ high []byte |
+} |
+ |
+func (q *reducedQuery) needAncestor() bool { |
+ return q.ancestor != nil |
+} |
+ |
+type IndexDefinitionSortable struct { |
+ numRows uint64 |
+ eqFilts map[string]struct{} |
+ prefixIdx int |
+ |
+ def *ds.IndexDefinition |
+ coll *memCollection |
+} |
+ |
+type IndexDefinitionSortableSlice []IndexDefinitionSortable |
+ |
+func (s IndexDefinitionSortableSlice) Len() int { return len(s) } |
+func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
+func (s IndexDefinitionSortableSlice) Less(i, j int) bool { |
+ // note that |
+ return s[i].numRows >= s[j].numRows && s[i].def.Less(s[j].def) |
+} |
+ |
+// addDefinition adds a new IndexDefinitionSortable to this slice. It's only |
+// added if it could be useful in servicing q, otherwise this function is |
+// a noop. |
+func (idxs *IndexDefinitionSortableSlice) addDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) { |
+ // Kindless queries are handled elsewhere. |
+ if id.Kind != q.kind { |
+ return |
+ } |
+ |
+ // If we're an ancestor query, and the index is compound, but doesn't include |
+ // an Ancestor field, it doesn't work. Builtin indicies can be used for |
+ // ancestor queries (and have !Ancestor), assuming that it's only equality |
+ // filters (plus inequality on __key__), or a single inequality. |
+ if q.needAncestor() && id.Compound() && !id.Ancestor { |
+ return |
+ } |
+ |
+ // If the index has fewer fields than we need for the suffix, it can't |
+ // possibly help. |
+ if (len(id.SortBy) + 1) < len(q.suffixFormat) { |
+ return |
+ } |
+ |
+ // See if we actually have this index for the correct namespace |
+ coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(id))) |
+ if coll == nil { |
+ return |
+ } |
+ |
+ // append the implied __key__ sort suffixFormat. All indicies have this, but it's |
+ // not explicitly represented in IndexDefinition. |
+ sortBy := make([]ds.IndexColumn, len(id.SortBy), len(id.SortBy)+1) |
+ copy(sortBy, id.SortBy) |
+ sortBy = append(sortBy, ds.IndexColumn{Property: "__key__"}) |
+ |
+ suffixIdx := len(sortBy) - len(q.suffixFormat) |
+ // make sure the orders are precisely the same |
+ for i, sb := range sortBy[suffixIdx : suffixIdx+len(q.suffixFormat)] { |
+ if q.suffixFormat[i] != sb { |
+ return |
+ } |
+ } |
+ |
+ // Make sure the equalities section doesn't contain any properties we don't |
+ // want in our query. |
+ for _, p := range sortBy[:suffixIdx] { |
+ if _, ok := q.eqFilters[p.Property]; !ok { |
+ return |
+ } |
+ } |
+ |
+ // ok, we can actually use this |
+ eqFilts := make(map[string]struct{}, suffixIdx) |
+ for _, sb := range id.SortBy[:suffixIdx] { |
+ eqFilts[sb.Property] = struct{}{} |
+ delete(missingTerms, sb.Property) |
+ } |
+ numRows, _ := coll.GetTotals() |
+ *idxs = append(*idxs, IndexDefinitionSortable{numRows, eqFilts, suffixIdx, id, coll}) |
+} |
+ |
+// getRelevantIndicies retrieves the relevant indices which could be used to |
+// service q. It returns nil if it's not possible to service q with the current |
+// indicies. |
+func getRelevantIndicies(q *reducedQuery, s *memStore) IndexDefinitionSortableSlice { |
+ idxCol := s.GetCollection("idx") |
+ if idxCol == nil { |
+ return nil |
+ } |
+ |
+ missingTerms := map[string]struct{}{} |
+ for k := range q.eqFilters { |
+ missingTerms[k] = struct{}{} |
+ } |
+ idxs := IndexDefinitionSortableSlice{} |
+ |
+ // add builtins |
+ for k := range q.eqFilters { |
+ idxs.addDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
+ Kind: q.kind, |
+ Ancestor: false, |
+ SortBy: []ds.IndexColumn{ |
+ {Property: k}, |
+ }, |
+ }) |
+ } |
+ |
+ // Try adding all compound indicies |
+ idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, func(i *gkvlite.Item) bool { |
+ id, err := serialize.ReadIndexDefinition(bytes.NewBuffer(i.Key)) |
+ if err != nil { |
+ panic(err) // memory corruption |
+ } |
+ idxs.addDefinition(q, s, missingTerms, &id) |
+ return true |
+ }) |
+ |
+ // this query is impossible to fulfil with the current indicies. Not all the |
+ // terms (equality + projection) are satisfied. |
+ if len(missingTerms) < 0 { |
+ // TODO(riannucci): return error when index requires missing composite |
+ // indicies. We can even calculate the maximum missing index which would |
+ // satisfy the remainder of the query! |
+ return nil |
+ } |
+ |
+ sort.Sort(idxs) |
+ |
+ return idxs |
+} |
+ |
+// invert simply inverts all the bytes in bs. |
+func invert(bs []byte) []byte { |
+ if bs == nil { |
+ return nil |
+ } |
+ ret := make([]byte, len(bs)) |
+ for i, b := range bs { |
+ ret[i] = 0xFF ^ b |
+ } |
+ return ret |
+} |
+ |
+// peel picks a constraint value for the property. It then removes this value |
+// from constraints (possibly removing the entire row from constraints if it |
+// was the last value). If the value wasn't available in constraints, it picks |
+// the value from residuals. |
+func peel(prop string, constraints map[string][][]byte, residuals map[string][]byte) []byte { |
+ ret := []byte(nil) |
+ if vals, ok := constraints[prop]; ok { |
+ ret = vals[0] |
+ if len(vals) == 1 { |
+ delete(constraints, prop) |
+ } else { |
+ constraints[prop] = vals[1:] |
+ } |
+ } else { |
+ ret = residuals[prop] |
+ } |
+ return ret |
+} |
+ |
+// generate generates a single iterDefinition for the given index. |
+func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[string][][]byte, residuals map[string][]byte) *iterDefinition { |
+ def := &iterDefinition{ |
+ c: idx.coll, |
+ start: q.low, |
+ end: q.high, |
+ } |
+ toJoin := [][]byte{} |
+ if q.ancestor != nil && idx.def.Compound() { |
+ toJoin = append(toJoin, q.ancestor) |
+ } |
+ for _, sb := range idx.def.SortBy { |
+ val := peel(sb.Property, constraints, residuals) |
+ if sb.Direction == ds.DESCENDING { |
+ val = invert(val) |
+ } |
+ toJoin = append(toJoin, val) |
+ } |
+ def.prefix = bjoin(toJoin...) |
+ if q.ancestor != nil && idx.def.Builtin() { |
+ // chop the terminal null byte off the q.ancestor key... we can accept |
+ // anything which is a descendant or an exact match |
+ chopped := q.ancestor[:len(q.ancestor)-1] |
+ def.start = append(def.start, chopped...) |
+ |
+ // setting the endpoint as 1 more than the chopped version ensures that |
+ // we don't include anything that's a sibling (or parent) of q.ancestor. |
+ def.end = append(def.end, increment(chopped)...) |
+ } |
+ return def |
+} |
+ |
+func getKindlessIndex(q *reducedQuery, s *memStore) []*iterDefinition { |
+ // kindless query filters on `ents:ns` |
+ for _, sb := range q.suffixFormat { |
+ if sb.Property != "__key__" { |
+ panic("impossible: inequality filter on kindless query is not __key__") |
+ } |
+ } |
+ return []*iterDefinition{{ |
+ s.GetCollection("ents:" + q.ns), |
+ nil, |
+ q.low, |
+ q.high, |
+ }} |
+} |
+ |
+// calculateConstraints produces a mapping of all equality filters to the values |
+// that they're constrained to. It also calculates residuals, which are an |
+// arbitrary value for filling index prefixes which have more equality fields |
+// than are necessary. The value doesn't matter, as long as its an equality |
+// constraint in the original query. |
+func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, residuals map[string][]byte) { |
+ residuals = make(map[string][]byte, len(q.eqFilters)) |
+ constraints = make(map[string][][]byte, len(q.eqFilters)) |
+ for prop, vals := range q.eqFilters { |
+ bvals := make([][]byte, 0, len(vals)) |
+ for val := range vals { |
+ bvals = append(bvals, []byte(val)) |
+ } |
+ residuals[prop] = bvals[0] |
+ constraints[prop] = bvals |
+ } |
+ return |
+} |
+ |
+// getIndicies returns a set of iterator definitions. Iterating over these |
+// will result in matching suffixes. |
+func getIndicies(q *reducedQuery, s *memStore) []*iterDefinition { |
+ if q.kind == "" { |
+ return getKindlessIndex(q, s) |
+ } |
+ |
+ relevantIdxs := getRelevantIndicies(q, s) |
+ if relevantIdxs == nil { |
+ return nil |
+ } |
+ |
+ constraints, residuals := calculateConstraints(q) |
+ |
+ ret := []*iterDefinition{} |
+ for len(constraints) > 0 { |
+ lenBefore := len(ret) |
+ for _, idx := range relevantIdxs { |
+ // see if this index is helpful for the remaining constraints at all |
+ good := false |
+ for k := range idx.eqFilts { |
+ if _, ok := constraints[k]; ok { |
+ good = true |
+ break |
+ } |
+ } |
+ if good { |
+ ret = append(ret, generate(q, idx, constraints, residuals)) |
+ break |
+ } |
+ } |
+ if lenBefore == len(ret) { |
+ // something is really wrong here... if relevantIdxs is !nil, then we |
+ // should always be able to make progress in this loop. |
+ panic("deadlock: cannot fulfil query?") |
+ } |
+ } |
+ return ret |
+} |