Chromium Code Reviews
| Index: impl/memory/datastore_index_selection.go |
| diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..03db477be7cbaeae478f0ecf3dd559622949640e |
| --- /dev/null |
| +++ b/impl/memory/datastore_index_selection.go |
| @@ -0,0 +1,301 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +package memory |
| + |
| +import ( |
| + "bytes" |
| + "sort" |
| + |
| + ds "github.com/luci/gae/service/datastore" |
| + "github.com/luci/gae/service/datastore/serialize" |
| + "github.com/luci/gkvlite" |
| +) |
| + |
| +// reducedQuery contains only the pertinent details from a properly checked |
| +// query. |
| +type reducedQuery struct { |
| [iannucci, 2015/08/20 10:32:13: this structure is the key to this CL; all the code …] |
| + ns string |
| + kind string |
| + ancestor []byte |
| + |
| + eqFilters map[string]map[string]struct{} |
| + // suffixFormat ALWAYS includes the inequality filter as the 0th element. |
| + // suffixFormat ALWAYS includes any projections (in ascending order) after |
| + // all user-defined sort orders. |
| + suffixFormat []ds.IndexColumn |
| + |
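| + // low and high are the encoded bounds on the index-row suffix; they become |
| + // the start and end of each generated iterDefinition. |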
| + low []byte |
| + high []byte |
| +} |
| + |
| +func (q *reducedQuery) needAncestor() bool { |
| + return q.ancestor != nil |
| +} |
| + |
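| +// IndexDefinitionSortable is a candidate index for a query. numRows is the |
| +// row count of coll, eqFilts is the set of equality-filter properties |
| +// covered by the index's prefix columns, and prefixIdx is the offset in the |
| +// implied sort order (SortBy plus __key__) at which the query suffix begins. |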
| +type IndexDefinitionSortable struct { |
| + numRows uint64 |
| + eqFilts map[string]struct{} |
| + prefixIdx int |
| + |
| + def *ds.IndexDefinition |
| + coll *memCollection |
| +} |
| + |
| +type IndexDefinitionSortableSlice []IndexDefinitionSortable |
| + |
| +func (s IndexDefinitionSortableSlice) Len() int { return len(s) } |
| +func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| +func (s IndexDefinitionSortableSlice) Less(i, j int) bool { |
| + // sort by descending numRows, tie-breaking on the IndexDefinition ordering |
| + if s[i].numRows == s[j].numRows { |
| + return s[i].def.Less(s[j].def) |
| + } |
| + return s[i].numRows > s[j].numRows |
| +} |
| + |
| +// addDefinition adds a new IndexDefinitionSortable to this slice. It's only |
| +// added if it could be useful in servicing q, otherwise this function is |
| +// a noop. |
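| +// An index is only useful if its implied sort order (SortBy plus __key__) |
| +// ends with exactly q.suffixFormat and every remaining prefix column is one |
| +// of q's equality filters. |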
| +func (idxs *IndexDefinitionSortableSlice) addDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) { |
| + // Kindless queries are handled elsewhere. |
| + if id.Kind != q.kind { |
| + return |
| + } |
| + |
| + // If we're an ancestor query and the index is compound but doesn't include |
| + // an Ancestor field, it doesn't work. Builtin indices can be used for |
| + // ancestor queries (and have !Ancestor), assuming the query has only |
| + // equality filters (plus an inequality on __key__), or a single inequality. |
| + if q.needAncestor() && id.Compound() && !id.Ancestor { |
| + return |
| + } |
| + |
| + // If the index has fewer fields than we need for the suffix, it can't |
| + // possibly help. |
| + if (len(id.SortBy) + 1) < len(q.suffixFormat) { |
| + return |
| + } |
| + |
| + // See if we actually have this index for the correct namespace |
| + coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(id))) |
| + if coll == nil { |
| + return |
| + } |
| + |
| + // Append the implied __key__ sort order. All indices have this, but it's |
| + // not explicitly represented in IndexDefinition. |
| + sortBy := make([]ds.IndexColumn, len(id.SortBy), len(id.SortBy)+1) |
| + copy(sortBy, id.SortBy) |
| + sortBy = append(sortBy, ds.IndexColumn{Property: "__key__"}) |
| + |
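| + // e.g. with an implied sortBy of [A, B, __key__] and a 2-element |
| + // q.suffixFormat, suffixIdx is 1: the last two columns must match the |
| + // suffix exactly, and the leading column A must be an equality filter. |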
| + suffixIdx := len(sortBy) - len(q.suffixFormat) |
| + // make sure the orders are precisely the same |
| + for i, sb := range sortBy[suffixIdx : suffixIdx+len(q.suffixFormat)] { |
| + if q.suffixFormat[i] != sb { |
| + return |
| + } |
| + } |
| + |
| + // Make sure the equalities section doesn't contain any properties we don't |
| + // want in our query. |
| + for _, p := range sortBy[:suffixIdx] { |
| + if _, ok := q.eqFilters[p.Property]; !ok { |
| + return |
| + } |
| + } |
| + |
| + // ok, we can actually use this |
| + eqFilts := make(map[string]struct{}, suffixIdx) |
| + for _, sb := range id.SortBy[:suffixIdx] { |
| + eqFilts[sb.Property] = struct{}{} |
| + delete(missingTerms, sb.Property) |
| + } |
| + numRows, _ := coll.GetTotals() |
| + *idxs = append(*idxs, IndexDefinitionSortable{numRows, eqFilts, suffixIdx, id, coll}) |
| +} |
| + |
| +// getRelevantIndicies retrieves the relevant indices which could be used to |
| +// service q. It returns nil if it's not possible to service q with the |
| +// currently available indices. |
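| +// Candidates are the builtin single-property indices for each equality |
| +// filter, plus every stored composite index; the result is sorted per |
| +// IndexDefinitionSortableSlice.Less. |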
| +func getRelevantIndicies(q *reducedQuery, s *memStore) IndexDefinitionSortableSlice { |
| + idxCol := s.GetCollection("idx") |
| + if idxCol == nil { |
| + return nil |
| + } |
| + |
| + missingTerms := map[string]struct{}{} |
| + for k := range q.eqFilters { |
| + missingTerms[k] = struct{}{} |
| + } |
| + idxs := IndexDefinitionSortableSlice{} |
| + |
| + // add builtins |
| + for k := range q.eqFilters { |
| + idxs.addDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
| + Kind: q.kind, |
| + Ancestor: false, |
| + SortBy: []ds.IndexColumn{ |
| + {Property: k}, |
| + }, |
| + }) |
| + } |
| + |
| + // Try adding all compound indices. |
| + idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, func(i *gkvlite.Item) bool { |
| + id, err := serialize.ReadIndexDefinition(bytes.NewBuffer(i.Key)) |
| + if err != nil { |
| + panic(err) // memory corruption |
| + } |
| + idxs.addDefinition(q, s, missingTerms, &id) |
| + return true |
| + }) |
| + |
| + // If any terms (equality filters or projections) are still missing, this |
| + // query is impossible to fulfil with the current indices. |
| + if len(missingTerms) > 0 { |
| + // TODO(riannucci): return an error when the query requires composite |
| + // indices which are missing. We can even calculate the maximum missing |
| + // index which would satisfy the remainder of the query! |
| + return nil |
| + } |
| + |
| + sort.Sort(idxs) |
| + |
| + return idxs |
| +} |
| + |
| +// invert simply inverts all the bytes in bs. |
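| +// For example, invert([]byte{0x00, 0x7f, 0xff}) == []byte{0xff, 0x80, 0x00}. |
| +// This is what makes values for DESCENDING index columns compare correctly |
| +// as raw bytes. |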
| +func invert(bs []byte) []byte { |
| + if bs == nil { |
| + return nil |
| + } |
| + ret := make([]byte, len(bs)) |
| + for i, b := range bs { |
| + ret[i] = 0xFF ^ b |
| + } |
| + return ret |
| +} |
| + |
| +// peel picks a constraint value for the property. It then removes this value |
| +// from constraints (possibly removing the entire row from constraints if it |
| +// was the last value). If the value wasn't available in constraints, it picks |
| +// the value from residuals. |
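| +// |
| +// e.g. with constraints["A"] == [a1, a2]: the first peel("A", ...) returns |
| +// a1 and leaves [a2], the second returns a2 and deletes the entry, and any |
| +// further calls fall back to residuals["A"]. |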
| +func peel(prop string, constraints map[string][][]byte, residuals map[string][]byte) []byte { |
| + ret := []byte(nil) |
| + if vals, ok := constraints[prop]; ok { |
| + ret = vals[0] |
| + if len(vals) == 1 { |
| + delete(constraints, prop) |
| + } else { |
| + constraints[prop] = vals[1:] |
| + } |
| + } else { |
| + ret = residuals[prop] |
| + } |
| + return ret |
| +} |
| + |
| +// generate generates a single iterDefinition for the given index. |
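| +// The prefix is the query ancestor (when set, for compound indices) followed |
| +// by one peeled value per equality-prefix column, with values for DESCENDING |
| +// columns inverted; for builtin indices, an ancestor query instead narrows |
| +// start/end to the ancestor's key range. |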
| +func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[string][][]byte, residuals map[string][]byte) *iterDefinition { |
| + def := &iterDefinition{ |
| + c: idx.coll, |
| + start: q.low, |
| + end: q.high, |
| + } |
| + toJoin := [][]byte{} |
| + if q.ancestor != nil && idx.def.Compound() { |
| + toJoin = append(toJoin, q.ancestor) |
| + } |
| + // Only the index's equality-prefix columns contribute to the prefix; the |
| + // suffix columns are covered by the start/end range. |
| + for _, sb := range idx.def.SortBy[:idx.prefixIdx] { |
| + val := peel(sb.Property, constraints, residuals) |
| + if sb.Direction == ds.DESCENDING { |
| + val = invert(val) |
| + } |
| + toJoin = append(toJoin, val) |
| + } |
| + def.prefix = bjoin(toJoin...) |
| + if q.ancestor != nil && idx.def.Builtin() { |
| + // chop the terminal null byte off the q.ancestor key... we can accept |
| + // anything which is a descendant or an exact match |
| + chopped := q.ancestor[:len(q.ancestor)-1] |
| + def.start = append(def.start, chopped...) |
| + |
| + // setting the endpoint as 1 more than the chopped version ensures that |
| + // we don't include anything that's a sibling (or parent) of q.ancestor. |
| + def.end = append(def.end, increment(chopped)...) |
| + } |
| + return def |
| +} |
| + |
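| +// getKindlessIndex returns a single iterator over the raw entity collection |
| +// for the namespace; a kindless query may only be constrained by __key__, so |
| +// no index collection is consulted. |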
| +func getKindlessIndex(q *reducedQuery, s *memStore) []*iterDefinition { |
| + // kindless query filters on `ents:ns` |
| + for _, sb := range q.suffixFormat { |
| + if sb.Property != "__key__" { |
| + panic("impossible: inequality filter on kindless query is not __key__") |
| + } |
| + } |
| + return []*iterDefinition{{ |
| + c: s.GetCollection("ents:" + q.ns), |
| + start: q.low, |
| + end: q.high, |
| + }} |
| +} |
| + |
| +// calculateConstraints produces a mapping from each equality-filtered |
| +// property to the values it's constrained to. It also calculates residuals: |
| +// an arbitrary value per property, used to fill index prefixes which have |
| +// more equality columns than are strictly necessary. The exact value doesn't |
| +// matter, as long as it's one of the equality constraints in the original |
| +// query. |
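| +// |
| +// e.g. eqFilters {"A": {a1, a2}, "B": {b1}} yields constraints |
| +// {"A": [a1, a2], "B": [b1]} and residuals {"A": a1 or a2 (map iteration |
| +// order is arbitrary), "B": b1}. |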
| +func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, residuals map[string][]byte) { |
| + residuals = make(map[string][]byte, len(q.eqFilters)) |
| + constraints = make(map[string][][]byte, len(q.eqFilters)) |
| + for prop, vals := range q.eqFilters { |
| + bvals := make([][]byte, 0, len(vals)) |
| + for val := range vals { |
| + bvals = append(bvals, []byte(val)) |
| + } |
| + residuals[prop] = bvals[0] |
| + constraints[prop] = bvals |
| + } |
| + return |
| +} |
| + |
| +// getIndicies returns a set of iterator definitions. Iterating over these |
| +// will result in matching suffixes. |
| +func getIndicies(q *reducedQuery, s *memStore) []*iterDefinition { |
| + if q.kind == "" { |
| + return getKindlessIndex(q, s) |
| + } |
| + |
| + relevantIdxs := getRelevantIndicies(q, s) |
| + if relevantIdxs == nil { |
| + return nil |
| + } |
| + |
| + constraints, residuals := calculateConstraints(q) |
| + |
| + ret := []*iterDefinition{} |
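| + // Greedily consume the constraints: each pass picks the first index (in |
| + // sorted order) whose equality prefix still covers some remaining |
| + // constraint and generates one iterator from it; generate peels one value |
| + // per prefix column, so constraints shrink with every pass. |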
| + for len(constraints) > 0 { |
| + lenBefore := len(ret) |
| + for _, idx := range relevantIdxs { |
| + // see if this index is helpful for the remaining constraints at all |
| + good := false |
| + for k := range idx.eqFilts { |
| + if _, ok := constraints[k]; ok { |
| + good = true |
| + break |
| + } |
| + } |
| + if good { |
| + ret = append(ret, generate(q, idx, constraints, residuals)) |
| + break |
| + } |
| + } |
| + if lenBefore == len(ret) { |
| + // something is really wrong here... if relevantIdxs is !nil, then we |
| + // should always be able to make progress in this loop. |
| + panic("deadlock: cannot fulfil query?") |
| + } |
| + } |
| + return ret |
| +} |