Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Unified Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « impl/memory/datastore_index.go ('k') | impl/memory/datastore_index_test.go » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: impl/memory/datastore_index_selection.go
diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go
new file mode 100644
index 0000000000000000000000000000000000000000..03db477be7cbaeae478f0ecf3dd559622949640e
--- /dev/null
+++ b/impl/memory/datastore_index_selection.go
@@ -0,0 +1,301 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package memory
+
+import (
+ "bytes"
+ "sort"
+
+ ds "github.com/luci/gae/service/datastore"
+ "github.com/luci/gae/service/datastore/serialize"
+ "github.com/luci/gkvlite"
+)
+
+// reducedQuery contains only the pertinent details from a properly checked
+// query.
+type reducedQuery struct {
iannucci 2015/08/20 10:32:13 this structure is the key to this CL; all the code
+ ns string
+ kind string
+ ancestor []byte
+
+ eqFilters map[string]map[string]struct{}
+ // suffixFormat ALWAYS includes the inequality filter as the 0th element
+ // suffixFormat ALWAYS includes any projections (in ascending suffixFormat) after all
+ // user defined sort orders
+ suffixFormat []ds.IndexColumn
+
+ low []byte
+ high []byte
+}
+
+func (q *reducedQuery) needAncestor() bool {
+ return q.ancestor != nil
+}
+
+type IndexDefinitionSortable struct {
+ numRows uint64
+ eqFilts map[string]struct{}
+ prefixIdx int
+
+ def *ds.IndexDefinition
+ coll *memCollection
+}
+
+type IndexDefinitionSortableSlice []IndexDefinitionSortable
+
+func (s IndexDefinitionSortableSlice) Len() int { return len(s) }
+func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s IndexDefinitionSortableSlice) Less(i, j int) bool {
+ // note that
+ return s[i].numRows >= s[j].numRows && s[i].def.Less(s[j].def)
+}
+
+// addDefinition adds a new IndexDefinitionSortable to this slice. It's only
+// added if it could be useful in servicing q, otherwise this function is
+// a noop.
+func (idxs *IndexDefinitionSortableSlice) addDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) {
+ // Kindless queries are handled elsewhere.
+ if id.Kind != q.kind {
+ return
+ }
+
+ // If we're an ancestor query, and the index is compound, but doesn't include
+ // an Ancestor field, it doesn't work. Builtin indicies can be used for
+ // ancestor queries (and have !Ancestor), assuming that it's only equality
+ // filters (plus inequality on __key__), or a single inequality.
+ if q.needAncestor() && id.Compound() && !id.Ancestor {
+ return
+ }
+
+ // If the index has fewer fields than we need for the suffix, it can't
+ // possibly help.
+ if (len(id.SortBy) + 1) < len(q.suffixFormat) {
+ return
+ }
+
+ // See if we actually have this index for the correct namespace
+ coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(id)))
+ if coll == nil {
+ return
+ }
+
+ // append the implied __key__ sort suffixFormat. All indicies have this, but it's
+ // not explicitly represented in IndexDefinition.
+ sortBy := make([]ds.IndexColumn, len(id.SortBy), len(id.SortBy)+1)
+ copy(sortBy, id.SortBy)
+ sortBy = append(sortBy, ds.IndexColumn{Property: "__key__"})
+
+ suffixIdx := len(sortBy) - len(q.suffixFormat)
+ // make sure the orders are precisely the same
+ for i, sb := range sortBy[suffixIdx : suffixIdx+len(q.suffixFormat)] {
+ if q.suffixFormat[i] != sb {
+ return
+ }
+ }
+
+ // Make sure the equalities section doesn't contain any properties we don't
+ // want in our query.
+ for _, p := range sortBy[:suffixIdx] {
+ if _, ok := q.eqFilters[p.Property]; !ok {
+ return
+ }
+ }
+
+ // ok, we can actually use this
+ eqFilts := make(map[string]struct{}, suffixIdx)
+ for _, sb := range id.SortBy[:suffixIdx] {
+ eqFilts[sb.Property] = struct{}{}
+ delete(missingTerms, sb.Property)
+ }
+ numRows, _ := coll.GetTotals()
+ *idxs = append(*idxs, IndexDefinitionSortable{numRows, eqFilts, suffixIdx, id, coll})
+}
+
+// getRelevantIndicies retrieves the relevant indices which could be used to
+// service q. It returns nil if it's not possible to service q with the current
+// indicies.
+func getRelevantIndicies(q *reducedQuery, s *memStore) IndexDefinitionSortableSlice {
+ idxCol := s.GetCollection("idx")
+ if idxCol == nil {
+ return nil
+ }
+
+ missingTerms := map[string]struct{}{}
+ for k := range q.eqFilters {
+ missingTerms[k] = struct{}{}
+ }
+ idxs := IndexDefinitionSortableSlice{}
+
+ // add builtins
+ for k := range q.eqFilters {
+ idxs.addDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ Ancestor: false,
+ SortBy: []ds.IndexColumn{
+ {Property: k},
+ },
+ })
+ }
+
+ // Try adding all compound indicies
+ idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, func(i *gkvlite.Item) bool {
+ id, err := serialize.ReadIndexDefinition(bytes.NewBuffer(i.Key))
+ if err != nil {
+ panic(err) // memory corruption
+ }
+ idxs.addDefinition(q, s, missingTerms, &id)
+ return true
+ })
+
+ // this query is impossible to fulfil with the current indicies. Not all the
+ // terms (equality + projection) are satisfied.
+ if len(missingTerms) < 0 {
+ // TODO(riannucci): return error when index requires missing composite
+ // indicies. We can even calculate the maximum missing index which would
+ // satisfy the remainder of the query!
+ return nil
+ }
+
+ sort.Sort(idxs)
+
+ return idxs
+}
+
+// invert simply inverts all the bytes in bs.
+func invert(bs []byte) []byte {
+ if bs == nil {
+ return nil
+ }
+ ret := make([]byte, len(bs))
+ for i, b := range bs {
+ ret[i] = 0xFF ^ b
+ }
+ return ret
+}
+
+// peel picks a constraint value for the property. It then removes this value
+// from constraints (possibly removing the entire row from constraints if it
+// was the last value). If the value wasn't available in constraints, it picks
+// the value from residuals.
+func peel(prop string, constraints map[string][][]byte, residuals map[string][]byte) []byte {
+ ret := []byte(nil)
+ if vals, ok := constraints[prop]; ok {
+ ret = vals[0]
+ if len(vals) == 1 {
+ delete(constraints, prop)
+ } else {
+ constraints[prop] = vals[1:]
+ }
+ } else {
+ ret = residuals[prop]
+ }
+ return ret
+}
+
+// generate generates a single iterDefinition for the given index.
+func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[string][][]byte, residuals map[string][]byte) *iterDefinition {
+ def := &iterDefinition{
+ c: idx.coll,
+ start: q.low,
+ end: q.high,
+ }
+ toJoin := [][]byte{}
+ if q.ancestor != nil && idx.def.Compound() {
+ toJoin = append(toJoin, q.ancestor)
+ }
+ for _, sb := range idx.def.SortBy {
+ val := peel(sb.Property, constraints, residuals)
+ if sb.Direction == ds.DESCENDING {
+ val = invert(val)
+ }
+ toJoin = append(toJoin, val)
+ }
+ def.prefix = bjoin(toJoin...)
+ if q.ancestor != nil && idx.def.Builtin() {
+ // chop the terminal null byte off the q.ancestor key... we can accept
+ // anything which is a descendant or an exact match
+ chopped := q.ancestor[:len(q.ancestor)-1]
+ def.start = append(def.start, chopped...)
+
+ // setting the endpoint as 1 more than the chopped version ensures that
+ // we don't include anything that's a sibling (or parent) of q.ancestor.
+ def.end = append(def.end, increment(chopped)...)
+ }
+ return def
+}
+
+func getKindlessIndex(q *reducedQuery, s *memStore) []*iterDefinition {
+ // kindless query filters on `ents:ns`
+ for _, sb := range q.suffixFormat {
+ if sb.Property != "__key__" {
+ panic("impossible: inequality filter on kindless query is not __key__")
+ }
+ }
+ return []*iterDefinition{{
+ s.GetCollection("ents:" + q.ns),
+ nil,
+ q.low,
+ q.high,
+ }}
+}
+
+// calculateConstraints produces a mapping of all equality filters to the values
+// that they're constrained to. It also calculates residuals, which are an
+// arbitrary value for filling index prefixes which have more equality fields
+// than are necessary. The value doesn't matter, as long as its an equality
+// constraint in the original query.
+func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, residuals map[string][]byte) {
+ residuals = make(map[string][]byte, len(q.eqFilters))
+ constraints = make(map[string][][]byte, len(q.eqFilters))
+ for prop, vals := range q.eqFilters {
+ bvals := make([][]byte, 0, len(vals))
+ for val := range vals {
+ bvals = append(bvals, []byte(val))
+ }
+ residuals[prop] = bvals[0]
+ constraints[prop] = bvals
+ }
+ return
+}
+
+// getIndicies returns a set of iterator definitions. Iterating over these
+// will result in matching suffixes.
+func getIndicies(q *reducedQuery, s *memStore) []*iterDefinition {
+ if q.kind == "" {
+ return getKindlessIndex(q, s)
+ }
+
+ relevantIdxs := getRelevantIndicies(q, s)
+ if relevantIdxs == nil {
+ return nil
+ }
+
+ constraints, residuals := calculateConstraints(q)
+
+ ret := []*iterDefinition{}
+ for len(constraints) > 0 {
+ lenBefore := len(ret)
+ for _, idx := range relevantIdxs {
+ // see if this index is helpful for the remaining constraints at all
+ good := false
+ for k := range idx.eqFilts {
+ if _, ok := constraints[k]; ok {
+ good = true
+ break
+ }
+ }
+ if good {
+ ret = append(ret, generate(q, idx, constraints, residuals))
+ break
+ }
+ }
+ if lenBefore == len(ret) {
+ // something is really wrong here... if relevantIdxs is !nil, then we
+ // should always be able to make progress in this loop.
+ panic("deadlock: cannot fulfil query?")
+ }
+ }
+ return ret
+}
« no previous file with comments | « impl/memory/datastore_index.go ('k') | impl/memory/datastore_index_test.go » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698