Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Unified Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator
Patch Set: Baby's first query! Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: impl/memory/datastore_index_selection.go
diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go
new file mode 100644
index 0000000000000000000000000000000000000000..36526e38220ede0ef92bf7b9aeac9044565abefb
--- /dev/null
+++ b/impl/memory/datastore_index_selection.go
@@ -0,0 +1,404 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package memory
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+
+ ds "github.com/luci/gae/service/datastore"
+ "github.com/luci/gae/service/datastore/serialize"
+ "github.com/luci/gkvlite"
+)
+
+// reducedQuery contains only the pieces of the query necessary to iteratate for
dnj (Google) 2015/08/23 06:50:06 ITERATATE!
iannucci 2015/08/23 18:19:42 Done.
+// results.
+// deduplication is applied externally
+// projection / keysonly / entity retrieval is done externally
+type reducedQuery struct {
+ ns string
+ kind string
+
+ // eqFilters indicate the set of all prefix constraints which need to be
+ // fulfilled in the composite query. All of these will translate into prefix
+ // bytes for SOME index.
+ eqFilters map[string]map[string]struct{}
+
+ // suffixFormat is the PRECISE listing of the suffix columns that ALL indexes
+ // in the multi query will have.
+ //
+ // suffixFormat ALWAYS includes the inequality filter (if any) as the 0th
+ // element
+ // suffixFormat ALWAYS includes any additional projections (in ascending
+ // order) after all user defined sort orders
+ // suffixFormat ALWAYS has __key__ as the last column
+ suffixFormat []ds.IndexColumn
+
+ // limits of the inequality and/or full sort order. This is ONLY a suffix,
+ // and it will be appended to the prefix during iteration.
+ start []byte
+ end []byte
+}
+
+type IndexDefinitionSortable struct {
+ numRows uint64
+ suffixIdx int
+
+ // eqFilts is the list of ACTUAL prefix columns. Note that it may contain
+ // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becuase
+ // (tag=1, tag=2) is a perfectly valid query).
+ eqFilts []ds.IndexColumn
+ coll *memCollection
+}
+
+func (i *IndexDefinitionSortable) has(property string) bool {
+ for _, col := range i.eqFilts {
+ if col.Property == property {
+ return true
+ }
+ if property == "__ancestor__" {
+ // ancestor can only be the first column, so abort early
+ break
+ }
+ }
+ return false
+}
+
+type IndexDefinitionSortableSlice []IndexDefinitionSortable
+
+func (s IndexDefinitionSortableSlice) Len() int { return len(s) }
+func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s IndexDefinitionSortableSlice) Less(i, j int) bool {
+ // prefer smaller indicies, and indicies which have fewer suffix indexes.
+ //
+ // After that, it doesn't matter. Note that this Less implementation is only
+ // for quick sorts, and NOT for equality comparison (e.g. !(x < y) && !(y < x)
+ // does NOT imply x == y).
+ return s[i].numRows < s[j].numRows && s[i].suffixIdx < s[j].suffixIdx
+}
+
+// maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.
+// It's only added if it could be useful in servicing q, otherwise this function
+// is a noop.
+func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) {
+ // Kindless queries are handled elsewhere.
+ if id.Kind != q.kind {
+ return
+ }
+
+ // If we're an ancestor query, and the index is compound, but doesn't include
+ // an Ancestor field, it doesn't work. Builtin indicies can be used for
+ // ancestor queries (and have !Ancestor), assuming that it's only equality
+ // filters (plus inequality on __key__), or a single inequality.
+ if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {
+ return
+ }
+
+ // add __ancestor__, __key__, if necessary
+ sortBy := id.NormalizeOrder()
+
+ // If the index has fewer fields than we need for the suffix, it can't
+ // possibly help.
+ if len(sortBy) < len(q.suffixFormat) {
+ return
+ }
+
+ // See if we actually have this index for the correct namespace
+ coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(*id)))
+ if coll == nil {
+ return
+ }
+
+ suffixIdx := len(sortBy) - len(q.suffixFormat)
+ // make sure the orders are precisely the same
+ for i, sb := range sortBy[suffixIdx:] {
+ if q.suffixFormat[i] != sb {
+ return
+ }
+ }
+
+ // Make sure the equalities section doesn't contain any properties we don't
+ // want in our query.
+ for _, p := range sortBy[:suffixIdx] {
+ if _, ok := q.eqFilters[p.Property]; !ok {
+ return
+ }
+ }
+
+ // ok, we can actually use this
+ numRows, _ := coll.GetTotals()
+ toAdd := IndexDefinitionSortable{
+ numRows: numRows, suffixIdx: suffixIdx, coll: coll}
+ toAdd.eqFilts = sortBy[:suffixIdx]
+ for _, sb := range toAdd.eqFilts {
+ delete(missingTerms, sb.Property)
+ }
+ *idxs = append(*idxs, toAdd)
+}
+
+// getRelevantIndicies retrieves the relevant indices which could be used to
+// service q. It returns nil if it's not possible to service q with the current
+// indicies.
+func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableSlice, error) {
+ missingTerms := map[string]struct{}{}
+ for k := range q.eqFilters {
+ if k == "__ancestor__" {
+ // ancestor is not a prefix which can be satisfied by a single index. It
+ // must be satisfied by ALL indices (and has special logic for this in
+ // the addDefinition logic)
+ continue
+ }
+ missingTerms[k] = struct{}{}
+ }
+ idxs := IndexDefinitionSortableSlice{}
+
+ // add builtins
+ idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ })
+ for prop := range q.eqFilters {
+ idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ SortBy: []ds.IndexColumn{
+ {Property: prop},
+ },
+ })
+ idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
+ Kind: q.kind,
+ SortBy: []ds.IndexColumn{
+ {Property: prop, Direction: ds.DESCENDING},
+ },
+ })
+ }
+
+ // Try adding all compound indicies
+ idxCol := s.GetCollection("idx")
+ if idxCol != nil {
+ idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, func(i *gkvlite.Item) bool {
+ id, err := serialize.ReadIndexDefinition(bytes.NewBuffer(i.Key))
+ memoryCorruption(err)
+
+ idxs.maybeAddDefinition(q, s, missingTerms, &id)
+ return true
+ })
+ }
+
+ // this query is impossible to fulfil with the current indicies. Not all the
+ // terms (equality + projection) are satisfied.
+ if len(missingTerms) < 0 || len(idxs) == 0 {
+ // TODO(riannucci): return error when index requires missing composite
+ // indicies. We can even calculate the maximum missing index which would
+ // satisfy the remainder of the query!
+ remains := &ds.IndexDefinition{
+ Kind: q.kind,
+ Ancestor: q.eqFilters["__ancestor__"] != nil,
+ }
+ terms := make([]string, 0, len(missingTerms))
+ for mt := range missingTerms {
+ terms = append(terms, mt)
+ }
+ if serializationDeterministic {
+ sort.Strings(terms)
+ }
+ for _, term := range terms {
+ remains.SortBy = append(remains.SortBy, ds.IndexColumn{Property: term})
+ }
+ remains.SortBy = append(remains.SortBy, q.suffixFormat...)
+ last := remains.SortBy[len(remains.SortBy)-1]
+ if last.Direction == ds.ASCENDING {
+ // this is implied
+ remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]
+ }
+ if remains.Builtin() {
+ // don't recommend that they add a builtin query... just have the query be
+ // empty.
+ return nil, nil
+ }
+ return nil, fmt.Errorf(
+ "Your indicies are insufficient! Try adding:\n %s", remains)
+ }
+
+ return idxs, nil
+}
+
+// peel picks a constraint value for the property. It then removes this value
+// from constraints (possibly removing the entire row from constraints if it
+// was the last value). If the value wasn't available in constraints, it picks
+// the value from residuals.
+func peel(prop string, constraints map[string][][]byte, residuals map[string][]byte) []byte {
+ ret := []byte(nil)
+ if vals, ok := constraints[prop]; ok {
+ ret = vals[0]
+ if len(vals) == 1 {
+ delete(constraints, prop)
+ } else {
+ constraints[prop] = vals[1:]
+ }
+ } else {
+ ret = residuals[prop]
+ }
+ return ret
+}
+
+// generate generates a single iterDefinition for the given index.
+func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[string][][]byte, residuals map[string][]byte) *iterDefinition {
+ def := &iterDefinition{
+ c: idx.coll,
+ start: q.start,
+ end: q.end,
+ }
+ toJoin := [][]byte{}
+ for _, sb := range idx.eqFilts {
+ val := peel(sb.Property, constraints, residuals)
+ if sb.Direction == ds.DESCENDING {
+ val = invert(val)
+ }
+ toJoin = append(toJoin, val)
+ }
+ def.prefix = bjoin(toJoin...)
+ def.prefixLen = len(def.prefix)
+
+ if q.eqFilters["__ancestor__"] != nil && !idx.has("__ancestor__") {
+ // The query requires an ancestor, but the index doesn't explicitly have it
+ // as part of the prefix (otherwise it would have been the first eqFilt
+ // above). This happens when it's a builtin index, or if it's the primary
+ // index (for a kindless query), or if it's the Kind index (for a filterless
+ // query).
+ if len(q.suffixFormat) != 1 && q.suffixFormat[0].Property != "__key__" {
+ // This should never happen. One of the previous validators would have
+ // selected a different index. But just in case.
+ impossible(fmt.Errorf("cannot supply an implicit ancestor for %#v", q))
+ }
+
+ // chop the terminal null byte off the q.ancestor key... we can accept
+ // anything which is a descendant or an exact match.
+
+ // This silly construction gets the __ancestor__ value, because it's a
+ // map[string]struct{} instead of a [][]byte{} (otherwise we'd just get
+ // the value at the 0th index).
+ anc := ""
+ for k := range q.eqFilters["__ancestor__"] {
+ anc = k
+ break
+ }
+
+ // Intentionally do NOT update prefixLen. This allows multiIterator to
+ // correctly include the entire key in the shared iterator suffix, instead
+ // of just the remainder.
+ chopped := []byte(anc[:len(anc)-1])
+ if q.suffixFormat[0].Direction == ds.DESCENDING {
+ chopped = invert(chopped)
+ }
+ def.prefix = bjoin(def.prefix, chopped)
+
+ // Update start and end, since we know that if they contain anything, they
+ // contain values for the __key__ field.
+ if def.start != nil {
+ if !bytes.HasPrefix(def.start, chopped) {
+ // again, shouldn't happen, but if it does, we want to know about it.
+ impossible(fmt.Errorf(
+ "start suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",
+ def.start, chopped))
+ }
+ def.start = def.start[:len(chopped)]
+ }
+ if def.end != nil {
+ if !bytes.HasPrefix(def.end, chopped) {
+ impossible(fmt.Errorf(
+ "end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",
+ def.end, chopped))
+ }
+ def.end = def.end[:len(chopped)]
+ }
+ }
+
+ return def
+}
+
+// calculateConstraints produces a mapping of all equality filters to the values
+// that they're constrained to. It also calculates residuals, which are an
+// arbitrary value for filling index prefixes which have more equality fields
+// than are necessary. The value doesn't matter, as long as its an equality
+// constraint in the original query.
+func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, residuals map[string][]byte) {
+ residuals = make(map[string][]byte, len(q.eqFilters))
+ constraints = make(map[string][][]byte, len(q.eqFilters))
+ for prop, vals := range q.eqFilters {
+ bvals := make([][]byte, 0, len(vals))
+ for val := range vals {
+ bvals = append(bvals, []byte(val))
+ }
+ residuals[prop] = bvals[0]
+ if prop == "__ancestor__" {
+ // exclude __ancestor__ from the constraints.
+ //
+ // This is because it's handled specially during index proposal and
+ // generation. Ancestor is used by ALL indices, and so its residual value
+ // above will be sufficient.
+ continue
+ }
+ constraints[prop] = bvals
+ }
+ return
+}
+
+// getIndicies returns a set of iterator definitions. Iterating over these
+// will result in matching suffixes.
+func getIndicies(q *reducedQuery, s *memStore) ([]*iterDefinition, error) {
+ relevantIdxs := IndexDefinitionSortableSlice(nil)
+ if q.kind == "" {
+ if coll := s.GetCollection("ents:" + q.ns); coll != nil {
+ relevantIdxs = IndexDefinitionSortableSlice{{coll: coll}}
+ }
+ } else {
+ err := error(nil)
+ relevantIdxs, err = getRelevantIndicies(q, s)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if len(relevantIdxs) == 0 {
+ return nil, errQueryDone
+ }
+ sort.Sort(sort.Reverse(relevantIdxs))
+
+ constraints, residuals := calculateConstraints(q)
+
+ ret := []*iterDefinition{}
+ for len(constraints) > 0 || len(ret) == 0 {
+ lenBefore := len(ret)
+ for i := len(relevantIdxs) - 1; i >= 0; i-- {
+ // see if this index is helpful for the remaining constraints at all
+ //
+ // We pick the the first index, always. This makes it so that filters with
+ // NO filters hit this at the minimum.
+ useful := len(ret) == 0
+ if !useful {
+ for _, col := range relevantIdxs[i].eqFilts {
+ if _, ok := constraints[col.Property]; ok || len(ret) == 0 {
+ // it allows us to make progress on at least one constraint! Yay!
+ useful = true
+ break
+ }
+ }
+ }
+ if useful {
+ ret = append(ret, generate(q, relevantIdxs[i], constraints, residuals))
+ } else {
+ // a useless index will never become useful, because we never add more
+ // constraints. Nuke it.
+ relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...)
+ }
+ }
+ if lenBefore == len(ret) {
+ // something is really wrong here... if relevantIdxs is !nil, then we
+ // should always be able to make progress in this loop.
+ impossible(fmt.Errorf("deadlock: cannot fulfil query?"))
+ }
+ }
+
+ return ret, nil
+}

Powered by Google App Engine
This is Rietveld 408576698