Chromium Code Reviews
| Index: impl/memory/datastore_index_selection.go |
| diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..03db477be7cbaeae478f0ecf3dd559622949640e |
| --- /dev/null |
| +++ b/impl/memory/datastore_index_selection.go |
| @@ -0,0 +1,301 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +package memory |
| + |
| +import ( |
| + "bytes" |
| + "sort" |
| + |
| + ds "github.com/luci/gae/service/datastore" |
| + "github.com/luci/gae/service/datastore/serialize" |
| + "github.com/luci/gkvlite" |
| +) |
| + |
| +// reducedQuery contains only the pertinent details from a properly checked |
| +// query. |
| +type reducedQuery struct { |
| [iannucci, 2015/08/20 10:32:13: this structure is the key to this CL; all the code …] |
| + ns string |
| + kind string |
| + ancestor []byte |
| + |
| + eqFilters map[string]map[string]struct{} |
| + // suffixFormat ALWAYS includes the inequality filter as the 0th element. |
| + // suffixFormat ALWAYS includes any projections (in ascending order) after |
| + // all user-defined sort orders. |
| + suffixFormat []ds.IndexColumn |
| + |
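| + // low and high are the encoded bounds on the index-row suffix; they become |
| + // the start and end of each generated iterDefinition. |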
| + low []byte |
| + high []byte |
| +} |
| + |
| +func (q *reducedQuery) needAncestor() bool { |
| + return q.ancestor != nil |
| +} |
| + |
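| +// IndexDefinitionSortable is a candidate index for a query. numRows is the |
| +// row count of coll, eqFilts is the set of equality-filter properties |
| +// covered by the index's prefix columns, and prefixIdx is the offset in the |
| +// implied sort order (SortBy plus __key__) at which the query suffix begins. |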
| +type IndexDefinitionSortable struct { |
| + numRows uint64 |
| + eqFilts map[string]struct{} |
| + prefixIdx int |
| + |
| + def *ds.IndexDefinition |
| + coll *memCollection |
| +} |
| + |
| +type IndexDefinitionSortableSlice []IndexDefinitionSortable |
| + |
| +func (s IndexDefinitionSortableSlice) Len() int { return len(s) } |
| +func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } |
| +func (s IndexDefinitionSortableSlice) Less(i, j int) bool { |
| + // sort by descending numRows, tie-breaking on the IndexDefinition ordering |
| + if s[i].numRows == s[j].numRows { |
| + return s[i].def.Less(s[j].def) |
| + } |
| + return s[i].numRows > s[j].numRows |
| +} |
| + |
| +// addDefinition adds a new IndexDefinitionSortable to this slice. It's only |
| +// added if it could be useful in servicing q, otherwise this function is |
| +// a noop. |
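| +// An index is only useful if its implied sort order (SortBy plus __key__) |
| +// ends with exactly q.suffixFormat and every remaining prefix column is one |
| +// of q's equality filters. |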
| +func (idxs *IndexDefinitionSortableSlice) addDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) { |
| + // Kindless queries are handled elsewhere. |
| + if id.Kind != q.kind { |
| + return |
| + } |
| + |
| + // If we're an ancestor query and the index is compound but doesn't include |
| + // an Ancestor field, it doesn't work. Builtin indices can be used for |
| + // ancestor queries (and have !Ancestor), assuming the query has only |
| + // equality filters (plus an inequality on __key__), or a single inequality. |
| + if q.needAncestor() && id.Compound() && !id.Ancestor { |
| + return |
| + } |
| + |
| + // If the index has fewer fields than we need for the suffix, it can't |
| + // possibly help. |
| + if (len(id.SortBy) + 1) < len(q.suffixFormat) { |
| + return |
| + } |
| + |
| + // See if we actually have this index for the correct namespace |
| + coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(id))) |
| + if coll == nil { |
| + return |
| + } |
| + |
| + // Append the implied __key__ sort order. All indices have this, but it's |
| + // not explicitly represented in IndexDefinition. |
| + sortBy := make([]ds.IndexColumn, len(id.SortBy), len(id.SortBy)+1) |
| + copy(sortBy, id.SortBy) |
| + sortBy = append(sortBy, ds.IndexColumn{Property: "__key__"}) |
| + |
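| + // e.g. with an implied sortBy of [A, B, __key__] and a 2-element |
| + // q.suffixFormat, suffixIdx is 1: the last two columns must match the |
| + // suffix exactly, and the leading column A must be an equality filter. |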
| + suffixIdx := len(sortBy) - len(q.suffixFormat) |
| + // make sure the orders are precisely the same |
| + for i, sb := range sortBy[suffixIdx : suffixIdx+len(q.suffixFormat)] { |
| + if q.suffixFormat[i] != sb { |
| + return |
| + } |
| + } |
| + |
| + // Make sure the equalities section doesn't contain any properties we don't |
| + // want in our query. |
| + for _, p := range sortBy[:suffixIdx] { |
| + if _, ok := q.eqFilters[p.Property]; !ok { |
| + return |
| + } |
| + } |
| + |
| + // ok, we can actually use this |
| + eqFilts := make(map[string]struct{}, suffixIdx) |
| + for _, sb := range id.SortBy[:suffixIdx] { |
| + eqFilts[sb.Property] = struct{}{} |
| + delete(missingTerms, sb.Property) |
| + } |
| + numRows, _ := coll.GetTotals() |
| + *idxs = append(*idxs, IndexDefinitionSortable{numRows, eqFilts, suffixIdx, id, coll}) |
| +} |
| + |
| +// getRelevantIndicies retrieves the relevant indices which could be used to |
| +// service q. It returns nil if it's not possible to service q with the |
| +// currently available indices. |
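| +// Candidates are the builtin single-property indices for each equality |
| +// filter, plus every stored composite index; the result is sorted per |
| +// IndexDefinitionSortableSlice.Less. |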
| +func getRelevantIndicies(q *reducedQuery, s *memStore) IndexDefinitionSortableSlice { |
| + idxCol := s.GetCollection("idx") |
| + if idxCol == nil { |
| + return nil |
| + } |
| + |
| + missingTerms := map[string]struct{}{} |
| + for k := range q.eqFilters { |
| + missingTerms[k] = struct{}{} |
| + } |
| + idxs := IndexDefinitionSortableSlice{} |
| + |
| + // add builtins |
| + for k := range q.eqFilters { |
| + idxs.addDefinition(q, s, missingTerms, &ds.IndexDefinition{ |
| + Kind: q.kind, |
| + Ancestor: false, |
| + SortBy: []ds.IndexColumn{ |
| + {Property: k}, |
| + }, |
| + }) |
| + } |
| + |
| + // Try adding all compound indices. |
| + idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, func(i *gkvlite.Item) bool { |
| + id, err := serialize.ReadIndexDefinition(bytes.NewBuffer(i.Key)) |
| + if err != nil { |
| + panic(err) // memory corruption |
| + } |
| + idxs.addDefinition(q, s, missingTerms, &id) |
| + return true |
| + }) |
| + |
| + // If any terms (equality filters or projections) are still missing, this |
| + // query is impossible to fulfil with the current indices. |
| + if len(missingTerms) > 0 { |
| + // TODO(riannucci): return an error when the query requires composite |
| + // indices which are missing. We can even calculate the maximum missing |
| + // index which would satisfy the remainder of the query! |
| + return nil |
| + } |
| + |
| + sort.Sort(idxs) |
| + |
| + return idxs |
| +} |
| + |
| +// invert simply inverts all the bytes in bs. |
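| +// For example, invert([]byte{0x00, 0x7f, 0xff}) == []byte{0xff, 0x80, 0x00}. |
| +// This is what makes values for DESCENDING index columns compare correctly |
| +// as raw bytes. |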
| +func invert(bs []byte) []byte { |
| + if bs == nil { |
| + return nil |
| + } |
| + ret := make([]byte, len(bs)) |
| + for i, b := range bs { |
| + ret[i] = 0xFF ^ b |
| + } |
| + return ret |
| +} |
| + |
| +// peel picks a constraint value for the property. It then removes this value |
| +// from constraints (possibly removing the entire row from constraints if it |
| +// was the last value). If the value wasn't available in constraints, it picks |
| +// the value from residuals. |
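| +// |
| +// e.g. with constraints["A"] == [a1, a2]: the first peel("A", ...) returns |
| +// a1 and leaves [a2], the second returns a2 and deletes the entry, and any |
| +// further calls fall back to residuals["A"]. |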
| +func peel(prop string, constraints map[string][][]byte, residuals map[string][]byte) []byte { |
| + ret := []byte(nil) |
| + if vals, ok := constraints[prop]; ok { |
| + ret = vals[0] |
| + if len(vals) == 1 { |
| + delete(constraints, prop) |
| + } else { |
| + constraints[prop] = vals[1:] |
| + } |
| + } else { |
| + ret = residuals[prop] |
| + } |
| + return ret |
| +} |
| + |
| +// generate generates a single iterDefinition for the given index. |
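| +// The prefix is the query ancestor (when set, for compound indices) followed |
| +// by one peeled value per equality-prefix column, with values for DESCENDING |
| +// columns inverted; for builtin indices, an ancestor query instead narrows |
| +// start/end to the ancestor's key range. |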
| +func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[string][][]byte, residuals map[string][]byte) *iterDefinition { |
| + def := &iterDefinition{ |
| + c: idx.coll, |
| + start: q.low, |
| + end: q.high, |
| + } |
| + toJoin := [][]byte{} |
| + if q.ancestor != nil && idx.def.Compound() { |
| + toJoin = append(toJoin, q.ancestor) |
| + } |
| + // Only the index's equality-prefix columns contribute to the prefix; the |
| + // suffix columns are covered by the start/end range. |
| + for _, sb := range idx.def.SortBy[:idx.prefixIdx] { |
| + val := peel(sb.Property, constraints, residuals) |
| + if sb.Direction == ds.DESCENDING { |
| + val = invert(val) |
| + } |
| + toJoin = append(toJoin, val) |
| + } |
| + def.prefix = bjoin(toJoin...) |
| + if q.ancestor != nil && idx.def.Builtin() { |
| + // chop the terminal null byte off the q.ancestor key... we can accept |
| + // anything which is a descendant or an exact match |
| + chopped := q.ancestor[:len(q.ancestor)-1] |
| + def.start = append(def.start, chopped...) |
| + |
| + // setting the endpoint as 1 more than the chopped version ensures that |
| + // we don't include anything that's a sibling (or parent) of q.ancestor. |
| + def.end = append(def.end, increment(chopped)...) |
| + } |
| + return def |
| +} |
| + |
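| +// getKindlessIndex returns a single iterator over the raw entity collection |
| +// for the namespace; a kindless query may only be constrained by __key__, so |
| +// no index collection is consulted. |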
| +func getKindlessIndex(q *reducedQuery, s *memStore) []*iterDefinition { |
| + // kindless query filters on `ents:ns` |
| + for _, sb := range q.suffixFormat { |
| + if sb.Property != "__key__" { |
| + panic("impossible: inequality filter on kindless query is not __key__") |
| + } |
| + } |
| + return []*iterDefinition{{ |
| + c: s.GetCollection("ents:" + q.ns), |
| + start: q.low, |
| + end: q.high, |
| + }} |
| +} |
| + |
| +// calculateConstraints produces a mapping from each equality-filtered |
| +// property to the values it's constrained to. It also calculates residuals: |
| +// an arbitrary value per property, used to fill index prefixes which have |
| +// more equality columns than are strictly necessary. The exact value doesn't |
| +// matter, as long as it's one of the equality constraints in the original |
| +// query. |
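| +// |
| +// e.g. eqFilters {"A": {a1, a2}, "B": {b1}} yields constraints |
| +// {"A": [a1, a2], "B": [b1]} and residuals {"A": a1 or a2 (map iteration |
| +// order is arbitrary), "B": b1}. |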
| +func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, residuals map[string][]byte) { |
| + residuals = make(map[string][]byte, len(q.eqFilters)) |
| + constraints = make(map[string][][]byte, len(q.eqFilters)) |
| + for prop, vals := range q.eqFilters { |
| + bvals := make([][]byte, 0, len(vals)) |
| + for val := range vals { |
| + bvals = append(bvals, []byte(val)) |
| + } |
| + residuals[prop] = bvals[0] |
| + constraints[prop] = bvals |
| + } |
| + return |
| +} |
| + |
| +// getIndicies returns a set of iterator definitions. Iterating over these |
| +// will result in matching suffixes. |
| +func getIndicies(q *reducedQuery, s *memStore) []*iterDefinition { |
| + if q.kind == "" { |
| + return getKindlessIndex(q, s) |
| + } |
| + |
| + relevantIdxs := getRelevantIndicies(q, s) |
| + if relevantIdxs == nil { |
| + return nil |
| + } |
| + |
| + constraints, residuals := calculateConstraints(q) |
| + |
| + ret := []*iterDefinition{} |
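| + // Greedily consume the constraints: each pass picks the first index (in |
| + // sorted order) whose equality prefix still covers some remaining |
| + // constraint and generates one iterator from it; generate peels one value |
| + // per prefix column, so constraints shrink with every pass. |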
| + for len(constraints) > 0 { |
| + lenBefore := len(ret) |
| + for _, idx := range relevantIdxs { |
| + // see if this index is helpful for the remaining constraints at all |
| + good := false |
| + for k := range idx.eqFilts { |
| + if _, ok := constraints[k]; ok { |
| + good = true |
| + break |
| + } |
| + } |
| + if good { |
| + ret = append(ret, generate(q, idx, constraints, residuals)) |
| + break |
| + } |
| + } |
| + if lenBefore == len(ret) { |
| + // something is really wrong here... if relevantIdxs is !nil, then we |
| + // should always be able to make progress in this loop. |
| + panic("deadlock: cannot fulfil query?") |
| + } |
| + } |
| + return ret |
| +} |