impl/memory/datastore_index_selection.go - Issue 1302813003: impl/memory: Implement Queries

Side by Side Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator

Patch Set: Baby's first query! Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2015 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 package memory

	6

	7 import (

	8 "bytes"

	9 "fmt"

	10 "sort"

	11

	12 ds "github.com/luci/gae/service/datastore"

	13 "github.com/luci/gae/service/datastore/serialize"

	14 "github.com/luci/gkvlite"

	15 )

	16

	17 // reducedQuery contains only the pieces of the query necessary to iteratate for
	dnj (Google) 2015/08/23 06:50:06 ITERATATE! ITERATATE! iannucci 2015/08/23 18:19:42 Done. Show quoted text On 2015/08/23 06:50:06, dnj (Google) wrote: > ITERATATE! Done.
	18 // results.

	19 // deduplication is applied externally

	20 // projection / keysonly / entity retrieval is done externally

	21 type reducedQuery struct {

	22 ns string

	23 kind string

	24

	25 // eqFilters indicate the set of all prefix constraints which need to be

	26 // fulfilled in the composite query. All of these will translate into pr efix

	27 // bytes for SOME index.

	28 eqFilters map[string]map[string]struct{}

	29

	30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes

	31 // in the multi query will have.

	32 //

	33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h

	34 // element

	35 // suffixFormat ALWAYS includes any additional projections (in ascending

	36 // order) after all user defined sort orders

	37 // suffixFormat ALWAYS has __key__ as the last column

	38 suffixFormat []ds.IndexColumn

	39

	40 // limits of the inequality and/or full sort order. This is ONLY a suffi x,

	41 // and it will be appended to the prefix during iteration.

	42 start []byte

	43 end []byte

	44 }

	45

	46 type IndexDefinitionSortable struct {

	47 numRows uint64

	48 suffixIdx int

	49

	50 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n

	51 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase

	52 // (tag=1, tag=2) is a perfectly valid query).

	53 eqFilts []ds.IndexColumn

	54 coll *memCollection

	55 }

	56

	57 func (i *IndexDefinitionSortable) has(property string) bool {

	58 for _, col := range i.eqFilts {

	59 if col.Property == property {

	60 return true

	61 }

	62 if property == "__ancestor__" {

	63 // ancestor can only be the first column, so abort early

	64 break

	65 }

	66 }

	67 return false

	68 }

	69

	70 type IndexDefinitionSortableSlice []IndexDefinitionSortable

	71

	72 func (s IndexDefinitionSortableSlice) Len() int { return len(s) }

	73 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

	74 func (s IndexDefinitionSortableSlice) Less(i, j int) bool {

	75 // prefer smaller indicies, and indicies which have fewer suffix indexes .

	76 //

	77 // After that, it doesn't matter. Note that this Less implementation is only

	78 // for quick sorts, and NOT for equality comparison (e.g. !(x < y) && !( y < x)

	79 // does NOT imply x == y).

	80 return s[i].numRows < s[j].numRows && s[i].suffixIdx < s[j].suffixIdx

	81 }

	82

	83 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.

	84 // It's only added if it could be useful in servicing q, otherwise this function

	85 // is a noop.

	86 func (idxs IndexDefinitionSortableSlice) maybeAddDefinition(q reducedQuery, s memStore, missingTerms map[string]struct{}, id ds.IndexDefinition) {

	87 // Kindless queries are handled elsewhere.

	88 if id.Kind != q.kind {

	89 return

	90 }

	91

	92 // If we're an ancestor query, and the index is compound, but doesn't in clude

	93 // an Ancestor field, it doesn't work. Builtin indicies can be used for

	94 // ancestor queries (and have !Ancestor), assuming that it's only equali ty

	95 // filters (plus inequality on __key__), or a single inequality.

	96 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {

	97 return

	98 }

	99

	100 // add __ancestor__, __key__, if necessary

	101 sortBy := id.NormalizeOrder()

	102

	103 // If the index has fewer fields than we need for the suffix, it can't

	104 // possibly help.

	105 if len(sortBy) < len(q.suffixFormat) {

	106 return

	107 }

	108

	109 // See if we actually have this index for the correct namespace

	110 coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(* id)))

	111 if coll == nil {

	112 return

	113 }

	114

	115 suffixIdx := len(sortBy) - len(q.suffixFormat)

	116 // make sure the orders are precisely the same

	117 for i, sb := range sortBy[suffixIdx:] {

	118 if q.suffixFormat[i] != sb {

	119 return

	120 }

	121 }

	122

	123 // Make sure the equalities section doesn't contain any properties we do n't

	124 // want in our query.

	125 for _, p := range sortBy[:suffixIdx] {

	126 if _, ok := q.eqFilters[p.Property]; !ok {

	127 return

	128 }

	129 }

	130

	131 // ok, we can actually use this

	132 numRows, _ := coll.GetTotals()

	133 toAdd := IndexDefinitionSortable{

	134 numRows: numRows, suffixIdx: suffixIdx, coll: coll}

	135 toAdd.eqFilts = sortBy[:suffixIdx]

	136 for _, sb := range toAdd.eqFilts {

	137 delete(missingTerms, sb.Property)

	138 }

	139 idxs = append(idxs, toAdd)

	140 }

	141

	142 // getRelevantIndicies retrieves the relevant indices which could be used to

	143 // service q. It returns nil if it's not possible to service q with the current

	144 // indicies.

	145 func getRelevantIndicies(q reducedQuery, s memStore) (IndexDefinitionSortableS lice, error) {

	146 missingTerms := map[string]struct{}{}

	147 for k := range q.eqFilters {

	148 if k == "__ancestor__" {

	149 // ancestor is not a prefix which can be satisfied by a single index. It

	150 // must be satisfied by ALL indices (and has special log ic for this in

	151 // the addDefinition logic)

	152 continue

	153 }

	154 missingTerms[k] = struct{}{}

	155 }

	156 idxs := IndexDefinitionSortableSlice{}

	157

	158 // add builtins

	159 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{

	160 Kind: q.kind,

	161 })

	162 for prop := range q.eqFilters {

	163 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{

	164 Kind: q.kind,

	165 SortBy: []ds.IndexColumn{

	166 {Property: prop},

	167 },

	168 })

	169 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{

	170 Kind: q.kind,

	171 SortBy: []ds.IndexColumn{

	172 {Property: prop, Direction: ds.DESCENDING},

	173 },

	174 })

	175 }

	176

	177 // Try adding all compound indicies

	178 idxCol := s.GetCollection("idx")

	179 if idxCol != nil {

	180 idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, fun c(i *gkvlite.Item) bool {

	181 id, err := serialize.ReadIndexDefinition(bytes.NewBuffer (i.Key))

	182 memoryCorruption(err)

	183

	184 idxs.maybeAddDefinition(q, s, missingTerms, &id)

	185 return true

	186 })

	187 }

	188

	189 // this query is impossible to fulfil with the current indicies. Not all the

	190 // terms (equality + projection) are satisfied.

	191 if len(missingTerms) < 0 \|\| len(idxs) == 0 {

	192 // TODO(riannucci): return error when index requires missing com posite

	193 // indicies. We can even calculate the maximum missing index whi ch would

	194 // satisfy the remainder of the query!

	195 remains := &ds.IndexDefinition{

	196 Kind: q.kind,

	197 Ancestor: q.eqFilters["__ancestor__"] != nil,

	198 }

	199 terms := make([]string, 0, len(missingTerms))

	200 for mt := range missingTerms {

	201 terms = append(terms, mt)

	202 }

	203 if serializationDeterministic {

	204 sort.Strings(terms)

	205 }

	206 for _, term := range terms {

	207 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term})

	208 }

	209 remains.SortBy = append(remains.SortBy, q.suffixFormat...)

	210 last := remains.SortBy[len(remains.SortBy)-1]

	211 if last.Direction == ds.ASCENDING {

	212 // this is implied

	213 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]

	214 }

	215 if remains.Builtin() {

	216 // don't recommend that they add a builtin query... just have the query be

	217 // empty.

	218 return nil, nil

	219 }

	220 return nil, fmt.Errorf(

	221 "Your indicies are insufficient! Try adding:\n %s", rem ains)

	222 }

	223

	224 return idxs, nil

	225 }

	226

	227 // peel picks a constraint value for the property. It then removes this value

	228 // from constraints (possibly removing the entire row from constraints if it

	229 // was the last value). If the value wasn't available in constraints, it picks

	230 // the value from residuals.

	231 func peel(prop string, constraints map[string][][]byte, residuals map[string][]b yte) []byte {

	232 ret := []byte(nil)

	233 if vals, ok := constraints[prop]; ok {

	234 ret = vals[0]

	235 if len(vals) == 1 {

	236 delete(constraints, prop)

	237 } else {

	238 constraints[prop] = vals[1:]

	239 }

	240 } else {

	241 ret = residuals[prop]

	242 }

	243 return ret

	244 }

	245

	246 // generate generates a single iterDefinition for the given index.

	247 func generate(q reducedQuery, idx IndexDefinitionSortable, constraints map[stri ng][][]byte, residuals map[string][]byte) iterDefinition {

	248 def := &iterDefinition{

	249 c: idx.coll,

	250 start: q.start,

	251 end: q.end,

	252 }

	253 toJoin := [][]byte{}

	254 for _, sb := range idx.eqFilts {

	255 val := peel(sb.Property, constraints, residuals)

	256 if sb.Direction == ds.DESCENDING {

	257 val = invert(val)

	258 }

	259 toJoin = append(toJoin, val)

	260 }

	261 def.prefix = bjoin(toJoin...)

	262 def.prefixLen = len(def.prefix)

	263

	264 if q.eqFilters["__ancestor__"] != nil && !idx.has("__ancestor__") {

	265 // The query requires an ancestor, but the index doesn't explici tly have it

	266 // as part of the prefix (otherwise it would have been the first eqFilt

	267 // above). This happens when it's a builtin index, or if it's th e primary

	268 // index (for a kindless query), or if it's the Kind index (for a filterless

	269 // query).

	270 if len(q.suffixFormat) != 1 && q.suffixFormat[0].Property != "__ key__" {

	271 // This should never happen. One of the previous validat ors would have

	272 // selected a different index. But just in case.

	273 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", q))

	274 }

	275

	276 // chop the terminal null byte off the q.ancestor key... we can accept

	277 // anything which is a descendant or an exact match.

	278

	279 // This silly construction gets the __ancestor__ value, because it's a

	280 // map[string]struct{} instead of a [][]byte{} (otherwise we'd j ust get

	281 // the value at the 0th index).

	282 anc := ""

	283 for k := range q.eqFilters["__ancestor__"] {

	284 anc = k

	285 break

	286 }

	287

	288 // Intentionally do NOT update prefixLen. This allows multiItera tor to

	289 // correctly include the entire key in the shared iterator suffi x, instead

	290 // of just the remainder.

	291 chopped := []byte(anc[:len(anc)-1])

	292 if q.suffixFormat[0].Direction == ds.DESCENDING {

	293 chopped = invert(chopped)

	294 }

	295 def.prefix = bjoin(def.prefix, chopped)

	296

	297 // Update start and end, since we know that if they contain anyt hing, they

	298 // contain values for the __key__ field.

	299 if def.start != nil {

	300 if !bytes.HasPrefix(def.start, chopped) {

	301 // again, shouldn't happen, but if it does, we w ant to know about it.

	302 impossible(fmt.Errorf(

	303 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v",

	304 def.start, chopped))

	305 }

	306 def.start = def.start[:len(chopped)]

	307 }

	308 if def.end != nil {

	309 if !bytes.HasPrefix(def.end, chopped) {

	310 impossible(fmt.Errorf(

	311 "end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",

	312 def.end, chopped))

	313 }

	314 def.end = def.end[:len(chopped)]

	315 }

	316 }

	317

	318 return def

	319 }

	320

	321 // calculateConstraints produces a mapping of all equality filters to the values

	322 // that they're constrained to. It also calculates residuals, which are an

	323 // arbitrary value for filling index prefixes which have more equality fields

	324 // than are necessary. The value doesn't matter, as long as its an equality

	325 // constraint in the original query.

	326 func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, res iduals map[string][]byte) {

	327 residuals = make(map[string][]byte, len(q.eqFilters))

	328 constraints = make(map[string][][]byte, len(q.eqFilters))

	329 for prop, vals := range q.eqFilters {

	330 bvals := make([][]byte, 0, len(vals))

	331 for val := range vals {

	332 bvals = append(bvals, []byte(val))

	333 }

	334 residuals[prop] = bvals[0]

	335 if prop == "__ancestor__" {

	336 // exclude __ancestor__ from the constraints.

	337 //

	338 // This is because it's handled specially during index p roposal and

	339 // generation. Ancestor is used by ALL indices, and so i ts residual value

	340 // above will be sufficient.

	341 continue

	342 }

	343 constraints[prop] = bvals

	344 }

	345 return

	346 }

	347

	348 // getIndicies returns a set of iterator definitions. Iterating over these

	349 // will result in matching suffixes.

	350 func getIndicies(q reducedQuery, s memStore) ([]*iterDefinition, error) {

	351 relevantIdxs := IndexDefinitionSortableSlice(nil)

	352 if q.kind == "" {

	353 if coll := s.GetCollection("ents:" + q.ns); coll != nil {

	354 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} }

	355 }

	356 } else {

	357 err := error(nil)

	358 relevantIdxs, err = getRelevantIndicies(q, s)

	359 if err != nil {

	360 return nil, err

	361 }

	362 }

	363 if len(relevantIdxs) == 0 {

	364 return nil, errQueryDone

	365 }

	366 sort.Sort(sort.Reverse(relevantIdxs))

	367

	368 constraints, residuals := calculateConstraints(q)

	369

	370 ret := []*iterDefinition{}

	371 for len(constraints) > 0 \|\| len(ret) == 0 {

	372 lenBefore := len(ret)

	373 for i := len(relevantIdxs) - 1; i >= 0; i-- {

	374 // see if this index is helpful for the remaining constr aints at all

	375 //

	376 // We pick the the first index, always. This makes it so that filters with

	377 // NO filters hit this at the minimum.

	378 useful := len(ret) == 0

	379 if !useful {

	380 for _, col := range relevantIdxs[i].eqFilts {

	381 if _, ok := constraints[col.Property]; o k \|\| len(ret) == 0 {

	382 // it allows us to make progress on at least one constraint! Yay!

	383 useful = true

	384 break

	385 }

	386 }

	387 }

	388 if useful {

	389 ret = append(ret, generate(q, relevantIdxs[i], c onstraints, residuals))

	390 } else {

	391 // a useless index will never become useful, bec ause we never add more

	392 // constraints. Nuke it.

	393 relevantIdxs = append(relevantIdxs[:i], relevant Idxs[i+1:]...)

	394 }

	395 }

	396 if lenBefore == len(ret) {

	397 // something is really wrong here... if relevantIdxs is !nil, then we

	398 // should always be able to make progress in this loop.

	399 impossible(fmt.Errorf("deadlock: cannot fulfil query?"))

	400 }

	401 }

	402

	403 return ret, nil

	404 }

OLD	NEW

« no previous file with comments | « impl/memory/datastore_index.go ('k') | impl/memory/datastore_index_test.go » ('j') | impl/memory/datastore_query.go » ('J')