impl/memory/datastore_index_selection.go - Issue 1302813003: impl/memory: Implement Queries

Side by Side Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator

Patch Set: stringSet everywhere Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2015 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 package memory

	6

	7 import (

	8 "bytes"

	9 "fmt"

	10 "sort"

	11 "strings"

	12

	13 ds "github.com/luci/gae/service/datastore"

	14 "github.com/luci/gae/service/datastore/serialize"

	15 )

	16

	17 // reducedQuery contains only the pieces of the query necessary to iterate for

	18 // results.

	19 // deduplication is applied externally

	20 // projection / keysonly / entity retrieval is done externally

	21 type reducedQuery struct {

	22 ns string

	23 kind string

	24

	25 // eqFilters indicate the set of all prefix constraints which need to be

	26 // fulfilled in the composite query. All of these will translate into pr efix

	27 // bytes for SOME index.

	28 eqFilters map[string]stringSet

	29

	30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes

	31 // in the multi query will have.

	32 //

	33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h

	34 // element

	35 // suffixFormat ALWAYS includes any additional projections (in ascending

	36 // order) after all user defined sort orders

	37 // suffixFormat ALWAYS has __key__ as the last column

	38 suffixFormat []ds.IndexColumn

	39

	40 // limits of the inequality and/or full sort order. This is ONLY a suffi x,

	41 // and it will be appended to the prefix during iteration.

	42 start []byte

	43 end []byte

	44

	45 // metadata describing the total number of columns that this query requi res to

	46 // execute perfectly.

	47 numCols int

	48 }

	49

	50 type IndexDefinitionSortable struct {

	51 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n

	52 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase

	53 // (tag=1, tag=2) is a perfectly valid query).

	54 eqFilts []ds.IndexColumn

	55 coll *memCollection

	56 }

	57

	58 func (i *IndexDefinitionSortable) hasAncestor() bool {

	59 return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__"

	60 }

	61

	62 func (i IndexDefinitionSortable) numEqHits(c constraints) int {

	63 ret := 0

	64 for _, filt := range i.eqFilts {

	65 if _, ok := c.constraints[filt.Property]; ok {

	66 ret++

	67 }

	68 }

	69 return ret

	70 }

	71

	72 type IndexDefinitionSortableSlice []IndexDefinitionSortable

	73

	74 func (s IndexDefinitionSortableSlice) Len() int { return len(s) }

	75 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

	76 func (s IndexDefinitionSortableSlice) Less(i, j int) bool {

	77 a, b := s[i], s[j]

	78 if a.coll == nil && b.coll != nil {

	79 return true

	80 } else if a.coll != nil && b.coll == nil {

	81 return false

	82 }

	83

	84 cmp := len(a.eqFilts) - len(b.eqFilts)

	85 if cmp < 0 {

	86 return true

	87 } else if cmp > 0 {

	88 return false

	89 }

	90 for k, col := range a.eqFilts {

	91 ocol := b.eqFilts[k]

	92 if col.Direction == ds.ASCENDING && ocol.Direction == ds.DESCEND ING {

	93 return true

	94 } else if col.Direction == ds.DESCENDING && ocol.Direction == ds .ASCENDING {

	95 return false

	96 }

	97 if col.Property < ocol.Property {

	98 return true

	99 } else if col.Property > ocol.Property {

	100 return false

	101 }

	102 }

	103 return false

	104 }

	105

	106 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.

	107 // It's only added if it could be useful in servicing q, otherwise this function

	108 // is a noop.

	109 //

	110 // This returns true iff the proposed index is OK and depletes missingTerms to

	111 // empty.

	112 //

	113 // If the proposed index is PERFECT (e.g. contains enough columns to cover all

	114 // equality filters, and also has the correct suffix), idxs will be replaced

	115 // with JUST that index, and this will return true.

	116 func (idxs IndexDefinitionSortableSlice) maybeAddDefinition(q reducedQuery, s memStore, missingTerms stringSet, id ds.IndexDefinition) bool {

	117 // Kindless queries are handled elsewhere.

	118 if id.Kind != q.kind {

	119 impossible(

	120 fmt.Errorf("maybeAddDefinition given index with wrong ki nd %q v %q", id.Kind, q.kind))

	121 }

	122

	123 // If we're an ancestor query, and the index is compound, but doesn't in clude

	124 // an Ancestor field, it doesn't work. Builtin indexes can be used for

	125 // ancestor queries (and have !Ancestor), assuming that it's only equali ty

	126 // filters (plus inequality on __key__), or a single inequality.

	127 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {

	128 impossible(

	129 fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q))

	130 }

	131

	132 // add __ancestor__ if necessary

	133 sortBy := id.GetFullSortOrder()

	134

	135 // If the index has fewer fields than we need for the suffix, it can't

	136 // possibly help.

	137 if len(sortBy) < len(q.suffixFormat) {

	138 return false

	139 }

	140

	141 numEqFilts := len(sortBy) - len(q.suffixFormat)

	142 // make sure the orders are precisely the same

	143 for i, sb := range sortBy[numEqFilts:] {

	144 if q.suffixFormat[i] != sb {

	145 return false

	146 }

	147 }

	148

	149 if id.Builtin() && numEqFilts == 0 {

	150 if len(q.eqFilters) > 1 \|\| (len(q.eqFilters) == 1 && q.eqFilters ["__ancestor__"] == nil) {

	151 return false

	152 }

	153 }

	154

	155 // Make sure the equalities section doesn't contain any properties we do n't

	156 // want in our query.

	157 //

	158 // numByProp && totalEqFilts will be used to see if this is a perfect ma tch

	159 // later.

	160 numByProp := make(map[string]int, len(q.eqFilters))

	161 totalEqFilts := 0

	162

	163 eqFilts := sortBy[:numEqFilts]

	164 for _, p := range eqFilts {

	165 if _, ok := q.eqFilters[p.Property]; !ok {

	166 return false

	167 }

	168 numByProp[p.Property]++

	169 totalEqFilts++

	170 }

	171

	172 // ok, we can actually use this

	173

	174 // Grab the collection for convenience later. We don't want to invalidat e this

	175 // index's potential just because the collection doesn't exist. If it's

	176 // a builtin and it doesn't exist, it still needs to be one of the 'poss ible'

	177 // indexes... it just means that the user's query will end up with no re sults.

	178 coll := s.GetCollection(

	179 fmt.Sprintf("idx:%s:%s", q.ns, serialize.ToBytes(*id.PrepForIdxT able())))

	180

	181 // First, see if it's a perfect match. If it is, then our search is over .

	182 //

	183 // A perfect match contains ALL the equality filter columns (or more, si nce

	184 // we can use residuals to fill in the extras).

	185 toAdd := IndexDefinitionSortable{coll: coll}

	186 toAdd.eqFilts = eqFilts

	187 for _, sb := range toAdd.eqFilts {

	188 missingTerms.rm(sb.Property)

	189 }

	190

	191 perfect := false

	192 if len(sortBy) == q.numCols {

	193 perfect = true

	194 for k, num := range numByProp {

	195 if num < len(q.eqFilters[k]) {

	196 perfect = false

	197 break

	198 }

	199 }

	200 }

	201 if perfect {

	202 *idxs = IndexDefinitionSortableSlice{toAdd}

	203 } else {

	204 idxs = append(idxs, toAdd)

	205 }

	206 return len(missingTerms) == 0

	207 }

	208

	209 // getRelevantIndexes retrieves the relevant indexes which could be used to

	210 // service q. It returns nil if it's not possible to service q with the current

	211 // indexes.

	212 func getRelevantIndexes(q reducedQuery, s memStore) (IndexDefinitionSortableSl ice, error) {

	213 missingTerms := stringSet{}

	214 for k := range q.eqFilters {

	215 if k == "__ancestor__" {

	216 // ancestor is not a prefix which can be satisfied by a single index. It

	217 // must be satisfied by ALL indexes (and has special log ic for this in

	218 // the addDefinition logic)

	219 continue

	220 }

	221 missingTerms.add(k)

	222 }

	223 idxs := IndexDefinitionSortableSlice{}

	224

	225 // First we add builtins

	226 // add

	227 // idx:KIND

	228 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{

	229 Kind: q.kind,

	230 }) {

	231 return idxs, nil

	232 }

	233

	234 // add

	235 // idx:KIND:prop

	236 // idx:KIND:-prop

	237 props := stringSet{}

	238 for prop := range q.eqFilters {

	239 props.add(prop)

	240 }

	241 for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] {

	242 props.add(col.Property)

	243 }

	244 for prop := range props {

	245 if strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__" ) {

	246 continue

	247 }

	248 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti on{

	249 Kind: q.kind,

	250 SortBy: []ds.IndexColumn{

	251 {Property: prop},

	252 },

	253 }) {

	254 return idxs, nil

	255 }

	256 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti on{

	257 Kind: q.kind,

	258 SortBy: []ds.IndexColumn{

	259 {Property: prop, Direction: ds.DESCENDING},

	260 },

	261 }) {

	262 return idxs, nil

	263 }

	264 }

	265

	266 // Try adding all compound indexes whose suffix matches.

	267 suffix := &ds.IndexDefinition{

	268 Kind: q.kind,

	269 Ancestor: q.eqFilters["__ancestor__"] != nil,

	270 SortBy: q.suffixFormat,

	271 }

	272 walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool {

	273 // keep walking until we find a perfect index.

	274 return !idxs.maybeAddDefinition(q, s, missingTerms, def)

	275 })

	276

	277 // this query is impossible to fulfil with the current indexes. Not all the

	278 // terms (equality + projection) are satisfied.

	279 if len(missingTerms) < 0 \|\| len(idxs) == 0 {

	280 remains := &ds.IndexDefinition{

	281 Kind: q.kind,

	282 Ancestor: q.eqFilters["__ancestor__"] != nil,

	283 }

	284 terms := make([]string, 0, len(missingTerms))

	285 for mt := range missingTerms {

	286 terms = append(terms, mt)

	287 }

	288 if serializationDeterministic {

	289 sort.Strings(terms)

	290 }

	291 for _, term := range terms {

	292 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term})

	293 }

	294 remains.SortBy = append(remains.SortBy, q.suffixFormat...)

	295 last := remains.SortBy[len(remains.SortBy)-1]

	296 if last.Direction == ds.ASCENDING {

	297 // this removes the __key__ column, since it's implicit.

	298 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]

	299 }

	300 if remains.Builtin() {

	301 impossible(

	302 fmt.Errorf("recommended missing index would be a builtin: %s", remains))

	303 }

	304 return nil, fmt.Errorf(

	305 "Your indexes are insufficient! Try adding:\n %s", rema ins)

	306 }

	307

	308 return idxs, nil

	309 }

	310

	311 // generate generates a single iterDefinition for the given index.

	312 func generate(q reducedQuery, idx IndexDefinitionSortable, c constraints) it erDefinition {

	313 def := &iterDefinition{

	314 c: idx.coll,

	315 start: q.start,

	316 end: q.end,

	317 }

	318 toJoin := make([][]byte, len(idx.eqFilts))

	319 for _, sb := range idx.eqFilts {

	320 val := c.peel(sb.Property)

	321 if sb.Direction == ds.DESCENDING {

	322 val = invert(val)

	323 }

	324 toJoin = append(toJoin, val)

	325 }

	326 def.prefix = bjoin(toJoin...)

	327 def.prefixLen = len(def.prefix)

	328

	329 if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() {

	330 // The query requires an ancestor, but the index doesn't explici tly have it

	331 // as part of the prefix (otherwise it would have been the first eqFilt

	332 // above). This happens when it's a builtin index, or if it's th e primary

	333 // index (for a kindless query), or if it's the Kind index (for a filterless

	334 // query).

	335 //

	336 // builtin indexes are:

	337 // Kind/__key__

	338 // Kind/Prop/__key__

	339 // Kind/Prop/-__key__

	340 if len(q.suffixFormat) > 2 \|\| q.suffixFormat[len(q.suffixFormat) -1].Property != "__key__" {

	341 // This should never happen. One of the previous validat ors would have

	342 // selected a different index. But just in case.

	343 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", idx))

	344 }

	345

	346 // get the only value out of __ancestor__

	347 anc := q.eqFilters["__ancestor__"].getOne()

	348

	349 // Intentionally do NOT update prefixLen. This allows multiItera tor to

	350 // correctly include the entire key in the shared iterator suffi x, instead

	351 // of just the remainder.

	352

	353 // chop the terminal null byte off the q.ancestor key... we can accept

	354 // anything which is a descendant or an exact match. Removing t he last byte

	355 // from the key (the terminating null) allows this trick to work . Otherwise

	356 // it would be a closed range of EXACTLY this key.

	357 chopped := []byte(anc[:len(anc)-1])

	358 if q.suffixFormat[0].Direction == ds.DESCENDING {

	359 chopped = invert(chopped)

	360 }

	361 def.prefix = bjoin(def.prefix, chopped)

	362

	363 // Update start and end, since we know that if they contain anyt hing, they

	364 // contain values for the __key__ field.

	365 if def.start != nil {

	366 offset := 0

	367 if len(q.suffixFormat) > 1 {

	368 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d ef.start, 1)

	369 offset = len(chunks[0])

	370 }

	371 if !bytes.HasPrefix(def.start[offset:], chopped) {

	372 // again, shouldn't happen, but if it does, we w ant to know about it.

	373 impossible(fmt.Errorf(

	374 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v",

	375 def.start, chopped))

	376 }

	377 def.start = def.start[:offset+len(chopped)]

	378 }

	379 if def.end != nil {

	380 offset := 0

	381 if len(q.suffixFormat) > 1 {

	382 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d ef.end, 1)

	383 offset = len(chunks[0])

	384 }

	385 if !bytes.HasPrefix(def.end[offset:], chopped) {

	386 impossible(fmt.Errorf(

	387 "end suffix for implied ancestor doesn't start with ancestor! end:%v ancestor:%v",

	388 def.end, chopped))

	389 }

	390 def.end = def.end[:offset+len(chopped)]

	391 }

	392 }

	393

	394 return def

	395 }

	396

	397 type constraints struct {

	398 constraints map[string][][]byte

	399 original map[string][][]byte

	400 residualMapping map[string]int

	401 }

	402

	403 // peel picks a constraint value for the property. It then removes this value

	404 // from constraints (possibly removing the entire row from constraints if it

	405 // was the last value). If the value wasn't available in constraints, it picks

	406 // the value from residuals.

	407 func (c *constraints) peel(prop string) []byte {

	408 ret := []byte(nil)

	409 if vals, ok := c.constraints[prop]; ok {

	410 ret = vals[0]

	411 if len(vals) == 1 {

	412 delete(c.constraints, prop)

	413 } else {

	414 c.constraints[prop] = vals[1:]

	415 }

	416 } else {

	417 row := c.original[prop]

	418 idx := c.residualMapping[prop]

	419 c.residualMapping[prop]++

	420 ret = row[idx%len(row)]

	421 }

	422 return ret

	423 }

	424

	425 func (c *constraints) empty() bool {

	426 return len(c.constraints) == 0

	427 }

	428

	429 // calculateConstraints produces a mapping of all equality filters to the values

	430 // that they're constrained to. It also calculates residuals, which are an

	431 // arbitrary value for filling index prefixes which have more equality fields

	432 // than are necessary. The value doesn't matter, as long as its an equality

	433 // constraint in the original query.

	434 func calculateConstraints(q reducedQuery) constraints {

	435 ret := &constraints{

	436 original: make(map[string][][]byte, len(q.eqFilters)),

	437 constraints: make(map[string][][]byte, len(q.eqFilters)),

	438 residualMapping: make(map[string]int),

	439 }

	440 for prop, vals := range q.eqFilters {

	441 bvals := make([][]byte, 0, len(vals))

	442 for val := range vals {

	443 bvals = append(bvals, []byte(val))

	444 }

	445 ret.original[prop] = bvals

	446 if prop == "__ancestor__" {

	447 // exclude __ancestor__ from the constraints.

	448 //

	449 // This is because it's handled specially during index p roposal and

	450 // generation. Ancestor is used by ALL indexes, and so i ts residual value

	451 // in ret.original above will be sufficient.

	452 continue

	453 }

	454 ret.constraints[prop] = bvals

	455 }

	456 return ret

	457 }

	458

	459 // getIndexes returns a set of iterator definitions. Iterating over these

	460 // will result in matching suffixes.

	461 func getIndexes(q reducedQuery, s memStore) ([]*iterDefinition, error) {

	462 relevantIdxs := IndexDefinitionSortableSlice(nil)

	463 if q.kind == "" {

	464 if coll := s.GetCollection("ents:" + q.ns); coll != nil {

	465 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} }

	466 }

	467 } else {

	468 err := error(nil)

	469 relevantIdxs, err = getRelevantIndexes(q, s)

	470 if err != nil {

	471 return nil, err

	472 }

	473 }

	474 if len(relevantIdxs) == 0 {

	475 return nil, errQueryDone

	476 }

	477

	478 // This sorts it so that relevantIdxs goes less filters -> more filters. We

	479 // traverse this list backwards, however, so we traverse it in more filt ers ->

	480 // less filters order.

	481 sort.Sort(relevantIdxs)

	482

	483 constraints := calculateConstraints(q)

	484

	485 ret := []*iterDefinition{}

	486 for !constraints.empty() \|\| len(ret) == 0 {

	487 bestIdx := (*IndexDefinitionSortable)(nil)

	488 if len(ret) == 0 {

	489 // if ret is empty, take the biggest relevantIdx. It's g uaranteed to have

	490 // the greatest number of equality filters of any index in the list, and

	491 // we know that every equality filter will be pulled fro m constraints and

	492 // not residual.

	493 //

	494 // This also takes care of the case when the query has n o equality filters,

	495 // in which case relevantIdxs will actually only contain one index anyway

	496 // :)

	497 bestIdx = &relevantIdxs[len(relevantIdxs)-1]

	498 if bestIdx.coll == nil {

	499 return nil, errQueryDone

	500 }

	501 } else {

	502 // If ret's not empty, then we need to find the best ind ex we can. The

	503 // best index will be the one with the most matching equ ality columns.

	504 // Since relevantIdxs is sorted primarially by the numbe r of equality

	505 // columns, we walk down the list until the number of po ssible columns is

	506 // worse than our best-so-far.

	507 //

	508 // Traversing the list backwards goes from more filters -> less filters,

	509 // but also allows us to remove items from the list as w e iterate over it.

	510 bestNumEqHits := 0

	511 for i := len(relevantIdxs) - 1; i >= 0; i-- {

	512 idx := &relevantIdxs[i]

	513 if len(idx.eqFilts) < bestNumEqHits {

	514 // if the number of filters drops below our best hit, it's never going

	515 // to get better than that. This index m ight be helpful on a later

	516 // loop though, so don't remove it.

	517 break

	518 }

	519 numHits := 0

	520 if idx.coll != nil {

	521 numHits = idx.numEqHits(constraints)

	522 }

	523 if numHits > bestNumEqHits {

	524 bestNumEqHits = numHits

	525 bestIdx = idx

	526 } else if numHits == 0 {

	527 // This index will never become useful a gain, so remove it.

	528 relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...)

	529 }

	530 }

	531 }

	532 if bestIdx == nil {

	533 // something is really wrong here... if relevantIdxs is !nil, then we

	534 // should always be able to make progress in this loop.

	535 impossible(fmt.Errorf("deadlock: cannot fulfil query?"))

	536 }

	537 ret = append(ret, generate(q, bestIdx, constraints))

	538 }

	539

	540 return ret, nil

	541 }

OLD	NEW

« no previous file with comments | « impl/memory/datastore_index.go ('k') | impl/memory/datastore_index_test.go » ('j') | no next file with comments »