impl/memory/datastore_index_selection.go - Issue 1302813003: impl/memory: Implement Queries

Side by Side Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator

Patch Set: remove limit double-set restriction Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2015 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 package memory

	6

	7 import (

	8 "bytes"

	9 "fmt"

	10 "sort"

	11 "strings"

	12

	13 ds "github.com/luci/gae/service/datastore"

	14 "github.com/luci/gae/service/datastore/serialize"

	15 )

	16

	17 // reducedQuery contains only the pieces of the query necessary to iterate for

	18 // results.

	19 // deduplication is applied externally

	20 // projection / keysonly / entity retrieval is done externally

	21 type reducedQuery struct {

	22 ns string

	23 kind string

	24

	25 // eqFilters indicate the set of all prefix constraints which need to be

	26 // fulfilled in the composite query. All of these will translate into pr efix

	27 // bytes for SOME index.

	28 eqFilters map[string]stringSet

	29

	30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes

	31 // in the multi query will have.

	32 //

	33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h

	34 // element

	35 // suffixFormat ALWAYS includes any additional projections (in ascending

	36 // order) after all user defined sort orders

	37 // suffixFormat ALWAYS has __key__ as the last column

	38 suffixFormat []ds.IndexColumn

	39

	40 // limits of the inequality and/or full sort order. This is ONLY a suffi x,

	41 // and it will be appended to the prefix during iteration.

	42 start []byte

	43 end []byte

	44

	45 // metadata describing the total number of columns that this query requi res to

	46 // execute perfectly.

	47 numCols int

	48 }

	49

	50 type IndexDefinitionSortable struct {

	51 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n

	52 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase

	53 // (tag=1, tag=2) is a perfectly valid query).

	54 eqFilts []ds.IndexColumn

	55 coll *memCollection

	56 }

	57

	58 func (i *IndexDefinitionSortable) hasAncestor() bool {

	59 return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__"

	60 }

	61

	62 func (i IndexDefinitionSortable) numEqHits(c constraints) int {

	63 ret := 0

	64 for _, filt := range i.eqFilts {

	65 if _, ok := c.constraints[filt.Property]; ok {

	66 ret++

	67 }

	68 }

	69 return ret

	70 }

	71

	72 type IndexDefinitionSortableSlice []IndexDefinitionSortable

	73

	74 func (s IndexDefinitionSortableSlice) Len() int { return len(s) }

	75 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

	76 func (s IndexDefinitionSortableSlice) Less(i, j int) bool {

	77 a, b := s[i], s[j]

	78 if a.coll == nil && b.coll != nil {

	79 return true

	80 } else if a.coll != nil && b.coll == nil {

	81 return false

	82 }

	83

	84 cmp := len(a.eqFilts) - len(b.eqFilts)

	85 if cmp < 0 {

	86 return true

	87 } else if cmp > 0 {

	88 return false

	89 }

	90 for k, col := range a.eqFilts {

	91 ocol := b.eqFilts[k]

	92 if col.Direction == ds.ASCENDING && ocol.Direction == ds.DESCEND ING {

	93 return true

	94 } else if col.Direction == ds.DESCENDING && ocol.Direction == ds .ASCENDING {

	95 return false

	96 }

	97 if col.Property < ocol.Property {

	98 return true

	99 } else if col.Property > ocol.Property {

	100 return false

	101 }

	102 }

	103 return false

	104 }

	105

	106 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.

	107 // It's only added if it could be useful in servicing q, otherwise this function

	108 // is a noop.

	109 //

	110 // This returns true iff the proposed index is OK and depletes missingTerms to

	111 // empty.

	112 //

	113 // If the proposed index is PERFECT (e.g. contains enough columns to cover all

	114 // equality filters, and also has the correct suffix), idxs will be replaced

	115 // with JUST that index, and this will return true.

	116 func (idxs IndexDefinitionSortableSlice) maybeAddDefinition(q reducedQuery, s memStore, missingTerms map[string]struct{}, id ds.IndexDefinition) bool {

	117 // Kindless queries are handled elsewhere.

	118 if id.Kind != q.kind {

	119 impossible(

	120 fmt.Errorf("maybeAddDefinition given index with wrong ki nd %q v %q", id.Kind, q.kind))

	121 }

	122

	123 // If we're an ancestor query, and the index is compound, but doesn't in clude

	124 // an Ancestor field, it doesn't work. Builtin indicies can be used for

	125 // ancestor queries (and have !Ancestor), assuming that it's only equali ty

	126 // filters (plus inequality on __key__), or a single inequality.

	127 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {

	128 impossible(

	129 fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q))

	130 }

	131

	132 // add __ancestor__ if necessary

	133 sortBy := id.GetFullSortOrder()

	134

	135 // If the index has fewer fields than we need for the suffix, it can't

	136 // possibly help.

	137 if len(sortBy) < len(q.suffixFormat) {

	138 return false

	139 }

	140

	141 numEqFilts := len(sortBy) - len(q.suffixFormat)

	142 // make sure the orders are precisely the same

	143 for i, sb := range sortBy[numEqFilts:] {

	144 if q.suffixFormat[i] != sb {

	145 return false

	146 }

	147 }

	148

	149 if id.Builtin() && numEqFilts == 0 {

	150 if len(q.eqFilters) > 1 \|\| (len(q.eqFilters) == 1 && q.eqFilters ["__ancestor__"] == nil) {

	151 return false

	152 }

	153 }

	154

	155 // Make sure the equalities section doesn't contain any properties we do n't

	156 // want in our query.

	157 //

	158 // numByProp && totalEqFilts will be used to see if this is a perfect ma tch

	159 // later.

	160 numByProp := make(map[string]int, len(q.eqFilters))

	161 totalEqFilts := 0

	162

	163 eqFilts := sortBy[:numEqFilts]

	164 for _, p := range eqFilts {

	165 if _, ok := q.eqFilters[p.Property]; !ok {

	166 return false

	167 }

	168 numByProp[p.Property]++

	169 totalEqFilts++

	170 }

	171

	172 // ok, we can actually use this

	173

	174 // Grab the collection for convenience later. We don't want to invalidat e this

	175 // index's potential just because the collection doesn't exist. If it's

	176 // a builtin and it doesn't exist, it still needs to be one of the 'poss ible'

	177 // indexes... it just means that the user's query will end up with no re sults.

	178 coll := s.GetCollection(

	179 fmt.Sprintf("idx:%s:%s", q.ns, serialize.ToBytes(*id.PrepForIdxT able())))

	180

	181 // First, see if it's a perfect match. If it is, then our search is over .

	182 //

	183 // A perfect match contains ALL the equality filter columns (or more, si nce

	184 // we can use residuals to fill in the extras).

	185 toAdd := IndexDefinitionSortable{coll: coll}

	186 toAdd.eqFilts = eqFilts

	187 for _, sb := range toAdd.eqFilts {

	188 delete(missingTerms, sb.Property)

	189 }

	190

	191 perfect := false

	192 if len(sortBy) == q.numCols {

	193 perfect = true

	194 for k, num := range numByProp {

	195 if num < len(q.eqFilters[k]) {

	196 perfect = false

	197 break

	198 }

	199 }

	200 }

	201 if perfect {

	202 *idxs = IndexDefinitionSortableSlice{toAdd}

	203 } else {

	204 idxs = append(idxs, toAdd)

	205 }

	206 return len(missingTerms) == 0

	207 }

	208

	209 // getRelevantIndicies retrieves the relevant indexes which could be used to
	dnj 2015/08/28 20:15:04 nit: I was going function name, too :) But if you like "indices" I don't really care; just attempting to alert on an inconsistency in an otherwise consistent CL. iannucci 2015/08/28 20:38:50 Oops... My grep missed it. Fixed now.
	210 // service q. It returns nil if it's not possible to service q with the current

	211 // indicies.

	212 func getRelevantIndicies(q reducedQuery, s memStore) (IndexDefinitionSortableS lice, error) {

	213 missingTerms := map[string]struct{}{}

	214 for k := range q.eqFilters {

	215 if k == "__ancestor__" {

	216 // ancestor is not a prefix which can be satisfied by a single index. It

	217 // must be satisfied by ALL indexes (and has special log ic for this in

	218 // the addDefinition logic)

	219 continue

	220 }

	221 missingTerms[k] = struct{}{}

	222 }

	223 idxs := IndexDefinitionSortableSlice{}

	224

	225 // First we add builtins

	226 // add

	227 // idx:KIND

	228 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{

	229 Kind: q.kind,

	230 }) {

	231 return idxs, nil

	232 }

	233

	234 // add

	235 // idx:KIND:prop

	236 // idx:KIND:-prop

	237 props := map[string]struct{}{}

	238 for prop := range q.eqFilters {

	239 props[prop] = struct{}{}

	240 }

	241 for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] {

	242 props[col.Property] = struct{}{}

	243 }

	244 for prop := range props {

	245 if strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__" ) {

	246 continue

	247 }

	248 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti on{

	249 Kind: q.kind,

	250 SortBy: []ds.IndexColumn{

	251 {Property: prop},

	252 },

	253 }) {

	254 return idxs, nil

	255 }

	256 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefiniti on{

	257 Kind: q.kind,

	258 SortBy: []ds.IndexColumn{

	259 {Property: prop, Direction: ds.DESCENDING},

	260 },

	261 }) {

	262 return idxs, nil

	263 }

	264 }

	265

	266 // Try adding all compound indicies whose suffix matches.

	267 suffix := &ds.IndexDefinition{

	268 Kind: q.kind,

	269 Ancestor: q.eqFilters["__ancestor__"] != nil,

	270 SortBy: q.suffixFormat,

	271 }

	272 walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool {

	273 // keep walking until we find a perfect index.

	274 return !idxs.maybeAddDefinition(q, s, missingTerms, def)

	275 })

	276

	277 // this query is impossible to fulfil with the current indicies. Not all the

	278 // terms (equality + projection) are satisfied.

	279 if len(missingTerms) < 0 \|\| len(idxs) == 0 {

	280 remains := &ds.IndexDefinition{

	281 Kind: q.kind,

	282 Ancestor: q.eqFilters["__ancestor__"] != nil,

	283 }

	284 terms := make([]string, 0, len(missingTerms))

	285 for mt := range missingTerms {

	286 terms = append(terms, mt)

	287 }

	288 if serializationDeterministic {

	289 sort.Strings(terms)

	290 }

	291 for _, term := range terms {

	292 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term})

	293 }

	294 remains.SortBy = append(remains.SortBy, q.suffixFormat...)

	295 last := remains.SortBy[len(remains.SortBy)-1]

	296 if last.Direction == ds.ASCENDING {

	297 // this removes the __key__ column, since it's implicit.

	298 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]

	299 }

	300 if remains.Builtin() {

	301 impossible(

	302 fmt.Errorf("recommended missing index would be a builtin: %s", remains))

	303 }

	304 return nil, fmt.Errorf(

	305 "Your indexes are insufficient! Try adding:\n %s", rema ins)

	306 }

	307

	308 return idxs, nil

	309 }

	310

	311 // generate generates a single iterDefinition for the given index.

	312 func generate(q reducedQuery, idx IndexDefinitionSortable, c constraints) it erDefinition {

	313 def := &iterDefinition{

	314 c: idx.coll,

	315 start: q.start,

	316 end: q.end,

	317 }

	318 toJoin := make([][]byte, len(idx.eqFilts))

	319 for _, sb := range idx.eqFilts {

	320 val := c.peel(sb.Property)

	321 if sb.Direction == ds.DESCENDING {

	322 val = invert(val)

	323 }

	324 toJoin = append(toJoin, val)

	325 }

	326 def.prefix = bjoin(toJoin...)

	327 def.prefixLen = len(def.prefix)

	328

	329 if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() {

	330 // The query requires an ancestor, but the index doesn't explici tly have it

	331 // as part of the prefix (otherwise it would have been the first eqFilt

	332 // above). This happens when it's a builtin index, or if it's th e primary

	333 // index (for a kindless query), or if it's the Kind index (for a filterless

	334 // query).

	335 //

	336 // builtin indexes are:

	337 // Kind/__key__

	338 // Kind/Prop/__key__

	339 // Kind/Prop/-__key__

	340 if len(q.suffixFormat) > 2 \|\| q.suffixFormat[len(q.suffixFormat) -1].Property != "__key__" {

	341 // This should never happen. One of the previous validat ors would have

	342 // selected a different index. But just in case.

	343 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", idx))

	344 }

	345

	346 // This silly construction gets the __ancestor__ value, because it's a

	347 // map[string]struct{} instead of a [][]byte{} (otherwise we'd j ust get

	348 // the value at the 0th index).

	349 anc := ""

	350 for k := range q.eqFilters["__ancestor__"] {

	351 anc = k

	352 break

	353 }

	354

	355 // Intentionally do NOT update prefixLen. This allows multiItera tor to

	356 // correctly include the entire key in the shared iterator suffi x, instead

	357 // of just the remainder.

	358

	359 // chop the terminal null byte off the q.ancestor key... we can accept

	360 // anything which is a descendant or an exact match. Removing t he last byte

	361 // from the key (the terminating null) allows this trick to work . Otherwise

	362 // it would be a closed range of EXACTLY this key.

	363 chopped := []byte(anc[:len(anc)-1])

	364 if q.suffixFormat[0].Direction == ds.DESCENDING {

	365 chopped = invert(chopped)

	366 }

	367 def.prefix = bjoin(def.prefix, chopped)

	368

	369 // Update start and end, since we know that if they contain anyt hing, they

	370 // contain values for the __key__ field.

	371 if def.start != nil {

	372 offset := 0

	373 if len(q.suffixFormat) > 1 {

	374 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d ef.start, 1)

	375 offset = len(chunks[0])

	376 }

	377 if !bytes.HasPrefix(def.start[offset:], chopped) {

	378 // again, shouldn't happen, but if it does, we w ant to know about it.

	379 impossible(fmt.Errorf(

	380 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v",

	381 def.start, chopped))

	382 }

	383 def.start = def.start[:offset+len(chopped)]

	384 }

	385 if def.end != nil {

	386 offset := 0

	387 if len(q.suffixFormat) > 1 {

	388 chunks, _ := parseSuffix(q.ns, q.suffixFormat, d ef.end, 1)

	389 offset = len(chunks[0])

	390 }

	391 if !bytes.HasPrefix(def.end[offset:], chopped) {

	392 impossible(fmt.Errorf(

	393 "end suffix for implied ancestor doesn't start with ancestor! end:%v ancestor:%v",

	394 def.end, chopped))

	395 }

	396 def.end = def.end[:offset+len(chopped)]

	397 }

	398 }

	399

	400 return def

	401 }

	402

	// constraints tracks which equality-filter values still have to be consumed
	// by some index prefix, plus what is needed to hand out residual values once
	// a property's own values have run out.
	type constraints struct {
		// constraints maps a property to its not-yet-consumed values. A property
		// is removed from the map when its last value is peeled.
		constraints map[string][][]byte

		// original maps a property to all of its values; it is never modified by
		// peel and is the source of residual values.
		original map[string][][]byte

		// residualMapping counts, per property, how many residual values have been
		// handed out so far; it is used to cycle through original[prop].
		residualMapping map[string]int
	}

	// peel picks a constraint value for the property. It then removes this value
	// from constraints (possibly removing the entire row from constraints if it
	// was the last value). If the value wasn't available in constraints, it picks
	// the value from residuals, cycling through the property's original values.
	//
	// prop must have at least one value in c.original when the residual path is
	// taken; otherwise the modulo below divides by zero.
	func (c *constraints) peel(prop string) []byte {
		if vals, ok := c.constraints[prop]; ok {
			if len(vals) == 1 {
				delete(c.constraints, prop)
			} else {
				c.constraints[prop] = vals[1:]
			}
			return vals[0]
		}

		row := c.original[prop]
		idx := c.residualMapping[prop]
		c.residualMapping[prop]++
		return row[idx%len(row)]
	}

	// empty reports whether every constraint value has been peeled.
	func (c *constraints) empty() bool {
		return len(c.constraints) == 0
	}

	434

	435 // calculateConstraints produces a mapping of all equality filters to the values

	436 // that they're constrained to. It also calculates residuals, which are an

	437 // arbitrary value for filling index prefixes which have more equality fields

	438 // than are necessary. The value doesn't matter, as long as its an equality

	439 // constraint in the original query.

	440 func calculateConstraints(q reducedQuery) constraints {

	441 ret := &constraints{

	442 original: make(map[string][][]byte, len(q.eqFilters)),

	443 constraints: make(map[string][][]byte, len(q.eqFilters)),

	444 residualMapping: make(map[string]int),

	445 }

	446 for prop, vals := range q.eqFilters {

	447 bvals := make([][]byte, 0, len(vals))

	448 for val := range vals {

	449 bvals = append(bvals, []byte(val))

	450 }

	451 ret.original[prop] = bvals

	452 if prop == "__ancestor__" {

	453 // exclude __ancestor__ from the constraints.

	454 //

	455 // This is because it's handled specially during index p roposal and

	456 // generation. Ancestor is used by ALL indexes, and so i ts residual value

	457 // in ret.original above will be sufficient.

	458 continue

	459 }

	460 ret.constraints[prop] = bvals

	461 }

	462 return ret

	463 }

	464

	465 // getIndexes returns a set of iterator definitions. Iterating over these

	466 // will result in matching suffixes.

	467 func getIndexes(q reducedQuery, s memStore) ([]*iterDefinition, error) {

	468 relevantIdxs := IndexDefinitionSortableSlice(nil)

	469 if q.kind == "" {

	470 if coll := s.GetCollection("ents:" + q.ns); coll != nil {

	471 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} }

	472 }

	473 } else {

	474 err := error(nil)

	475 relevantIdxs, err = getRelevantIndicies(q, s)

	476 if err != nil {

	477 return nil, err

	478 }

	479 }

	480 if len(relevantIdxs) == 0 {

	481 return nil, errQueryDone

	482 }

	483

	484 // This sorts it so that relevantIdxs goes less filters -> more filters. We

	485 // traverse this list backwards, however, so we traverse it in more filt ers ->

	486 // less filters order.

	487 sort.Sort(relevantIdxs)

	488

	489 constraints := calculateConstraints(q)

	490

	491 ret := []*iterDefinition{}

	492 for !constraints.empty() \|\| len(ret) == 0 {

	493 bestIdx := (*IndexDefinitionSortable)(nil)

	494 if len(ret) == 0 {

	495 // if ret is empty, take the biggest relevantIdx. It's g uaranteed to have

	496 // the greatest number of equality filters of any index in the list, and

	497 // we know that every equality filter will be pulled fro m constraints and

	498 // not residual.

	499 //

	500 // This also takes care of the case when the query has n o equality filters,

	501 // in which case relevantIdxs will actually only contain one index anyway

	502 // :)

	503 bestIdx = &relevantIdxs[len(relevantIdxs)-1]

	504 if bestIdx.coll == nil {

	505 return nil, errQueryDone

	506 }

	507 } else {

	508 // If ret's not empty, then we need to find the best ind ex we can. The

	509 // best index will be the one with the most matching equ ality columns.

	510 // Since relevantIdxs is sorted primarially by the numbe r of equality

	511 // columns, we walk down the list until the number of po ssible columns is

	512 // worse than our best-so-far.

	513 //

	514 // Traversing the list backwards goes from more filters -> less filters,

	515 // but also allows us to remove items from the list as w e iterate over it.

	516 bestNumEqHits := 0

	517 for i := len(relevantIdxs) - 1; i >= 0; i-- {

	518 idx := &relevantIdxs[i]

	519 if len(idx.eqFilts) < bestNumEqHits {

	520 // if the number of filters drops below our best hit, it's never going

	521 // to get better than that. This index m ight be helpful on a later

	522 // loop though, so don't remove it.

	523 break

	524 }

	525 numHits := 0

	526 if idx.coll != nil {

	527 numHits = idx.numEqHits(constraints)

	528 }

	529 if numHits > bestNumEqHits {

	530 bestNumEqHits = numHits

	531 bestIdx = idx

	532 } else if numHits == 0 {

	533 // This index will never become useful a gain, so remove it.

	534 relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...)

	535 }

	536 }

	537 }

	538 if bestIdx == nil {

	539 // something is really wrong here... if relevantIdxs is !nil, then we

	540 // should always be able to make progress in this loop.

	541 impossible(fmt.Errorf("deadlock: cannot fulfil query?"))

	542 }

	543 ret = append(ret, generate(q, bestIdx, constraints))

	544 }

	545

	546 return ret, nil

	547 }

OLD	NEW

« no previous file with comments | « impl/memory/datastore_index.go ('k') | impl/memory/datastore_index_test.go » ('j') | impl/memory/stringset.go » ('J')