| Index: impl/memory/datastore_index_selection.go | 
| diff --git a/impl/memory/datastore_index_selection.go b/impl/memory/datastore_index_selection.go | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..b472b3e6dd6c8844478c9ea461341cf726f75b18 | 
| --- /dev/null | 
| +++ b/impl/memory/datastore_index_selection.go | 
| @@ -0,0 +1,417 @@ | 
| +// Copyright 2015 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +package memory | 
| + | 
| +import ( | 
| +	"bytes" | 
| +	"fmt" | 
| +	"sort" | 
| + | 
| +	ds "github.com/luci/gae/service/datastore" | 
| +	"github.com/luci/gae/service/datastore/serialize" | 
| +	"github.com/luci/gkvlite" | 
| +) | 
| + | 
| +// reducedQuery contains only the pieces of the query necessary to iteratate for | 
| +// results. | 
| +//   deduplication is applied externally | 
| +//   projection / keysonly / entity retrieval is done externally | 
| +type reducedQuery struct { | 
| +	ns   string | 
| +	kind string | 
| + | 
| +	// eqFilters indicate the set of all prefix constraints which need to be | 
| +	// fulfilled in the composite query. All of these will translate into prefix | 
| +	// bytes for SOME index. | 
| +	eqFilters map[string]map[string]struct{} | 
| + | 
| +	// suffixFormat is the PRECISE listing of the suffix columns that ALL indexes | 
| +	//   in the multi query will have. | 
| +	// | 
| +	// suffixFormat ALWAYS includes the inequality filter (if any) as the 0th | 
| +	//   element | 
| +	// suffixFormat ALWAYS includes any additional projections (in ascending | 
| +	//   order) after all user defined sort orders | 
| +	// suffixFormat ALWAYS has __key__ as the last column | 
| +	suffixFormat []ds.IndexColumn | 
| + | 
| +	// limits of the inequality and/or full sort order. This is ONLY a suffix, | 
| +	// and it will be appended to the prefix during iteration. | 
| +	start []byte | 
| +	end   []byte | 
| +} | 
| + | 
| +type IndexDefinitionSortable struct { | 
| +	numRows   uint64 | 
| +	suffixIdx int | 
| + | 
| +	// eqFilts is the list of ACTUAL prefix columns. Note that it may contain | 
| +	// redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becuase | 
| +	// (tag=1, tag=2) is a perfectly valid query). | 
| +	eqFilts []ds.IndexColumn | 
| +	coll    *memCollection | 
| +} | 
| + | 
| +func (i *IndexDefinitionSortable) has(property string) bool { | 
| +	for _, col := range i.eqFilts { | 
| +		if col.Property == property { | 
| +			return true | 
| +		} | 
| +		if property == "__ancestor__" { | 
| +			// ancestor can only be the first column, so abort early | 
| +			break | 
| +		} | 
| +	} | 
| +	return false | 
| +} | 
| + | 
| +type IndexDefinitionSortableSlice []IndexDefinitionSortable | 
| + | 
| +func (s IndexDefinitionSortableSlice) Len() int      { return len(s) } | 
| +func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } | 
| +func (s IndexDefinitionSortableSlice) Less(i, j int) bool { | 
| +	// prefer smaller indicies, and indicies which have fewer suffix indexes. | 
| +	// | 
| +	// After that, it doesn't matter. Note that this Less implementation is only | 
| +	// for quick sorts, and NOT for equality comparison (e.g. !(x < y) && !(y < x) | 
| +	// does NOT imply x == y). | 
| +	return s[i].numRows < s[j].numRows && s[i].suffixIdx < s[j].suffixIdx | 
| +} | 
| + | 
| +// maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice. | 
| +// It's only added if it could be useful in servicing q, otherwise this function | 
| +// is a noop. | 
| +func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) { | 
| +	// Kindless queries are handled elsewhere. | 
| +	if id.Kind != q.kind { | 
| +		return | 
| +	} | 
| + | 
| +	// If we're an ancestor query, and the index is compound, but doesn't include | 
| +	// an Ancestor field, it doesn't work. Builtin indicies can be used for | 
| +	// ancestor queries (and have !Ancestor), assuming that it's only equality | 
| +	// filters (plus inequality on __key__), or a single inequality. | 
| +	if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() { | 
| +		return | 
| +	} | 
| + | 
| +	// add __ancestor__, __key__, if necessary | 
| +	sortBy := id.NormalizeOrder() | 
| + | 
| +	// If the index has fewer fields than we need for the suffix, it can't | 
| +	// possibly help. | 
| +	if len(sortBy) < len(q.suffixFormat) { | 
| +		return | 
| +	} | 
| + | 
| +	// See if we actually have this index for the correct namespace | 
| +	coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(*id))) | 
| +	if coll == nil { | 
| +		return | 
| +	} | 
| + | 
| +	suffixIdx := len(sortBy) - len(q.suffixFormat) | 
| +	// make sure the orders are precisely the same | 
| +	for i, sb := range sortBy[suffixIdx:] { | 
| +		if q.suffixFormat[i] != sb { | 
| +			return | 
| +		} | 
| +	} | 
| + | 
| +	// Make sure the equalities section doesn't contain any properties we don't | 
| +	// want in our query. | 
| +	for _, p := range sortBy[:suffixIdx] { | 
| +		if _, ok := q.eqFilters[p.Property]; !ok { | 
| +			return | 
| +		} | 
| +	} | 
| + | 
| +	// ok, we can actually use this | 
| +	numRows, _ := coll.GetTotals() | 
| +	toAdd := IndexDefinitionSortable{ | 
| +		numRows: numRows, suffixIdx: suffixIdx, coll: coll} | 
| +	toAdd.eqFilts = sortBy[:suffixIdx] | 
| +	for _, sb := range toAdd.eqFilts { | 
| +		delete(missingTerms, sb.Property) | 
| +	} | 
| +	*idxs = append(*idxs, toAdd) | 
| +} | 
| + | 
| +// getRelevantIndicies retrieves the relevant indices which could be used to | 
| +// service q. It returns nil if it's not possible to service q with the current | 
| +// indicies. | 
| +func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableSlice, error) { | 
| +	missingTerms := map[string]struct{}{} | 
| +	for k := range q.eqFilters { | 
| +		if k == "__ancestor__" { | 
| +			// ancestor is not a prefix which can be satisfied by a single index. It | 
| +			// must be satisfied by ALL indices (and has special logic for this in | 
| +			// the addDefinition logic) | 
| +			continue | 
| +		} | 
| +		missingTerms[k] = struct{}{} | 
| +	} | 
| +	idxs := IndexDefinitionSortableSlice{} | 
| + | 
| +	// First we add builtins | 
| +	// add | 
| +	//   idx:KIND | 
| +	idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | 
| +		Kind: q.kind, | 
| +	}) | 
| + | 
| +	// add | 
| +	//   idx:KIND:prop | 
| +	//   idx:KIND:-prop | 
| +	props := map[string]struct{}{} | 
| +	for prop := range q.eqFilters { | 
| +		props[prop] = struct{}{} | 
| +	} | 
| +	for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] { | 
| +		props[col.Property] = struct{}{} | 
| +	} | 
| +	for prop := range props { | 
| +		idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | 
| +			Kind: q.kind, | 
| +			SortBy: []ds.IndexColumn{ | 
| +				{Property: prop}, | 
| +			}, | 
| +		}) | 
| +		idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | 
| +			Kind: q.kind, | 
| +			SortBy: []ds.IndexColumn{ | 
| +				{Property: prop, Direction: ds.DESCENDING}, | 
| +			}, | 
| +		}) | 
| +	} | 
| + | 
| +	// Try adding all compound indicies | 
| +	idxCol := s.GetCollection("idx") | 
| +	if idxCol != nil { | 
| +		idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, func(i *gkvlite.Item) bool { | 
| +			id, err := serialize.ReadIndexDefinition(bytes.NewBuffer(i.Key)) | 
| +			memoryCorruption(err) | 
| + | 
| +			idxs.maybeAddDefinition(q, s, missingTerms, &id) | 
| +			return true | 
| +		}) | 
| +	} | 
| + | 
| +	// this query is impossible to fulfil with the current indicies. Not all the | 
| +	// terms (equality + projection) are satisfied. | 
| +	if len(missingTerms) < 0 || len(idxs) == 0 { | 
| +		// TODO(riannucci): return error when index requires missing composite | 
| +		// indicies. We can even calculate the maximum missing index which would | 
| +		// satisfy the remainder of the query! | 
| +		remains := &ds.IndexDefinition{ | 
| +			Kind:     q.kind, | 
| +			Ancestor: q.eqFilters["__ancestor__"] != nil, | 
| +		} | 
| +		terms := make([]string, 0, len(missingTerms)) | 
| +		for mt := range missingTerms { | 
| +			terms = append(terms, mt) | 
| +		} | 
| +		if serializationDeterministic { | 
| +			sort.Strings(terms) | 
| +		} | 
| +		for _, term := range terms { | 
| +			remains.SortBy = append(remains.SortBy, ds.IndexColumn{Property: term}) | 
| +		} | 
| +		remains.SortBy = append(remains.SortBy, q.suffixFormat...) | 
| +		last := remains.SortBy[len(remains.SortBy)-1] | 
| +		if last.Direction == ds.ASCENDING { | 
| +			// this is implied | 
| +			remains.SortBy = remains.SortBy[:len(remains.SortBy)-1] | 
| +		} | 
| +		if remains.Builtin() { | 
| +			// don't recommend that they add a builtin query... just have the query be | 
| +			// empty. | 
| +			return nil, nil | 
| +		} | 
| +		return nil, fmt.Errorf( | 
| +			"Your indicies are insufficient! Try adding:\n  %s", remains) | 
| +	} | 
| + | 
| +	return idxs, nil | 
| +} | 
| + | 
| +// peel picks a constraint value for the property. It then removes this value | 
| +// from constraints (possibly removing the entire row from constraints if it | 
| +// was the last value). If the value wasn't available in constraints, it picks | 
| +// the value from residuals. | 
| +func peel(prop string, constraints map[string][][]byte, residuals map[string][]byte) []byte { | 
| +	ret := []byte(nil) | 
| +	if vals, ok := constraints[prop]; ok { | 
| +		ret = vals[0] | 
| +		if len(vals) == 1 { | 
| +			delete(constraints, prop) | 
| +		} else { | 
| +			constraints[prop] = vals[1:] | 
| +		} | 
| +	} else { | 
| +		ret = residuals[prop] | 
| +	} | 
| +	return ret | 
| +} | 
| + | 
| +// generate generates a single iterDefinition for the given index. | 
| +func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[string][][]byte, residuals map[string][]byte) *iterDefinition { | 
| +	def := &iterDefinition{ | 
| +		c:     idx.coll, | 
| +		start: q.start, | 
| +		end:   q.end, | 
| +	} | 
| +	toJoin := [][]byte{} | 
| +	for _, sb := range idx.eqFilts { | 
| +		val := peel(sb.Property, constraints, residuals) | 
| +		if sb.Direction == ds.DESCENDING { | 
| +			val = invert(val) | 
| +		} | 
| +		toJoin = append(toJoin, val) | 
| +	} | 
| +	def.prefix = bjoin(toJoin...) | 
| +	def.prefixLen = len(def.prefix) | 
| + | 
| +	if q.eqFilters["__ancestor__"] != nil && !idx.has("__ancestor__") { | 
| +		// The query requires an ancestor, but the index doesn't explicitly have it | 
| +		// as part of the prefix (otherwise it would have been the first eqFilt | 
| +		// above). This happens when it's a builtin index, or if it's the primary | 
| +		// index (for a kindless query), or if it's the Kind index (for a filterless | 
| +		// query). | 
| +		if len(q.suffixFormat) != 1 && q.suffixFormat[0].Property != "__key__" { | 
| +			// This should never happen. One of the previous validators would have | 
| +			// selected a different index. But just in case. | 
| +			impossible(fmt.Errorf("cannot supply an implicit ancestor for %#v", q)) | 
| +		} | 
| + | 
| +		// chop the terminal null byte off the q.ancestor key... we can accept | 
| +		// anything which is a descendant or an exact match. | 
| + | 
| +		// This silly construction gets the __ancestor__ value, because it's a | 
| +		// map[string]struct{} instead of a [][]byte{} (otherwise we'd just get | 
| +		// the value at the 0th index). | 
| +		anc := "" | 
| +		for k := range q.eqFilters["__ancestor__"] { | 
| +			anc = k | 
| +			break | 
| +		} | 
| + | 
| +		// Intentionally do NOT update prefixLen. This allows multiIterator to | 
| +		// correctly include the entire key in the shared iterator suffix, instead | 
| +		// of just the remainder. | 
| +		chopped := []byte(anc[:len(anc)-1]) | 
| +		if q.suffixFormat[0].Direction == ds.DESCENDING { | 
| +			chopped = invert(chopped) | 
| +		} | 
| +		def.prefix = bjoin(def.prefix, chopped) | 
| + | 
| +		// Update start and end, since we know that if they contain anything, they | 
| +		// contain values for the __key__ field. | 
| +		if def.start != nil { | 
| +			if !bytes.HasPrefix(def.start, chopped) { | 
| +				// again, shouldn't happen, but if it does, we want to know about it. | 
| +				impossible(fmt.Errorf( | 
| +					"start suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v", | 
| +					def.start, chopped)) | 
| +			} | 
| +			def.start = def.start[:len(chopped)] | 
| +		} | 
| +		if def.end != nil { | 
| +			if !bytes.HasPrefix(def.end, chopped) { | 
| +				impossible(fmt.Errorf( | 
| +					"end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v", | 
| +					def.end, chopped)) | 
| +			} | 
| +			def.end = def.end[:len(chopped)] | 
| +		} | 
| +	} | 
| + | 
| +	return def | 
| +} | 
| + | 
| +// calculateConstraints produces a mapping of all equality filters to the values | 
| +// that they're constrained to. It also calculates residuals, which are an | 
| +// arbitrary value for filling index prefixes which have more equality fields | 
| +// than are necessary. The value doesn't matter, as long as its an equality | 
| +// constraint in the original query. | 
| +func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, residuals map[string][]byte) { | 
| +	residuals = make(map[string][]byte, len(q.eqFilters)) | 
| +	constraints = make(map[string][][]byte, len(q.eqFilters)) | 
| +	for prop, vals := range q.eqFilters { | 
| +		bvals := make([][]byte, 0, len(vals)) | 
| +		for val := range vals { | 
| +			bvals = append(bvals, []byte(val)) | 
| +		} | 
| +		residuals[prop] = bvals[0] | 
| +		if prop == "__ancestor__" { | 
| +			// exclude __ancestor__ from the constraints. | 
| +			// | 
| +			// This is because it's handled specially during index proposal and | 
| +			// generation. Ancestor is used by ALL indices, and so its residual value | 
| +			// above will be sufficient. | 
| +			continue | 
| +		} | 
| +		constraints[prop] = bvals | 
| +	} | 
| +	return | 
| +} | 
| + | 
| +// getIndicies returns a set of iterator definitions. Iterating over these | 
| +// will result in matching suffixes. | 
| +func getIndicies(q *reducedQuery, s *memStore) ([]*iterDefinition, error) { | 
| +	relevantIdxs := IndexDefinitionSortableSlice(nil) | 
| +	if q.kind == "" { | 
| +		if coll := s.GetCollection("ents:" + q.ns); coll != nil { | 
| +			relevantIdxs = IndexDefinitionSortableSlice{{coll: coll}} | 
| +		} | 
| +	} else { | 
| +		err := error(nil) | 
| +		relevantIdxs, err = getRelevantIndicies(q, s) | 
| +		if err != nil { | 
| +			return nil, err | 
| +		} | 
| +	} | 
| +	if len(relevantIdxs) == 0 { | 
| +		return nil, errQueryDone | 
| +	} | 
| +	sort.Sort(sort.Reverse(relevantIdxs)) | 
| + | 
| +	constraints, residuals := calculateConstraints(q) | 
| + | 
| +	ret := []*iterDefinition{} | 
| +	for len(constraints) > 0 || len(ret) == 0 { | 
| +		lenBefore := len(ret) | 
| +		for i := len(relevantIdxs) - 1; i >= 0; i-- { | 
| +			// see if this index is helpful for the remaining constraints at all | 
| +			// | 
| +			// We pick the the first index, always. This makes it so that queries with | 
| +			// NO filters hit this at the minimum. | 
| +			useful := len(ret) == 0 | 
| +			if !useful { | 
| +				for _, col := range relevantIdxs[i].eqFilts { | 
| +					if _, ok := constraints[col.Property]; ok || len(ret) == 0 { | 
| +						// it allows us to make progress on at least one constraint! Yay! | 
| +						useful = true | 
| +						break | 
| +					} | 
| +				} | 
| +			} | 
| +			if useful { | 
| +				ret = append(ret, generate(q, relevantIdxs[i], constraints, residuals)) | 
| +			} else { | 
| +				// a useless index will never become useful, because we never add more | 
| +				// constraints. Nuke it. | 
| +				relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...) | 
| +			} | 
| +		} | 
| +		if lenBefore == len(ret) { | 
| +			// something is really wrong here... if relevantIdxs is !nil, then we | 
| +			// should always be able to make progress in this loop. | 
| +			impossible(fmt.Errorf("deadlock: cannot fulfil query?")) | 
| +		} | 
| +	} | 
| + | 
| +	return ret, nil | 
| +} | 
|  |