Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(560)

Side by Side Diff: impl/memory/datastore_index_selection.go

Issue 1302813003: impl/memory: Implement Queries (Closed) Base URL: https://github.com/luci/gae.git@add_multi_iterator
Patch Set: Baby's first query! Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package memory
6
7 import (
8 "bytes"
9 "fmt"
10 "sort"
11
12 ds "github.com/luci/gae/service/datastore"
13 "github.com/luci/gae/service/datastore/serialize"
14 "github.com/luci/gkvlite"
15 )
16
17 // reducedQuery contains only the pieces of the query necessary to iteratate for
dnj (Google) 2015/08/23 06:50:06 ITERATATE!
iannucci 2015/08/23 18:19:42 Done.
18 // results.
19 // deduplication is applied externally
20 // projection / keysonly / entity retrieval is done externally
21 type reducedQuery struct {
22 ns string
23 kind string
24
25 // eqFilters indicate the set of all prefix constraints which need to be
26 // fulfilled in the composite query. All of these will translate into pr efix
27 // bytes for SOME index.
28 eqFilters map[string]map[string]struct{}
29
30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes
31 // in the multi query will have.
32 //
33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h
34 // element
35 // suffixFormat ALWAYS includes any additional projections (in ascending
36 // order) after all user defined sort orders
37 // suffixFormat ALWAYS has __key__ as the last column
38 suffixFormat []ds.IndexColumn
39
40 // limits of the inequality and/or full sort order. This is ONLY a suffi x,
41 // and it will be appended to the prefix during iteration.
42 start []byte
43 end []byte
44 }
45
46 type IndexDefinitionSortable struct {
47 numRows uint64
48 suffixIdx int
49
50 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n
51 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase
52 // (tag=1, tag=2) is a perfectly valid query).
53 eqFilts []ds.IndexColumn
54 coll *memCollection
55 }
56
57 func (i *IndexDefinitionSortable) has(property string) bool {
58 for _, col := range i.eqFilts {
59 if col.Property == property {
60 return true
61 }
62 if property == "__ancestor__" {
63 // ancestor can only be the first column, so abort early
64 break
65 }
66 }
67 return false
68 }
69
70 type IndexDefinitionSortableSlice []IndexDefinitionSortable
71
72 func (s IndexDefinitionSortableSlice) Len() int { return len(s) }
73 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
74 func (s IndexDefinitionSortableSlice) Less(i, j int) bool {
75 // prefer smaller indicies, and indicies which have fewer suffix indexes .
76 //
77 // After that, it doesn't matter. Note that this Less implementation is only
78 // for quick sorts, and NOT for equality comparison (e.g. !(x < y) && !( y < x)
79 // does NOT imply x == y).
80 return s[i].numRows < s[j].numRows && s[i].suffixIdx < s[j].suffixIdx
81 }
82
83 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice.
84 // It's only added if it could be useful in servicing q, otherwise this function
85 // is a noop.
86 func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) {
87 // Kindless queries are handled elsewhere.
88 if id.Kind != q.kind {
89 return
90 }
91
92 // If we're an ancestor query, and the index is compound, but doesn't in clude
93 // an Ancestor field, it doesn't work. Builtin indicies can be used for
94 // ancestor queries (and have !Ancestor), assuming that it's only equali ty
95 // filters (plus inequality on __key__), or a single inequality.
96 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() {
97 return
98 }
99
100 // add __ancestor__, __key__, if necessary
101 sortBy := id.NormalizeOrder()
102
103 // If the index has fewer fields than we need for the suffix, it can't
104 // possibly help.
105 if len(sortBy) < len(q.suffixFormat) {
106 return
107 }
108
109 // See if we actually have this index for the correct namespace
110 coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(* id)))
111 if coll == nil {
112 return
113 }
114
115 suffixIdx := len(sortBy) - len(q.suffixFormat)
116 // make sure the orders are precisely the same
117 for i, sb := range sortBy[suffixIdx:] {
118 if q.suffixFormat[i] != sb {
119 return
120 }
121 }
122
123 // Make sure the equalities section doesn't contain any properties we do n't
124 // want in our query.
125 for _, p := range sortBy[:suffixIdx] {
126 if _, ok := q.eqFilters[p.Property]; !ok {
127 return
128 }
129 }
130
131 // ok, we can actually use this
132 numRows, _ := coll.GetTotals()
133 toAdd := IndexDefinitionSortable{
134 numRows: numRows, suffixIdx: suffixIdx, coll: coll}
135 toAdd.eqFilts = sortBy[:suffixIdx]
136 for _, sb := range toAdd.eqFilts {
137 delete(missingTerms, sb.Property)
138 }
139 *idxs = append(*idxs, toAdd)
140 }
141
142 // getRelevantIndicies retrieves the relevant indices which could be used to
143 // service q. It returns nil if it's not possible to service q with the current
144 // indicies.
145 func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableS lice, error) {
146 missingTerms := map[string]struct{}{}
147 for k := range q.eqFilters {
148 if k == "__ancestor__" {
149 // ancestor is not a prefix which can be satisfied by a single index. It
150 // must be satisfied by ALL indices (and has special log ic for this in
151 // the addDefinition logic)
152 continue
153 }
154 missingTerms[k] = struct{}{}
155 }
156 idxs := IndexDefinitionSortableSlice{}
157
158 // add builtins
159 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
160 Kind: q.kind,
161 })
162 for prop := range q.eqFilters {
163 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
164 Kind: q.kind,
165 SortBy: []ds.IndexColumn{
166 {Property: prop},
167 },
168 })
169 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{
170 Kind: q.kind,
171 SortBy: []ds.IndexColumn{
172 {Property: prop, Direction: ds.DESCENDING},
173 },
174 })
175 }
176
177 // Try adding all compound indicies
178 idxCol := s.GetCollection("idx")
179 if idxCol != nil {
180 idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, fun c(i *gkvlite.Item) bool {
181 id, err := serialize.ReadIndexDefinition(bytes.NewBuffer (i.Key))
182 memoryCorruption(err)
183
184 idxs.maybeAddDefinition(q, s, missingTerms, &id)
185 return true
186 })
187 }
188
189 // this query is impossible to fulfil with the current indicies. Not all the
190 // terms (equality + projection) are satisfied.
191 if len(missingTerms) < 0 || len(idxs) == 0 {
192 // TODO(riannucci): return error when index requires missing com posite
193 // indicies. We can even calculate the maximum missing index whi ch would
194 // satisfy the remainder of the query!
195 remains := &ds.IndexDefinition{
196 Kind: q.kind,
197 Ancestor: q.eqFilters["__ancestor__"] != nil,
198 }
199 terms := make([]string, 0, len(missingTerms))
200 for mt := range missingTerms {
201 terms = append(terms, mt)
202 }
203 if serializationDeterministic {
204 sort.Strings(terms)
205 }
206 for _, term := range terms {
207 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term})
208 }
209 remains.SortBy = append(remains.SortBy, q.suffixFormat...)
210 last := remains.SortBy[len(remains.SortBy)-1]
211 if last.Direction == ds.ASCENDING {
212 // this is implied
213 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1]
214 }
215 if remains.Builtin() {
216 // don't recommend that they add a builtin query... just have the query be
217 // empty.
218 return nil, nil
219 }
220 return nil, fmt.Errorf(
221 "Your indicies are insufficient! Try adding:\n %s", rem ains)
222 }
223
224 return idxs, nil
225 }
226
227 // peel picks a constraint value for the property. It then removes this value
228 // from constraints (possibly removing the entire row from constraints if it
229 // was the last value). If the value wasn't available in constraints, it picks
230 // the value from residuals.
231 func peel(prop string, constraints map[string][][]byte, residuals map[string][]b yte) []byte {
232 ret := []byte(nil)
233 if vals, ok := constraints[prop]; ok {
234 ret = vals[0]
235 if len(vals) == 1 {
236 delete(constraints, prop)
237 } else {
238 constraints[prop] = vals[1:]
239 }
240 } else {
241 ret = residuals[prop]
242 }
243 return ret
244 }
245
246 // generate generates a single iterDefinition for the given index.
247 func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[stri ng][][]byte, residuals map[string][]byte) *iterDefinition {
248 def := &iterDefinition{
249 c: idx.coll,
250 start: q.start,
251 end: q.end,
252 }
253 toJoin := [][]byte{}
254 for _, sb := range idx.eqFilts {
255 val := peel(sb.Property, constraints, residuals)
256 if sb.Direction == ds.DESCENDING {
257 val = invert(val)
258 }
259 toJoin = append(toJoin, val)
260 }
261 def.prefix = bjoin(toJoin...)
262 def.prefixLen = len(def.prefix)
263
264 if q.eqFilters["__ancestor__"] != nil && !idx.has("__ancestor__") {
265 // The query requires an ancestor, but the index doesn't explici tly have it
266 // as part of the prefix (otherwise it would have been the first eqFilt
267 // above). This happens when it's a builtin index, or if it's th e primary
268 // index (for a kindless query), or if it's the Kind index (for a filterless
269 // query).
270 if len(q.suffixFormat) != 1 && q.suffixFormat[0].Property != "__ key__" {
271 // This should never happen. One of the previous validat ors would have
272 // selected a different index. But just in case.
273 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", q))
274 }
275
276 // chop the terminal null byte off the q.ancestor key... we can accept
277 // anything which is a descendant or an exact match.
278
279 // This silly construction gets the __ancestor__ value, because it's a
280 // map[string]struct{} instead of a [][]byte{} (otherwise we'd j ust get
281 // the value at the 0th index).
282 anc := ""
283 for k := range q.eqFilters["__ancestor__"] {
284 anc = k
285 break
286 }
287
288 // Intentionally do NOT update prefixLen. This allows multiItera tor to
289 // correctly include the entire key in the shared iterator suffi x, instead
290 // of just the remainder.
291 chopped := []byte(anc[:len(anc)-1])
292 if q.suffixFormat[0].Direction == ds.DESCENDING {
293 chopped = invert(chopped)
294 }
295 def.prefix = bjoin(def.prefix, chopped)
296
297 // Update start and end, since we know that if they contain anyt hing, they
298 // contain values for the __key__ field.
299 if def.start != nil {
300 if !bytes.HasPrefix(def.start, chopped) {
301 // again, shouldn't happen, but if it does, we w ant to know about it.
302 impossible(fmt.Errorf(
303 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v",
304 def.start, chopped))
305 }
306 def.start = def.start[:len(chopped)]
307 }
308 if def.end != nil {
309 if !bytes.HasPrefix(def.end, chopped) {
310 impossible(fmt.Errorf(
311 "end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v",
312 def.end, chopped))
313 }
314 def.end = def.end[:len(chopped)]
315 }
316 }
317
318 return def
319 }
320
321 // calculateConstraints produces a mapping of all equality filters to the values
322 // that they're constrained to. It also calculates residuals, which are an
323 // arbitrary value for filling index prefixes which have more equality fields
324 // than are necessary. The value doesn't matter, as long as its an equality
325 // constraint in the original query.
326 func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, res iduals map[string][]byte) {
327 residuals = make(map[string][]byte, len(q.eqFilters))
328 constraints = make(map[string][][]byte, len(q.eqFilters))
329 for prop, vals := range q.eqFilters {
330 bvals := make([][]byte, 0, len(vals))
331 for val := range vals {
332 bvals = append(bvals, []byte(val))
333 }
334 residuals[prop] = bvals[0]
335 if prop == "__ancestor__" {
336 // exclude __ancestor__ from the constraints.
337 //
338 // This is because it's handled specially during index p roposal and
339 // generation. Ancestor is used by ALL indices, and so i ts residual value
340 // above will be sufficient.
341 continue
342 }
343 constraints[prop] = bvals
344 }
345 return
346 }
347
348 // getIndicies returns a set of iterator definitions. Iterating over these
349 // will result in matching suffixes.
350 func getIndicies(q *reducedQuery, s *memStore) ([]*iterDefinition, error) {
351 relevantIdxs := IndexDefinitionSortableSlice(nil)
352 if q.kind == "" {
353 if coll := s.GetCollection("ents:" + q.ns); coll != nil {
354 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} }
355 }
356 } else {
357 err := error(nil)
358 relevantIdxs, err = getRelevantIndicies(q, s)
359 if err != nil {
360 return nil, err
361 }
362 }
363 if len(relevantIdxs) == 0 {
364 return nil, errQueryDone
365 }
366 sort.Sort(sort.Reverse(relevantIdxs))
367
368 constraints, residuals := calculateConstraints(q)
369
370 ret := []*iterDefinition{}
371 for len(constraints) > 0 || len(ret) == 0 {
372 lenBefore := len(ret)
373 for i := len(relevantIdxs) - 1; i >= 0; i-- {
374 // see if this index is helpful for the remaining constr aints at all
375 //
376 // We pick the the first index, always. This makes it so that filters with
377 // NO filters hit this at the minimum.
378 useful := len(ret) == 0
379 if !useful {
380 for _, col := range relevantIdxs[i].eqFilts {
381 if _, ok := constraints[col.Property]; o k || len(ret) == 0 {
382 // it allows us to make progress on at least one constraint! Yay!
383 useful = true
384 break
385 }
386 }
387 }
388 if useful {
389 ret = append(ret, generate(q, relevantIdxs[i], c onstraints, residuals))
390 } else {
391 // a useless index will never become useful, bec ause we never add more
392 // constraints. Nuke it.
393 relevantIdxs = append(relevantIdxs[:i], relevant Idxs[i+1:]...)
394 }
395 }
396 if lenBefore == len(ret) {
397 // something is really wrong here... if relevantIdxs is !nil, then we
398 // should always be able to make progress in this loop.
399 impossible(fmt.Errorf("deadlock: cannot fulfil query?"))
400 }
401 }
402
403 return ret, nil
404 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698