OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 package memory | |
6 | |
7 import ( | |
8 "bytes" | |
9 "fmt" | |
10 "sort" | |
11 | |
12 ds "github.com/luci/gae/service/datastore" | |
13 "github.com/luci/gae/service/datastore/serialize" | |
14 "github.com/luci/gkvlite" | |
15 ) | |
16 | |
17 // reducedQuery contains only the pieces of the query necessary to iteratate for | |
dnj (Google)
2015/08/23 06:50:06
ITERATATE!
iannucci
2015/08/23 18:19:42
Done.
| |
18 // results. | |
19 // deduplication is applied externally | |
20 // projection / keysonly / entity retrieval is done externally | |
21 type reducedQuery struct { | |
22 ns string | |
23 kind string | |
24 | |
25 // eqFilters indicate the set of all prefix constraints which need to be | |
26 // fulfilled in the composite query. All of these will translate into pr efix | |
27 // bytes for SOME index. | |
28 eqFilters map[string]map[string]struct{} | |
29 | |
30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes | |
31 // in the multi query will have. | |
32 // | |
33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h | |
34 // element | |
35 // suffixFormat ALWAYS includes any additional projections (in ascending | |
36 // order) after all user defined sort orders | |
37 // suffixFormat ALWAYS has __key__ as the last column | |
38 suffixFormat []ds.IndexColumn | |
39 | |
40 // limits of the inequality and/or full sort order. This is ONLY a suffi x, | |
41 // and it will be appended to the prefix during iteration. | |
42 start []byte | |
43 end []byte | |
44 } | |
45 | |
46 type IndexDefinitionSortable struct { | |
47 numRows uint64 | |
48 suffixIdx int | |
49 | |
50 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n | |
51 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase | |
52 // (tag=1, tag=2) is a perfectly valid query). | |
53 eqFilts []ds.IndexColumn | |
54 coll *memCollection | |
55 } | |
56 | |
57 func (i *IndexDefinitionSortable) has(property string) bool { | |
58 for _, col := range i.eqFilts { | |
59 if col.Property == property { | |
60 return true | |
61 } | |
62 if property == "__ancestor__" { | |
63 // ancestor can only be the first column, so abort early | |
64 break | |
65 } | |
66 } | |
67 return false | |
68 } | |
69 | |
70 type IndexDefinitionSortableSlice []IndexDefinitionSortable | |
71 | |
72 func (s IndexDefinitionSortableSlice) Len() int { return len(s) } | |
73 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } | |
74 func (s IndexDefinitionSortableSlice) Less(i, j int) bool { | |
75 // prefer smaller indicies, and indicies which have fewer suffix indexes . | |
76 // | |
77 // After that, it doesn't matter. Note that this Less implementation is only | |
78 // for quick sorts, and NOT for equality comparison (e.g. !(x < y) && !( y < x) | |
79 // does NOT imply x == y). | |
80 return s[i].numRows < s[j].numRows && s[i].suffixIdx < s[j].suffixIdx | |
81 } | |
82 | |
83 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice. | |
84 // It's only added if it could be useful in servicing q, otherwise this function | |
85 // is a noop. | |
86 func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) { | |
87 // Kindless queries are handled elsewhere. | |
88 if id.Kind != q.kind { | |
89 return | |
90 } | |
91 | |
92 // If we're an ancestor query, and the index is compound, but doesn't in clude | |
93 // an Ancestor field, it doesn't work. Builtin indicies can be used for | |
94 // ancestor queries (and have !Ancestor), assuming that it's only equali ty | |
95 // filters (plus inequality on __key__), or a single inequality. | |
96 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() { | |
97 return | |
98 } | |
99 | |
100 // add __ancestor__, __key__, if necessary | |
101 sortBy := id.NormalizeOrder() | |
102 | |
103 // If the index has fewer fields than we need for the suffix, it can't | |
104 // possibly help. | |
105 if len(sortBy) < len(q.suffixFormat) { | |
106 return | |
107 } | |
108 | |
109 // See if we actually have this index for the correct namespace | |
110 coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(* id))) | |
111 if coll == nil { | |
112 return | |
113 } | |
114 | |
115 suffixIdx := len(sortBy) - len(q.suffixFormat) | |
116 // make sure the orders are precisely the same | |
117 for i, sb := range sortBy[suffixIdx:] { | |
118 if q.suffixFormat[i] != sb { | |
119 return | |
120 } | |
121 } | |
122 | |
123 // Make sure the equalities section doesn't contain any properties we do n't | |
124 // want in our query. | |
125 for _, p := range sortBy[:suffixIdx] { | |
126 if _, ok := q.eqFilters[p.Property]; !ok { | |
127 return | |
128 } | |
129 } | |
130 | |
131 // ok, we can actually use this | |
132 numRows, _ := coll.GetTotals() | |
133 toAdd := IndexDefinitionSortable{ | |
134 numRows: numRows, suffixIdx: suffixIdx, coll: coll} | |
135 toAdd.eqFilts = sortBy[:suffixIdx] | |
136 for _, sb := range toAdd.eqFilts { | |
137 delete(missingTerms, sb.Property) | |
138 } | |
139 *idxs = append(*idxs, toAdd) | |
140 } | |
141 | |
142 // getRelevantIndicies retrieves the relevant indices which could be used to | |
143 // service q. It returns nil if it's not possible to service q with the current | |
144 // indicies. | |
145 func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableS lice, error) { | |
146 missingTerms := map[string]struct{}{} | |
147 for k := range q.eqFilters { | |
148 if k == "__ancestor__" { | |
149 // ancestor is not a prefix which can be satisfied by a single index. It | |
150 // must be satisfied by ALL indices (and has special log ic for this in | |
151 // the addDefinition logic) | |
152 continue | |
153 } | |
154 missingTerms[k] = struct{}{} | |
155 } | |
156 idxs := IndexDefinitionSortableSlice{} | |
157 | |
158 // add builtins | |
159 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | |
160 Kind: q.kind, | |
161 }) | |
162 for prop := range q.eqFilters { | |
163 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | |
164 Kind: q.kind, | |
165 SortBy: []ds.IndexColumn{ | |
166 {Property: prop}, | |
167 }, | |
168 }) | |
169 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | |
170 Kind: q.kind, | |
171 SortBy: []ds.IndexColumn{ | |
172 {Property: prop, Direction: ds.DESCENDING}, | |
173 }, | |
174 }) | |
175 } | |
176 | |
177 // Try adding all compound indicies | |
178 idxCol := s.GetCollection("idx") | |
179 if idxCol != nil { | |
180 idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, fun c(i *gkvlite.Item) bool { | |
181 id, err := serialize.ReadIndexDefinition(bytes.NewBuffer (i.Key)) | |
182 memoryCorruption(err) | |
183 | |
184 idxs.maybeAddDefinition(q, s, missingTerms, &id) | |
185 return true | |
186 }) | |
187 } | |
188 | |
189 // this query is impossible to fulfil with the current indicies. Not all the | |
190 // terms (equality + projection) are satisfied. | |
191 if len(missingTerms) < 0 || len(idxs) == 0 { | |
192 // TODO(riannucci): return error when index requires missing com posite | |
193 // indicies. We can even calculate the maximum missing index whi ch would | |
194 // satisfy the remainder of the query! | |
195 remains := &ds.IndexDefinition{ | |
196 Kind: q.kind, | |
197 Ancestor: q.eqFilters["__ancestor__"] != nil, | |
198 } | |
199 terms := make([]string, 0, len(missingTerms)) | |
200 for mt := range missingTerms { | |
201 terms = append(terms, mt) | |
202 } | |
203 if serializationDeterministic { | |
204 sort.Strings(terms) | |
205 } | |
206 for _, term := range terms { | |
207 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term}) | |
208 } | |
209 remains.SortBy = append(remains.SortBy, q.suffixFormat...) | |
210 last := remains.SortBy[len(remains.SortBy)-1] | |
211 if last.Direction == ds.ASCENDING { | |
212 // this is implied | |
213 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1] | |
214 } | |
215 if remains.Builtin() { | |
216 // don't recommend that they add a builtin query... just have the query be | |
217 // empty. | |
218 return nil, nil | |
219 } | |
220 return nil, fmt.Errorf( | |
221 "Your indicies are insufficient! Try adding:\n %s", rem ains) | |
222 } | |
223 | |
224 return idxs, nil | |
225 } | |
226 | |
227 // peel picks a constraint value for the property. It then removes this value | |
228 // from constraints (possibly removing the entire row from constraints if it | |
229 // was the last value). If the value wasn't available in constraints, it picks | |
230 // the value from residuals. | |
231 func peel(prop string, constraints map[string][][]byte, residuals map[string][]b yte) []byte { | |
232 ret := []byte(nil) | |
233 if vals, ok := constraints[prop]; ok { | |
234 ret = vals[0] | |
235 if len(vals) == 1 { | |
236 delete(constraints, prop) | |
237 } else { | |
238 constraints[prop] = vals[1:] | |
239 } | |
240 } else { | |
241 ret = residuals[prop] | |
242 } | |
243 return ret | |
244 } | |
245 | |
246 // generate generates a single iterDefinition for the given index. | |
247 func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[stri ng][][]byte, residuals map[string][]byte) *iterDefinition { | |
248 def := &iterDefinition{ | |
249 c: idx.coll, | |
250 start: q.start, | |
251 end: q.end, | |
252 } | |
253 toJoin := [][]byte{} | |
254 for _, sb := range idx.eqFilts { | |
255 val := peel(sb.Property, constraints, residuals) | |
256 if sb.Direction == ds.DESCENDING { | |
257 val = invert(val) | |
258 } | |
259 toJoin = append(toJoin, val) | |
260 } | |
261 def.prefix = bjoin(toJoin...) | |
262 def.prefixLen = len(def.prefix) | |
263 | |
264 if q.eqFilters["__ancestor__"] != nil && !idx.has("__ancestor__") { | |
265 // The query requires an ancestor, but the index doesn't explici tly have it | |
266 // as part of the prefix (otherwise it would have been the first eqFilt | |
267 // above). This happens when it's a builtin index, or if it's th e primary | |
268 // index (for a kindless query), or if it's the Kind index (for a filterless | |
269 // query). | |
270 if len(q.suffixFormat) != 1 && q.suffixFormat[0].Property != "__ key__" { | |
271 // This should never happen. One of the previous validat ors would have | |
272 // selected a different index. But just in case. | |
273 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", q)) | |
274 } | |
275 | |
276 // chop the terminal null byte off the q.ancestor key... we can accept | |
277 // anything which is a descendant or an exact match. | |
278 | |
279 // This silly construction gets the __ancestor__ value, because it's a | |
280 // map[string]struct{} instead of a [][]byte{} (otherwise we'd j ust get | |
281 // the value at the 0th index). | |
282 anc := "" | |
283 for k := range q.eqFilters["__ancestor__"] { | |
284 anc = k | |
285 break | |
286 } | |
287 | |
288 // Intentionally do NOT update prefixLen. This allows multiItera tor to | |
289 // correctly include the entire key in the shared iterator suffi x, instead | |
290 // of just the remainder. | |
291 chopped := []byte(anc[:len(anc)-1]) | |
292 if q.suffixFormat[0].Direction == ds.DESCENDING { | |
293 chopped = invert(chopped) | |
294 } | |
295 def.prefix = bjoin(def.prefix, chopped) | |
296 | |
297 // Update start and end, since we know that if they contain anyt hing, they | |
298 // contain values for the __key__ field. | |
299 if def.start != nil { | |
300 if !bytes.HasPrefix(def.start, chopped) { | |
301 // again, shouldn't happen, but if it does, we w ant to know about it. | |
302 impossible(fmt.Errorf( | |
303 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v", | |
304 def.start, chopped)) | |
305 } | |
306 def.start = def.start[:len(chopped)] | |
307 } | |
308 if def.end != nil { | |
309 if !bytes.HasPrefix(def.end, chopped) { | |
310 impossible(fmt.Errorf( | |
311 "end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v", | |
312 def.end, chopped)) | |
313 } | |
314 def.end = def.end[:len(chopped)] | |
315 } | |
316 } | |
317 | |
318 return def | |
319 } | |
320 | |
321 // calculateConstraints produces a mapping of all equality filters to the values | |
322 // that they're constrained to. It also calculates residuals, which are an | |
323 // arbitrary value for filling index prefixes which have more equality fields | |
324 // than are necessary. The value doesn't matter, as long as its an equality | |
325 // constraint in the original query. | |
326 func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, res iduals map[string][]byte) { | |
327 residuals = make(map[string][]byte, len(q.eqFilters)) | |
328 constraints = make(map[string][][]byte, len(q.eqFilters)) | |
329 for prop, vals := range q.eqFilters { | |
330 bvals := make([][]byte, 0, len(vals)) | |
331 for val := range vals { | |
332 bvals = append(bvals, []byte(val)) | |
333 } | |
334 residuals[prop] = bvals[0] | |
335 if prop == "__ancestor__" { | |
336 // exclude __ancestor__ from the constraints. | |
337 // | |
338 // This is because it's handled specially during index p roposal and | |
339 // generation. Ancestor is used by ALL indices, and so i ts residual value | |
340 // above will be sufficient. | |
341 continue | |
342 } | |
343 constraints[prop] = bvals | |
344 } | |
345 return | |
346 } | |
347 | |
348 // getIndicies returns a set of iterator definitions. Iterating over these | |
349 // will result in matching suffixes. | |
350 func getIndicies(q *reducedQuery, s *memStore) ([]*iterDefinition, error) { | |
351 relevantIdxs := IndexDefinitionSortableSlice(nil) | |
352 if q.kind == "" { | |
353 if coll := s.GetCollection("ents:" + q.ns); coll != nil { | |
354 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} } | |
355 } | |
356 } else { | |
357 err := error(nil) | |
358 relevantIdxs, err = getRelevantIndicies(q, s) | |
359 if err != nil { | |
360 return nil, err | |
361 } | |
362 } | |
363 if len(relevantIdxs) == 0 { | |
364 return nil, errQueryDone | |
365 } | |
366 sort.Sort(sort.Reverse(relevantIdxs)) | |
367 | |
368 constraints, residuals := calculateConstraints(q) | |
369 | |
370 ret := []*iterDefinition{} | |
371 for len(constraints) > 0 || len(ret) == 0 { | |
372 lenBefore := len(ret) | |
373 for i := len(relevantIdxs) - 1; i >= 0; i-- { | |
374 // see if this index is helpful for the remaining constr aints at all | |
375 // | |
376 // We pick the the first index, always. This makes it so that filters with | |
377 // NO filters hit this at the minimum. | |
378 useful := len(ret) == 0 | |
379 if !useful { | |
380 for _, col := range relevantIdxs[i].eqFilts { | |
381 if _, ok := constraints[col.Property]; o k || len(ret) == 0 { | |
382 // it allows us to make progress on at least one constraint! Yay! | |
383 useful = true | |
384 break | |
385 } | |
386 } | |
387 } | |
388 if useful { | |
389 ret = append(ret, generate(q, relevantIdxs[i], c onstraints, residuals)) | |
390 } else { | |
391 // a useless index will never become useful, bec ause we never add more | |
392 // constraints. Nuke it. | |
393 relevantIdxs = append(relevantIdxs[:i], relevant Idxs[i+1:]...) | |
394 } | |
395 } | |
396 if lenBefore == len(ret) { | |
397 // something is really wrong here... if relevantIdxs is !nil, then we | |
398 // should always be able to make progress in this loop. | |
399 impossible(fmt.Errorf("deadlock: cannot fulfil query?")) | |
400 } | |
401 } | |
402 | |
403 return ret, nil | |
404 } | |
OLD | NEW |