Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 package memory | |
| 6 | |
| 7 import ( | |
| 8 "bytes" | |
| 9 "fmt" | |
| 10 "sort" | |
| 11 | |
| 12 ds "github.com/luci/gae/service/datastore" | |
| 13 "github.com/luci/gae/service/datastore/serialize" | |
| 14 "github.com/luci/gkvlite" | |
| 15 ) | |
| 16 | |
| 17 // reducedQuery contains only the pieces of the query necessary to iteratate for | |
|
dnj (Google)
2015/08/23 06:50:06
ITERATATE!
iannucci
2015/08/23 18:19:42
Done.
| |
| 18 // results. | |
| 19 // deduplication is applied externally | |
| 20 // projection / keysonly / entity retrieval is done externally | |
| 21 type reducedQuery struct { | |
| 22 ns string | |
| 23 kind string | |
| 24 | |
| 25 // eqFilters indicate the set of all prefix constraints which need to be | |
| 26 // fulfilled in the composite query. All of these will translate into pr efix | |
| 27 // bytes for SOME index. | |
| 28 eqFilters map[string]map[string]struct{} | |
| 29 | |
| 30 // suffixFormat is the PRECISE listing of the suffix columns that ALL in dexes | |
| 31 // in the multi query will have. | |
| 32 // | |
| 33 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0t h | |
| 34 // element | |
| 35 // suffixFormat ALWAYS includes any additional projections (in ascending | |
| 36 // order) after all user defined sort orders | |
| 37 // suffixFormat ALWAYS has __key__ as the last column | |
| 38 suffixFormat []ds.IndexColumn | |
| 39 | |
| 40 // limits of the inequality and/or full sort order. This is ONLY a suffi x, | |
| 41 // and it will be appended to the prefix during iteration. | |
| 42 start []byte | |
| 43 end []byte | |
| 44 } | |
| 45 | |
| 46 type IndexDefinitionSortable struct { | |
| 47 numRows uint64 | |
| 48 suffixIdx int | |
| 49 | |
| 50 // eqFilts is the list of ACTUAL prefix columns. Note that it may contai n | |
| 51 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becu ase | |
| 52 // (tag=1, tag=2) is a perfectly valid query). | |
| 53 eqFilts []ds.IndexColumn | |
| 54 coll *memCollection | |
| 55 } | |
| 56 | |
| 57 func (i *IndexDefinitionSortable) has(property string) bool { | |
| 58 for _, col := range i.eqFilts { | |
| 59 if col.Property == property { | |
| 60 return true | |
| 61 } | |
| 62 if property == "__ancestor__" { | |
| 63 // ancestor can only be the first column, so abort early | |
| 64 break | |
| 65 } | |
| 66 } | |
| 67 return false | |
| 68 } | |
| 69 | |
| 70 type IndexDefinitionSortableSlice []IndexDefinitionSortable | |
| 71 | |
| 72 func (s IndexDefinitionSortableSlice) Len() int { return len(s) } | |
| 73 func (s IndexDefinitionSortableSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } | |
| 74 func (s IndexDefinitionSortableSlice) Less(i, j int) bool { | |
| 75 // prefer smaller indicies, and indicies which have fewer suffix indexes . | |
| 76 // | |
| 77 // After that, it doesn't matter. Note that this Less implementation is only | |
| 78 // for quick sorts, and NOT for equality comparison (e.g. !(x < y) && !( y < x) | |
| 79 // does NOT imply x == y). | |
| 80 return s[i].numRows < s[j].numRows && s[i].suffixIdx < s[j].suffixIdx | |
| 81 } | |
| 82 | |
| 83 // maybeAddDefinition possibly adds a new IndexDefinitionSortable to this slice. | |
| 84 // It's only added if it could be useful in servicing q, otherwise this function | |
| 85 // is a noop. | |
| 86 func (idxs *IndexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s *memStore, missingTerms map[string]struct{}, id *ds.IndexDefinition) { | |
| 87 // Kindless queries are handled elsewhere. | |
| 88 if id.Kind != q.kind { | |
| 89 return | |
| 90 } | |
| 91 | |
| 92 // If we're an ancestor query, and the index is compound, but doesn't in clude | |
| 93 // an Ancestor field, it doesn't work. Builtin indicies can be used for | |
| 94 // ancestor queries (and have !Ancestor), assuming that it's only equali ty | |
| 95 // filters (plus inequality on __key__), or a single inequality. | |
| 96 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() { | |
| 97 return | |
| 98 } | |
| 99 | |
| 100 // add __ancestor__, __key__, if necessary | |
| 101 sortBy := id.NormalizeOrder() | |
| 102 | |
| 103 // If the index has fewer fields than we need for the suffix, it can't | |
| 104 // possibly help. | |
| 105 if len(sortBy) < len(q.suffixFormat) { | |
| 106 return | |
| 107 } | |
| 108 | |
| 109 // See if we actually have this index for the correct namespace | |
| 110 coll := s.GetCollection("idx:" + q.ns + ":" + string(serialize.ToBytes(* id))) | |
| 111 if coll == nil { | |
| 112 return | |
| 113 } | |
| 114 | |
| 115 suffixIdx := len(sortBy) - len(q.suffixFormat) | |
| 116 // make sure the orders are precisely the same | |
| 117 for i, sb := range sortBy[suffixIdx:] { | |
| 118 if q.suffixFormat[i] != sb { | |
| 119 return | |
| 120 } | |
| 121 } | |
| 122 | |
| 123 // Make sure the equalities section doesn't contain any properties we do n't | |
| 124 // want in our query. | |
| 125 for _, p := range sortBy[:suffixIdx] { | |
| 126 if _, ok := q.eqFilters[p.Property]; !ok { | |
| 127 return | |
| 128 } | |
| 129 } | |
| 130 | |
| 131 // ok, we can actually use this | |
| 132 numRows, _ := coll.GetTotals() | |
| 133 toAdd := IndexDefinitionSortable{ | |
| 134 numRows: numRows, suffixIdx: suffixIdx, coll: coll} | |
| 135 toAdd.eqFilts = sortBy[:suffixIdx] | |
| 136 for _, sb := range toAdd.eqFilts { | |
| 137 delete(missingTerms, sb.Property) | |
| 138 } | |
| 139 *idxs = append(*idxs, toAdd) | |
| 140 } | |
| 141 | |
| 142 // getRelevantIndicies retrieves the relevant indices which could be used to | |
| 143 // service q. It returns nil if it's not possible to service q with the current | |
| 144 // indicies. | |
| 145 func getRelevantIndicies(q *reducedQuery, s *memStore) (IndexDefinitionSortableS lice, error) { | |
| 146 missingTerms := map[string]struct{}{} | |
| 147 for k := range q.eqFilters { | |
| 148 if k == "__ancestor__" { | |
| 149 // ancestor is not a prefix which can be satisfied by a single index. It | |
| 150 // must be satisfied by ALL indices (and has special log ic for this in | |
| 151 // the addDefinition logic) | |
| 152 continue | |
| 153 } | |
| 154 missingTerms[k] = struct{}{} | |
| 155 } | |
| 156 idxs := IndexDefinitionSortableSlice{} | |
| 157 | |
| 158 // add builtins | |
| 159 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | |
| 160 Kind: q.kind, | |
| 161 }) | |
| 162 for prop := range q.eqFilters { | |
| 163 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | |
| 164 Kind: q.kind, | |
| 165 SortBy: []ds.IndexColumn{ | |
| 166 {Property: prop}, | |
| 167 }, | |
| 168 }) | |
| 169 idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ | |
| 170 Kind: q.kind, | |
| 171 SortBy: []ds.IndexColumn{ | |
| 172 {Property: prop, Direction: ds.DESCENDING}, | |
| 173 }, | |
| 174 }) | |
| 175 } | |
| 176 | |
| 177 // Try adding all compound indicies | |
| 178 idxCol := s.GetCollection("idx") | |
| 179 if idxCol != nil { | |
| 180 idxCol.VisitItemsAscend(ds.IndexComplexQueryPrefix(), false, fun c(i *gkvlite.Item) bool { | |
| 181 id, err := serialize.ReadIndexDefinition(bytes.NewBuffer (i.Key)) | |
| 182 memoryCorruption(err) | |
| 183 | |
| 184 idxs.maybeAddDefinition(q, s, missingTerms, &id) | |
| 185 return true | |
| 186 }) | |
| 187 } | |
| 188 | |
| 189 // this query is impossible to fulfil with the current indicies. Not all the | |
| 190 // terms (equality + projection) are satisfied. | |
| 191 if len(missingTerms) < 0 || len(idxs) == 0 { | |
| 192 // TODO(riannucci): return error when index requires missing com posite | |
| 193 // indicies. We can even calculate the maximum missing index whi ch would | |
| 194 // satisfy the remainder of the query! | |
| 195 remains := &ds.IndexDefinition{ | |
| 196 Kind: q.kind, | |
| 197 Ancestor: q.eqFilters["__ancestor__"] != nil, | |
| 198 } | |
| 199 terms := make([]string, 0, len(missingTerms)) | |
| 200 for mt := range missingTerms { | |
| 201 terms = append(terms, mt) | |
| 202 } | |
| 203 if serializationDeterministic { | |
| 204 sort.Strings(terms) | |
| 205 } | |
| 206 for _, term := range terms { | |
| 207 remains.SortBy = append(remains.SortBy, ds.IndexColumn{P roperty: term}) | |
| 208 } | |
| 209 remains.SortBy = append(remains.SortBy, q.suffixFormat...) | |
| 210 last := remains.SortBy[len(remains.SortBy)-1] | |
| 211 if last.Direction == ds.ASCENDING { | |
| 212 // this is implied | |
| 213 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1] | |
| 214 } | |
| 215 if remains.Builtin() { | |
| 216 // don't recommend that they add a builtin query... just have the query be | |
| 217 // empty. | |
| 218 return nil, nil | |
| 219 } | |
| 220 return nil, fmt.Errorf( | |
| 221 "Your indicies are insufficient! Try adding:\n %s", rem ains) | |
| 222 } | |
| 223 | |
| 224 return idxs, nil | |
| 225 } | |
| 226 | |
| 227 // peel picks a constraint value for the property. It then removes this value | |
| 228 // from constraints (possibly removing the entire row from constraints if it | |
| 229 // was the last value). If the value wasn't available in constraints, it picks | |
| 230 // the value from residuals. | |
| 231 func peel(prop string, constraints map[string][][]byte, residuals map[string][]b yte) []byte { | |
| 232 ret := []byte(nil) | |
| 233 if vals, ok := constraints[prop]; ok { | |
| 234 ret = vals[0] | |
| 235 if len(vals) == 1 { | |
| 236 delete(constraints, prop) | |
| 237 } else { | |
| 238 constraints[prop] = vals[1:] | |
| 239 } | |
| 240 } else { | |
| 241 ret = residuals[prop] | |
| 242 } | |
| 243 return ret | |
| 244 } | |
| 245 | |
| 246 // generate generates a single iterDefinition for the given index. | |
| 247 func generate(q *reducedQuery, idx IndexDefinitionSortable, constraints map[stri ng][][]byte, residuals map[string][]byte) *iterDefinition { | |
| 248 def := &iterDefinition{ | |
| 249 c: idx.coll, | |
| 250 start: q.start, | |
| 251 end: q.end, | |
| 252 } | |
| 253 toJoin := [][]byte{} | |
| 254 for _, sb := range idx.eqFilts { | |
| 255 val := peel(sb.Property, constraints, residuals) | |
| 256 if sb.Direction == ds.DESCENDING { | |
| 257 val = invert(val) | |
| 258 } | |
| 259 toJoin = append(toJoin, val) | |
| 260 } | |
| 261 def.prefix = bjoin(toJoin...) | |
| 262 def.prefixLen = len(def.prefix) | |
| 263 | |
| 264 if q.eqFilters["__ancestor__"] != nil && !idx.has("__ancestor__") { | |
| 265 // The query requires an ancestor, but the index doesn't explici tly have it | |
| 266 // as part of the prefix (otherwise it would have been the first eqFilt | |
| 267 // above). This happens when it's a builtin index, or if it's th e primary | |
| 268 // index (for a kindless query), or if it's the Kind index (for a filterless | |
| 269 // query). | |
| 270 if len(q.suffixFormat) != 1 && q.suffixFormat[0].Property != "__ key__" { | |
| 271 // This should never happen. One of the previous validat ors would have | |
| 272 // selected a different index. But just in case. | |
| 273 impossible(fmt.Errorf("cannot supply an implicit ancesto r for %#v", q)) | |
| 274 } | |
| 275 | |
| 276 // chop the terminal null byte off the q.ancestor key... we can accept | |
| 277 // anything which is a descendant or an exact match. | |
| 278 | |
| 279 // This silly construction gets the __ancestor__ value, because it's a | |
| 280 // map[string]struct{} instead of a [][]byte{} (otherwise we'd j ust get | |
| 281 // the value at the 0th index). | |
| 282 anc := "" | |
| 283 for k := range q.eqFilters["__ancestor__"] { | |
| 284 anc = k | |
| 285 break | |
| 286 } | |
| 287 | |
| 288 // Intentionally do NOT update prefixLen. This allows multiItera tor to | |
| 289 // correctly include the entire key in the shared iterator suffi x, instead | |
| 290 // of just the remainder. | |
| 291 chopped := []byte(anc[:len(anc)-1]) | |
| 292 if q.suffixFormat[0].Direction == ds.DESCENDING { | |
| 293 chopped = invert(chopped) | |
| 294 } | |
| 295 def.prefix = bjoin(def.prefix, chopped) | |
| 296 | |
| 297 // Update start and end, since we know that if they contain anyt hing, they | |
| 298 // contain values for the __key__ field. | |
| 299 if def.start != nil { | |
| 300 if !bytes.HasPrefix(def.start, chopped) { | |
| 301 // again, shouldn't happen, but if it does, we w ant to know about it. | |
| 302 impossible(fmt.Errorf( | |
| 303 "start suffix for implied ancestor doesn 't start with ancestor! start:%v ancestor:%v", | |
| 304 def.start, chopped)) | |
| 305 } | |
| 306 def.start = def.start[:len(chopped)] | |
| 307 } | |
| 308 if def.end != nil { | |
| 309 if !bytes.HasPrefix(def.end, chopped) { | |
| 310 impossible(fmt.Errorf( | |
| 311 "end suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v", | |
| 312 def.end, chopped)) | |
| 313 } | |
| 314 def.end = def.end[:len(chopped)] | |
| 315 } | |
| 316 } | |
| 317 | |
| 318 return def | |
| 319 } | |
| 320 | |
| 321 // calculateConstraints produces a mapping of all equality filters to the values | |
| 322 // that they're constrained to. It also calculates residuals, which are an | |
| 323 // arbitrary value for filling index prefixes which have more equality fields | |
| 324 // than are necessary. The value doesn't matter, as long as its an equality | |
| 325 // constraint in the original query. | |
| 326 func calculateConstraints(q *reducedQuery) (constraints map[string][][]byte, res iduals map[string][]byte) { | |
| 327 residuals = make(map[string][]byte, len(q.eqFilters)) | |
| 328 constraints = make(map[string][][]byte, len(q.eqFilters)) | |
| 329 for prop, vals := range q.eqFilters { | |
| 330 bvals := make([][]byte, 0, len(vals)) | |
| 331 for val := range vals { | |
| 332 bvals = append(bvals, []byte(val)) | |
| 333 } | |
| 334 residuals[prop] = bvals[0] | |
| 335 if prop == "__ancestor__" { | |
| 336 // exclude __ancestor__ from the constraints. | |
| 337 // | |
| 338 // This is because it's handled specially during index p roposal and | |
| 339 // generation. Ancestor is used by ALL indices, and so i ts residual value | |
| 340 // above will be sufficient. | |
| 341 continue | |
| 342 } | |
| 343 constraints[prop] = bvals | |
| 344 } | |
| 345 return | |
| 346 } | |
| 347 | |
| 348 // getIndicies returns a set of iterator definitions. Iterating over these | |
| 349 // will result in matching suffixes. | |
| 350 func getIndicies(q *reducedQuery, s *memStore) ([]*iterDefinition, error) { | |
| 351 relevantIdxs := IndexDefinitionSortableSlice(nil) | |
| 352 if q.kind == "" { | |
| 353 if coll := s.GetCollection("ents:" + q.ns); coll != nil { | |
| 354 relevantIdxs = IndexDefinitionSortableSlice{{coll: coll} } | |
| 355 } | |
| 356 } else { | |
| 357 err := error(nil) | |
| 358 relevantIdxs, err = getRelevantIndicies(q, s) | |
| 359 if err != nil { | |
| 360 return nil, err | |
| 361 } | |
| 362 } | |
| 363 if len(relevantIdxs) == 0 { | |
| 364 return nil, errQueryDone | |
| 365 } | |
| 366 sort.Sort(sort.Reverse(relevantIdxs)) | |
| 367 | |
| 368 constraints, residuals := calculateConstraints(q) | |
| 369 | |
| 370 ret := []*iterDefinition{} | |
| 371 for len(constraints) > 0 || len(ret) == 0 { | |
| 372 lenBefore := len(ret) | |
| 373 for i := len(relevantIdxs) - 1; i >= 0; i-- { | |
| 374 // see if this index is helpful for the remaining constr aints at all | |
| 375 // | |
| 376 // We pick the the first index, always. This makes it so that filters with | |
| 377 // NO filters hit this at the minimum. | |
| 378 useful := len(ret) == 0 | |
| 379 if !useful { | |
| 380 for _, col := range relevantIdxs[i].eqFilts { | |
| 381 if _, ok := constraints[col.Property]; o k || len(ret) == 0 { | |
| 382 // it allows us to make progress on at least one constraint! Yay! | |
| 383 useful = true | |
| 384 break | |
| 385 } | |
| 386 } | |
| 387 } | |
| 388 if useful { | |
| 389 ret = append(ret, generate(q, relevantIdxs[i], c onstraints, residuals)) | |
| 390 } else { | |
| 391 // a useless index will never become useful, bec ause we never add more | |
| 392 // constraints. Nuke it. | |
| 393 relevantIdxs = append(relevantIdxs[:i], relevant Idxs[i+1:]...) | |
| 394 } | |
| 395 } | |
| 396 if lenBefore == len(ret) { | |
| 397 // something is really wrong here... if relevantIdxs is !nil, then we | |
| 398 // should always be able to make progress in this loop. | |
| 399 impossible(fmt.Errorf("deadlock: cannot fulfil query?")) | |
| 400 } | |
| 401 } | |
| 402 | |
| 403 return ret, nil | |
| 404 } | |
| OLD | NEW |