filter/txnBuf/state.go - Issue 1309803004: Add transaction buffer filter.

Side by Side Diff: filter/txnBuf/state.go

Issue 1309803004: Add transaction buffer filter. (Closed) Base URL: https://github.com/luci/gae.git@add_query_support

Patch Set: add err for too many roots Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2015 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 package txnBuf

	6

	7 import (

	8 "bytes"

	9 "sync"

	10

	11 "github.com/luci/gae/impl/memory"

	12 "github.com/luci/gae/service/datastore"

	13 "github.com/luci/gae/service/datastore/serialize"

	14 "github.com/luci/gae/service/info"

	15 "github.com/luci/luci-go/common/errors"

	16 "github.com/luci/luci-go/common/stringset"

	17 "golang.org/x/net/context"

	18 )

	19

	20 // DefaultSizeBudget is the size budget for the root transaction.

	21 //

	22 // Because our estimation algorithm isn't entirely correct, we take 5% off

	23 // the limit for encoding and estimate inaccuracies.

	24 //

	25 // 10MB taken on 2015/09/24:

	26 // https://cloud.google.com/appengine/docs/go/datastore/#Go_Quotas_and_limits

	27 const DefaultSizeBudget = int64((10 * 1000 * 1000) * 0.95)

	28

	29 // DefaultSizeThreshold prevents the root transaction from getting too close

	30 // to the budget. If the code attempts to begin a transaction which would have

	31 // less than this threshold for its budget, the transaction will immediately

	32 // return ErrTransactionTooLarge.

	33 const DefaultSizeThreshold = int64(10 * 1000)

	34

	35 // XGTransactionGroupLimit is the number of transaction groups to allow in an

	36 // XG transaction.

	37 //

	38 // 25 taken on 2015/09/24:

	39 // https://cloud.google.com/appengine/docs/go/datastore/transactions#Go_What_can _be_done_in_a_transaction

	40 const XGTransactionGroupLimit = 25

	41

	42 // sizeTracker tracks the size of a buffered transaction. The rules are simple:

	43 // * deletes count for the size of their key, but 0 data

	44 // * puts count for the size of their key plus the 'EstimateSize' for their

	45 // data.

	46 type sizeTracker struct {

	47 keyToSize map[string]int64

	48 total int64

	49 }

	50

	51 // set states that the given key is being set to an entity with the size `val`.

	52 // A val of 0 means "I'm deleting this key"

	53 func (s *sizeTracker) set(key string, val int64) {

	54 if s.keyToSize == nil {

	55 s.keyToSize = make(map[string]int64)

	56 }

	57 prev, existed := s.keyToSize[key]

	58 s.keyToSize[key] = val

	59 s.total += val - prev

	60 if !existed {

	61 s.total += int64(len(key))

	62 }

	63 }

	64

	65 // get returns the currently tracked size for key, and wheter or not the key

	66 // has any tracked value.

	67 func (s *sizeTracker) get(key string) (int64, bool) {

	68 size, has := s.keyToSize[key]

	69 return size, has

	70 }

	71

	72 // has returns true iff key has a tracked value.

	73 func (s *sizeTracker) has(key string) bool {

	74 _, has := s.keyToSize[key]

	75 return has

	76 }

	77

	78 // dup returns a duplicate sizeTracker.

	79 func (s sizeTracker) dup() sizeTracker {

	80 if len(s.keyToSize) == 0 {

	81 return &sizeTracker{}

	82 }

	83 k2s := make(map[string]int64, len(s.keyToSize))

	84 for k, v := range s.keyToSize {

	85 k2s[k] = v

	86 }

	87 return &sizeTracker{k2s, s.total}

	88 }

	89

	90 type txnBufState struct {

	91 sync.Mutex

	92

	93 // encoded key -> size of entity. A size of 0 means that the entity is

	94 // deleted.

	95 entState *sizeTracker

	96 memDS datastore.RawInterface

	97

	98 roots stringset.Set

	99 rootLimit int

	100

	101 aid string

	102 ns string

	103 parentDS datastore.RawInterface

	104 parentState *txnBufState

	105

	106 // sizeBudget is the number of bytes that this transaction has to operat e

	107 // within. It's only used when attempting to apply() the transaction, an d

	108 // it is the threshold for the delta of applying this transaction to the

	109 // parent transaction. Note that a buffered transaction could actually h ave

	110 // a negative delta if the parent transaction had many large entities wh ich

	111 // the inner transaction deleted.

	112 sizeBudget int64

	113

	114 // siblingLock is to prevent two nested transactions from running at the same

	115 // time.

	116 //

	117 // Example:

	118 // RunInTransaction() { // root

	119 // RunInTransaction() // A

	120 // RunInTransaction() // B

	121 // }

	122 //

	123 // This will prevent A and B from running simulatneously.

	124 siblingLock sync.Mutex

	125 }

	126

	127 func withTxnBuf(ctx context.Context, cb func(context.Context) error, opts *datas tore.TransactionOptions) error {

	128 inf := info.Get(ctx)

	129 ns := inf.GetNamespace()

	130

	131 parentState, _ := ctx.Value(dsTxnBufParent).(*txnBufState)

	132 roots := stringset.New(0)

	133 rootLimit := 1

	134 if opts != nil && opts.XG {

	135 rootLimit = XGTransactionGroupLimit

	136 }

	137 sizeBudget := DefaultSizeBudget

	138 if parentState != nil {

	139 parentState.siblingLock.Lock()

	140 defer parentState.siblingLock.Unlock()

	141

	142 // TODO(riannucci): this is a bit wonky since it means that a ch ild

	143 // transaction declaring XG=true will only get to modify 25 grou ps IF

	144 // they're same groups affected by the parent transactions. So i nstead of

	145 // respecting opts.XG for inner transactions, we just dup everyt hing from

	146 // the parent transaction.

	147 roots = parentState.roots.Dup()

	148 rootLimit = parentState.rootLimit

	149

	150 sizeBudget = parentState.sizeBudget - parentState.entState.total

	151 if sizeBudget < DefaultSizeThreshold {

	152 return ErrTransactionTooLarge

	153 }

	154 }

	155

	156 memDS, err := memory.NewDatastore(inf.FullyQualifiedAppID(), ns)

	157 if err != nil {

	158 return err

	159 }

	160

	161 state := &txnBufState{

	162 entState: &sizeTracker{},

	163 memDS: memDS.Raw(),

	164 roots: roots,

	165 rootLimit: rootLimit,

	166 ns: ns,

	167 aid: inf.AppID(),

	168 parentDS: datastore.Get(ctx).Raw(),

	169 parentState: parentState,

	170 sizeBudget: sizeBudget,

	171 }

	172 err = cb(context.WithValue(ctx, dsTxnBufParent, state))
	dnj 2015/09/30 16:35:26 nit: if err := ...; err != nil { .. } nit: if err := ...; err != nil { .. } iannucci 2015/09/30 17:10:28 done Show quoted text On 2015/09/30 at 16:35:26, dnj wrote: > nit: if err := ...; err != nil { .. } done
	173 if err != nil {

	174 return err

	175 }

	176 return state.apply()

	177 }

	178

	179 // item is a temporary object for representing key/entity pairs and their cache

	180 // state (e.g. if they exist in the in-memory datastore buffer or not).

	181 // Additionally item memoizes some common comparison strings. item objects

	182 // should never be persisted outside of a single function/query context.
	dnj 2015/09/30 16:35:26 nit: must never nit: must never iannucci 2015/09/30 17:10:28 done Show quoted text On 2015/09/30 at 16:35:26, dnj wrote: > nit: must** never done
	183 type item struct {

	184 key *datastore.Key

	185 data datastore.PropertyMap

	186 buffered bool

	187

	188 encKey string

	189

	190 // cmpRow is used to hold the toComparableString value for this item dur ing

	191 // a query.

	192 cmpRow string

	193

	194 // err is a bit of a hack for passing back synchronized errors from

	195 // queryToIter.

	196 err error

	197 }

	198

	199 func (i *item) getEncKey() string {

	200 if i.encKey == "" {

	201 i.encKey = string(serialize.ToBytes(i.key))

	202 }

	203 return i.encKey

	204 }

	205

	206 func (i *item) getCmpRow(lower, upper []byte, order []datastore.IndexColumn) str ing {

	207 if i.cmpRow == "" {

	208 row, key := toComparableString(lower, upper, order, i.key, i.dat a)

	209 i.cmpRow = string(row)

	210 if i.encKey == "" {

	211 i.encKey = string(key)

	212 }

	213 }

	214 return i.cmpRow

	215 }

	216

	217 func (t *txnBufState) updateRootsLocked(roots stringset.Set) error {

	218 curRootLen := t.roots.Len()

	219 proposedRoots := stringset.New(1)

	220 roots.Iter(func(root string) bool {

	221 if !t.roots.Has(root) {

	222 proposedRoots.Add(root)

	223 }

	224 return proposedRoots.Len()+curRootLen <= t.rootLimit

	225 })

	226 if proposedRoots.Len()+curRootLen > t.rootLimit {

	227 return ErrTooManyRoots

	228 }

	229 // only need to update the roots if they did something that required upd ating

	230 if proposedRoots.Len() > 0 {

	231 proposedRoots.Iter(func(root string) bool {

	232 t.roots.Add(root)

	233 return true

	234 })

	235 }

	236 return nil

	237 }

	238

	239 func (t txnBufState) getMulti(keys []datastore.Key) ([]item, error) {

	240 encKeys, roots := toEncoded(keys)

	241 ret := make([]item, len(keys))

	242

	243 idxMap := []int(nil)

	244 toGetKeys := []*datastore.Key(nil)

	245

	246 t.Lock()

	247 defer t.Unlock()

	248

	249 if err := t.updateRootsLocked(roots); err != nil {

	250 return nil, err

	251 }

	252

	253 for i, key := range keys {

	254 ret[i].key = key

	255 ret[i].encKey = encKeys[i]

	256 if size, ok := t.entState.get(ret[i].getEncKey()); ok {

	257 ret[i].buffered = true

	258 if size > 0 {

	259 idxMap = append(idxMap, i)

	260 toGetKeys = append(toGetKeys, key)

	261 }

	262 }

	263 }

	264

	265 if len(toGetKeys) > 0 {

	266 j := 0

	267 t.memDS.GetMulti(toGetKeys, nil, func(pm datastore.PropertyMap, err error) {

	268 impossible(err)

	269 ret[idxMap[j]].data = pm

	270 j++

	271 })

	272 }

	273

	274 return ret, nil

	275 }

	276

	277 func (t txnBufState) deleteMulti(keys []datastore.Key) error {

	278 encKeys, roots := toEncoded(keys)

	279

	280 t.Lock()

	281 defer t.Unlock()

	282

	283 if err := t.updateRootsLocked(roots); err != nil {

	284 return err

	285 }

	286

	287 i := 0

	288 err := t.memDS.DeleteMulti(keys, func(err error) {

	289 impossible(err)

	290 t.entState.set(encKeys[i], 0)

	291 i++

	292 })

	293 impossible(err)

	294 return nil

	295 }

	296

	297 func (t txnBufState) putMulti(keys []datastore.Key, vals []datastore.PropertyM ap) error {

	298 encKeys, roots := toEncoded(keys)

	299

	300 t.Lock()

	301 defer t.Unlock()

	302

	303 if err := t.updateRootsLocked(roots); err != nil {

	304 return err

	305 }

	306

	307 i := 0

	308 err := t.memDS.PutMulti(keys, vals, func(k *datastore.Key, err error) {

	309 impossible(err)

	310 t.entState.set(encKeys[i], vals[i].EstimateSize())

	311 i++

	312 })

	313 impossible(err)

	314 return nil

	315 }

	316

	317 // apply actually takes the buffered transaction and applies it to the parent

	318 // transaction. It will only return an error if the underlying 'real' datastore

	319 // returns an error on PutMulti or DeleteMulti.

	320 func (t *txnBufState) apply() error {

	321 t.Lock()

	322 defer t.Unlock()

	323

	324 // if parentState is nil... just try to commit this anyway. The estimate s

	325 // we're using here are just educated guesses. If it fits for real, then

	326 // hooray. If not, then the underlying datastore will error.

	327 if t.parentState != nil {

	328 proposedState := t.parentState.entState.dup()
	dnj 2015/09/30 16:35:26 Need to lock parentState to dup this? Need to lock parentState to dup this? iannucci 2015/09/30 17:10:28 yep. done Show quoted text On 2015/09/30 at 16:35:26, dnj wrote: > Need to lock parentState to dup this? yep. done
	329 for k, v := range t.entState.keyToSize {

	330 proposedState.set(k, v)

	331 }

	332 if proposedState.total > t.sizeBudget {

	333 return ErrTransactionTooLarge

	334 }

	335 }

	336

	337 toPutKeys := []*datastore.Key(nil)

	338 toPut := []datastore.PropertyMap(nil)

	339 toDel := []*datastore.Key(nil)

	340

	341 // need to pull all items out of the in-memory datastore. Fortunately we have

	342 // kindless queries, and we disabled all the special entities, so just

	343 // run a kindless query without any filters and it will return all data

	344 // currently in memDS :).

	345 fq, err := datastore.NewQuery("").Finalize()

	346 impossible(err)

	347

	348 err = t.memDS.Run(fq, func(key *datastore.Key, data datastore.PropertyMa p, _ datastore.CursorCB) bool {

	349 toPutKeys = append(toPutKeys, key)

	350 toPut = append(toPut, data)

	351 return true

	352 })

	353 memoryCorruption(err)

	354

	355 for keyStr, size := range t.entState.keyToSize {

	356 if size == 0 {

	357 k, err := serialize.ReadKey(bytes.NewBufferString(keyStr ), serialize.WithoutContext, t.aid, t.ns)

	358 memoryCorruption(err)

	359 toDel = append(toDel, k)

	360 }

	361 }

	362

	363 wg := sync.WaitGroup{}
	dnj 2015/09/30 16:35:26 WDYT about using parallel.FanOutIn for this? WDYT about using parallel.FanOutIn for this? iannucci 2015/09/30 17:10:28 wow much better. y I do it rong? Show quoted text On 2015/09/30 at 16:35:26, dnj wrote: > WDYT about using parallel.FanOutIn for this? wow much better. y I do it rong?
	364

	365 pErr := error(nil)

	366 dErr := error(nil)

	367

	368 ds := t.parentDS

	369 if toPut != nil {
	dnj 2015/09/30 16:35:26 nit: len(toPut) > 0 nit: len(toPut) > 0 iannucci 2015/09/30 17:10:28 ya Show quoted text On 2015/09/30 at 16:35:26, dnj wrote: > nit: len(toPut) > 0 ya
	370 wg.Add(1)

	371 go func() {

	372 defer wg.Done()

	373 mErr := errors.NewLazyMultiError(len(toPut))

	374 i := 0

	375 pErr = ds.PutMulti(toPutKeys, toPut, func(_ *datastore.K ey, err error) {

	376 i++
	iannucci 2015/09/30 17:10:28 hey look! a boog! i is incremented before assigned hey look! a boog! i is incremented before assigned to the multierror!
	377 mErr.Assign(i, err)

	378 })

	379 pErr = mErr.Get()

	380 }()

	381 }

	382

	383 if toDel != nil {
	dnj 2015/09/30 16:35:26 nit: len(toDel) > 0 nit: len(toDel) > 0
	384 wg.Add(1)

	385 go func() {

	386 defer wg.Done()

	387 mErr := errors.NewLazyMultiError(len(toDel))

	388 i := 0

	389 dErr = ds.DeleteMulti(toDel, func(err error) {

	390 mErr.Assign(i, err)

	391 i++

	392 })

	393 dErr = mErr.Get()
	iannucci 2015/09/30 17:10:28 hey look! a boog! dErr is blindly overwritten! hey look! a boog! dErr is blindly overwritten!
	394 }()

	395 }

	396 wg.Wait()

	397

	398 if pErr != nil {

	399 return pErr

	400 }

	401 return dErr

	402 }

	403

	404 // toEncoded returns a list of all of the serialized versions of these keys,

	405 // plus a stringset of all the encoded root keys that.
	dnj 2015/09/30 16:35:26 nit: root keys that ... what? I must know! nit: root keys that ... what? I must know! iannucci 2015/09/30 17:10:28 doone Show quoted text On 2015/09/30 at 16:35:26, dnj wrote: > nit: root keys that ... what? I must know! doone
	406 func toEncoded(keys []*datastore.Key) (full []string, roots stringset.Set) {

	407 roots = stringset.New(len(keys))

	408 full = make([]string, len(keys))

	409 for i, k := range keys {

	410 roots.Add(string(serialize.ToBytes(k.Root())))

	411 full[i] = string(serialize.ToBytes(k))

	412 }

	413 return

	414 }

OLD	NEW

« filter/txnBuf/query_merger.go ('K') | « filter/txnBuf/query_merger.go ('k') | filter/txnBuf/txnbuf_test.go » ('j') | no next file with comments »