From b9e6389276010ddb17d29358d91e2aee39747de5 Mon Sep 17 00:00:00 2001 From: Dev Ojha Date: Sat, 28 Aug 2021 14:40:46 -0500 Subject: [PATCH 1/4] V0.42.9 with cache kv improvement (#24) * use memdb for cachekv * Remove remaining n^2 nature of CacheKV (Speedup dirtyItems) * Delete deadcode, fix lint Co-authored-by: mconcat --- store/cachekv/memiterator.go | 110 +++++++++-------------------------- store/cachekv/store.go | 104 +++++++++++++++++++++------------ 2 files changed, 95 insertions(+), 119 deletions(-) diff --git a/store/cachekv/memiterator.go b/store/cachekv/memiterator.go index b197ac141660..06a92d58d77e 100644 --- a/store/cachekv/memiterator.go +++ b/store/cachekv/memiterator.go @@ -1,107 +1,51 @@ package cachekv import ( - "errors" dbm "github.com/tendermint/tm-db" - "github.com/cosmos/cosmos-sdk/types/kv" + "github.com/cosmos/cosmos-sdk/store/types" ) // Iterates over iterKVCache items. // if key is nil, means it was deleted. // Implements Iterator. type memIterator struct { - start, end []byte - items []*kv.Pair - ascending bool -} - -func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator { - itemsInDomain := make([]*kv.Pair, 0, items.Len()) - - var entered bool - - for e := items.Front(); e != nil; e = e.Next() { - item := e.Value - if !dbm.IsKeyInDomain(item.Key, start, end) { - if entered { - break - } - - continue - } - - itemsInDomain = append(itemsInDomain, item) - entered = true - } - - return &memIterator{ - start: start, - end: end, - items: itemsInDomain, - ascending: ascending, - } -} - -func (mi *memIterator) Domain() ([]byte, []byte) { - return mi.start, mi.end -} + types.Iterator -func (mi *memIterator) Valid() bool { - return len(mi.items) > 0 + deleted map[string]struct{} } -func (mi *memIterator) assertValid() { - if err := mi.Error(); err != nil { - panic(err) - } -} +func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator { + var iter types.Iterator + var err error -func (mi *memIterator) Next() { - mi.assertValid() + if ascending { + iter, err = items.Iterator(start, end) + } else { + iter, err = items.ReverseIterator(start, end) + } - if mi.ascending { - mi.items = mi.items[1:] - } else { - mi.items = mi.items[:len(mi.items)-1] - } -} + if err != nil { + panic(err) + } -func (mi *memIterator) Key() []byte { - mi.assertValid() + newDeleted := make(map[string]struct{}) + for k, v := range deleted { + newDeleted[k]=v + } - if mi.ascending { - return mi.items[0].Key - } + return &memIterator { + Iterator: iter, - return mi.items[len(mi.items)-1].Key + deleted: newDeleted, + } } func (mi *memIterator) Value() []byte { - mi.assertValid() - - if mi.ascending { - return mi.items[0].Value - } - - return mi.items[len(mi.items)-1].Value -} - -func (mi *memIterator) Close() error { - mi.start = nil - mi.end = nil - mi.items = nil - - return nil -} - -// Error returns an error if the memIterator is invalid defined by the Valid -// method. -func (mi *memIterator) Error() error { - if !mi.Valid() { - return errors.New("invalid memIterator") - } - - return nil + key := mi.Iterator.Key() + if _, ok := mi.deleted[string(key)]; ok { + return nil + } + return mi.Iterator.Value() } diff --git a/store/cachekv/store.go b/store/cachekv/store.go index f302d1873081..d8643a80c7e0 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -3,9 +3,11 @@ package cachekv import ( "bytes" "io" + "reflect" "sort" "sync" "time" + "unsafe" dbm "github.com/tendermint/tm-db" @@ -20,17 +22,17 @@ import ( // If value is nil but deleted is false, it means the parent doesn't have the // key. (No need to delete upon Write()) type cValue struct { - value []byte - deleted bool - dirty bool + value []byte + dirty bool } // Store wraps an in-memory cache around an underlying types.KVStore. type Store struct { mtx sync.Mutex cache map[string]*cValue + deleted map[string]struct{} unsortedCache map[string]struct{} - sortedCache *kv.List // always ascending sorted + sortedCache *dbm.MemDB // always ascending sorted parent types.KVStore } @@ -40,8 +42,9 @@ var _ types.CacheKVStore = (*Store)(nil) func NewStore(parent types.KVStore) *Store { return &Store{ cache: make(map[string]*cValue), + deleted: make(map[string]struct{}), unsortedCache: make(map[string]struct{}), - sortedCache: kv.NewList(), + sortedCache: dbm.NewMemDB(), parent: parent, } } @@ -122,7 +125,7 @@ func (store *Store) Write() { cacheValue := store.cache[key] switch { - case cacheValue.deleted: + case store.isDeleted(key): store.parent.Delete([]byte(key)) case cacheValue.value == nil: // Skip, it already doesn't exist in parent. @@ -133,8 +136,9 @@ func (store *Store) Write() { // Clear the cache store.cache = make(map[string]*cValue) + store.deleted = make(map[string]struct{}) store.unsortedCache = make(map[string]struct{}) - store.sortedCache = kv.NewList() + store.sortedCache = dbm.NewMemDB() } // CacheWrap implements CacheWrapper. @@ -178,23 +182,53 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator { } store.dirtyItems(start, end) - cache = newMemIterator(start, end, store.sortedCache, ascending) + cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending) return newCacheMergeIterator(parent, cache, ascending) } +// strToByte is meant to make a zero allocation conversion +// from string -> []byte to speed up operations, it is not meant +// to be used generally, but for a specific pattern to check for available +// keys within a domain. +func strToByte(s string) []byte { + var b []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + hdr.Cap = len(s) + hdr.Len = len(s) + hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data + return b +} + // Constructs a slice of dirty items, to use w/ memIterator. func (store *Store) dirtyItems(start, end []byte) { - unsorted := make([]*kv.Pair, 0) - n := len(store.unsortedCache) - for key := range store.unsortedCache { - if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { + unsorted := make([]*kv.Pair, 0) + // If the unsortedCache is too big, its costs too much to determine + // whats in the subset we are concerned about. + // If you are interleaving iterator calls with writes, this can easily become an + // O(N^2) overhead. + // Even without that, too many range checks eventually becomes more expensive + // than just not having the cache. + if n >= 1024 { + for key := range store.unsortedCache { cacheValue := store.cache[key] unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) } + } else { + // else do a linear scan to determine if the unsorted pairs are in the pool. + for key := range store.unsortedCache { + if dbm.IsKeyInDomain(strToByte(key), start, end) { + cacheValue := store.cache[key] + unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) + } + } } + store.clearUnsortedCacheSubset(unsorted) +} +func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) { + n := len(store.unsortedCache) if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map. for key := range store.unsortedCache { delete(store.unsortedCache, key) @@ -204,32 +238,21 @@ func (store *Store) dirtyItems(start, end []byte) { delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key)) } } - sort.Slice(unsorted, func(i, j int) bool { return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0 }) - for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; { - uitem := unsorted[0] - sitem := e.Value - comp := bytes.Compare(uitem.Key, sitem.Key) - - switch comp { - case -1: - unsorted = unsorted[1:] - - store.sortedCache.InsertBefore(uitem, e) - case 1: - e = e.Next() - case 0: - unsorted = unsorted[1:] - e.Value = uitem - e = e.Next() + for _, item := range unsorted { + if item.Value == nil { + // deleted element, tracked by store.deleted + // setting arbitrary value + store.sortedCache.Set(item.Key, []byte{}) + continue + } + err := store.sortedCache.Set(item.Key, item.Value) + if err != nil { + panic(err) } - } - - for _, kvp := range unsorted { - store.sortedCache.PushBack(kvp) } } @@ -240,11 +263,20 @@ func (store *Store) dirtyItems(start, end []byte) { func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) { keyStr := conv.UnsafeBytesToStr(key) store.cache[keyStr] = &cValue{ - value: value, - deleted: deleted, - dirty: dirty, + value: value, + dirty: dirty, + } + if deleted { + store.deleted[keyStr] = struct{}{} + } else { + delete(store.deleted, keyStr) } if dirty { store.unsortedCache[keyStr] = struct{}{} } } + +func (store *Store) isDeleted(key string) bool { + _, ok := store.deleted[key] + return ok +} From 45674730a1cb2b5c77da15fed119af33ebd18a35 Mon Sep 17 00:00:00 2001 From: ValarDragon Date: Sun, 29 Aug 2021 03:05:55 -0400 Subject: [PATCH 2/4] Add Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3ee0f67bd58..ffbad81211a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,6 +85,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Improvements +* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions. * (deps) [\#9956](https://github.com/cosmos/cosmos-sdk/pull/9956) Bump Tendermint to [v0.34.12](https://github.com/tendermint/tendermint/releases/tag/v0.34.12). * (cli) [\#9856](https://github.com/cosmos/cosmos-sdk/pull/9856) Overwrite `--sequence` and `--account-number` flags with default flag values when used with `offline=false` in `sign-batch` command. From 3a4d2787994932f65f3b681e7151cd47ee317097 Mon Sep 17 00:00:00 2001 From: ValarDragon Date: Sun, 29 Aug 2021 03:07:40 -0400 Subject: [PATCH 3/4] Gofmt file --- store/cachekv/memiterator.go | 53 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/store/cachekv/memiterator.go b/store/cachekv/memiterator.go index 06a92d58d77e..0a4bc57a6406 100644 --- a/store/cachekv/memiterator.go +++ b/store/cachekv/memiterator.go @@ -1,51 +1,50 @@ package cachekv import ( - dbm "github.com/tendermint/tm-db" - "github.com/cosmos/cosmos-sdk/store/types" + "github.com/cosmos/cosmos-sdk/store/types" ) // Iterates over iterKVCache items. // if key is nil, means it was deleted. // Implements Iterator. type memIterator struct { - types.Iterator + types.Iterator - deleted map[string]struct{} + deleted map[string]struct{} } func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator { - var iter types.Iterator - var err error + var iter types.Iterator + var err error - if ascending { - iter, err = items.Iterator(start, end) - } else { - iter, err = items.ReverseIterator(start, end) - } + if ascending { + iter, err = items.Iterator(start, end) + } else { + iter, err = items.ReverseIterator(start, end) + } - if err != nil { - panic(err) - } + if err != nil { + panic(err) + } - newDeleted := make(map[string]struct{}) - for k, v := range deleted { - newDeleted[k]=v - } + newDeleted := make(map[string]struct{}) + for k, v := range deleted { + newDeleted[k] = v + } - return &memIterator { - Iterator: iter, + return &memIterator{ + Iterator: iter, - deleted: newDeleted, - } + deleted: newDeleted, + } } func (mi *memIterator) Value() []byte { - key := mi.Iterator.Key() - if _, ok := mi.deleted[string(key)]; ok { - return nil - } - return mi.Iterator.Value() + key := mi.Iterator.Key() + if _, ok := mi.deleted[string(key)]; ok { + return nil + } + return mi.Iterator.Value() } From f0aa753698d456b4ef7994a20413abc47cc15a3a Mon Sep 17 00:00:00 2001 From: ValarDragon Date: Sun, 5 Sep 2021 14:52:58 -0500 Subject: [PATCH 4/4] Use internal/conv --- store/cachekv/store.go | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/store/cachekv/store.go b/store/cachekv/store.go index d8643a80c7e0..25cdd8138610 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -3,11 +3,9 @@ package cachekv import ( "bytes" "io" - "reflect" "sort" "sync" "time" - "unsafe" dbm "github.com/tendermint/tm-db" @@ -187,19 +185,6 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator { return newCacheMergeIterator(parent, cache, ascending) } -// strToByte is meant to make a zero allocation conversion -// from string -> []byte to speed up operations, it is not meant -// to be used generally, but for a specific pattern to check for available -// keys within a domain. -func strToByte(s string) []byte { - var b []byte - hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - hdr.Cap = len(s) - hdr.Len = len(s) - hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - return b -} - // Constructs a slice of dirty items, to use w/ memIterator. func (store *Store) dirtyItems(start, end []byte) { n := len(store.unsortedCache) @@ -218,7 +203,7 @@ func (store *Store) dirtyItems(start, end []byte) { } else { // else do a linear scan to determine if the unsorted pairs are in the pool. for key := range store.unsortedCache { - if dbm.IsKeyInDomain(strToByte(key), start, end) { + if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { cacheValue := store.cache[key] unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) }