From 91c2b765025e3b3f2a9d25e537d15c002d4d1a2c Mon Sep 17 00:00:00 2001 From: Roman <34196718+p0mvn@users.noreply.github.com> Date: Mon, 28 Mar 2022 17:13:54 -0700 Subject: [PATCH 1/9] refactor: implement cache abstraction and unit test * implementing cache abstraction, unit test Add * create benchmark for Add(), start working on benchmark for Remove() * implement no duplicates in cache and unit test * switch fast node cache to the new abstraction * switch regular cache to the new abstraction * fmt and add interface godoc * rename receiver to c from nc * const fastNodeCacheLimit * move benchmarks to a separate file --- cache/cache.go | 96 ++++++++++++ cache/cache_bench_test.go | 67 ++++++++ cache/cache_test.go | 311 ++++++++++++++++++++++++++++++++++++++ fast_node.go | 7 + node.go | 7 + nodedb.go | 106 ++++--------- tree_random_test.go | 10 +- 7 files changed, 519 insertions(+), 85 deletions(-) create mode 100644 cache/cache.go create mode 100644 cache/cache_bench_test.go create mode 100644 cache/cache_test.go diff --git a/cache/cache.go b/cache/cache.go new file mode 100644 index 000000000..82dfed20b --- /dev/null +++ b/cache/cache.go @@ -0,0 +1,96 @@ +package cache + +import ( + "container/list" +) + +// Node represents a node eligible for caching. +type Node interface { + GetKey() []byte +} + +// Cache is an in-memory structure to persist nodes for quick access. +type Cache interface { + // Adds node to cache. If full and had to remove the oldest element, + // returns the oldest, otherwise nil. + Add(node Node) Node + + // Returns Node for the key, if exists. nil otherwise. + Get(key []byte) Node + + // Has returns true if node with key exists in cache, false otherwise. + Has(key []byte) bool + + // Remove removes node with key from cache. The removed node is returned. + // if not in cache, return nil. + Remove(key []byte) Node + + // Len returns the cache length. + Len() int +} + +// lruCache is an LRU cache implementation. +type lruCache struct { + dict map[string]*list.Element // FastNode cache. + cacheLimit int // FastNode cache size limit in elements. + ll *list.List // LRU queue of cache elements. Used for deletion. +} + +var _ Cache = (*lruCache)(nil) + +func New(cacheLimit int) Cache { + return &lruCache{ + dict: make(map[string]*list.Element), + cacheLimit: cacheLimit, + ll: list.New(), + } +} + +func (c *lruCache) Add(node Node) Node { + if e, exists := c.dict[string(node.GetKey())]; exists { + c.ll.MoveToFront(e) + old := e.Value + e.Value = node + return old.(Node) + } + + elem := c.ll.PushFront(node) + c.dict[string(node.GetKey())] = elem + + if c.ll.Len() > c.cacheLimit { + oldest := c.ll.Back() + + return c.remove(oldest) + } + return nil +} + +func (nc *lruCache) Get(key []byte) Node { + if ele, hit := nc.dict[string(key)]; hit { + nc.ll.MoveToFront(ele) + return ele.Value.(Node) + } + return nil +} + +func (c *lruCache) Has(key []byte) bool { + _, exists := c.dict[string(key)] + return exists +} + +func (nc *lruCache) Len() int { + return nc.ll.Len() +} + +func (c *lruCache) Remove(key []byte) Node { + if elem, exists := c.dict[string(key)]; exists { + return c.remove(elem) + } + return nil +} + +func (c *lruCache) remove(e *list.Element) Node { + removed := c.ll.Remove(e).(Node) + delete(c.dict, string(removed.GetKey())) + return removed +} diff --git a/cache/cache_bench_test.go b/cache/cache_bench_test.go new file mode 100644 index 000000000..a8bd7fa15 --- /dev/null +++ b/cache/cache_bench_test.go @@ -0,0 +1,67 @@ +package cache_test + +import ( + "math/rand" + "testing" + + "github.com/cosmos/iavl/cache" +) + +func BenchmarkAdd(b *testing.B) { + b.ReportAllocs() + testcases := map[string]struct { + cacheLimit int + keySize int + }{ + "small - limit: 10K, key size - 10b": { + cacheLimit: 10000, + keySize: 10, + }, + "med - limit: 100K, key size 20b": { + cacheLimit: 100000, + keySize: 20, + }, + "large - limit: 1M, key size 30b": { + cacheLimit: 1000000, + keySize: 30, + }, + } + + for name, tc := range testcases { + cache := cache.New(tc.cacheLimit) + b.Run(name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = cache.Add(&testNode{ + key: randBytes(tc.keySize), + }) + } + }) + } +} + +func BenchmarkRemove(b *testing.B) { + b.ReportAllocs() + + b.StopTimer() + cache := cache.New(1000) + existentKeyMirror := [][]byte{} + // Populate cache + for i := 0; i < 50; i++ { + key := randBytes(1000) + + existentKeyMirror = append(existentKeyMirror, key) + + cache.Add(&testNode{ + key: key, + }) + } + + randSeed := 49872768940 // For deterministic tests + r := rand.New(rand.NewSource(int64(randSeed))) + + for i := 0; i < b.N; i++ { + key := existentKeyMirror[r.Intn(len(existentKeyMirror))] + b.ResetTimer() + _ = cache.Remove(key) + } +} diff --git a/cache/cache_test.go b/cache/cache_test.go new file mode 100644 index 000000000..4f0b40ab6 --- /dev/null +++ b/cache/cache_test.go @@ -0,0 +1,311 @@ +package cache_test + +import ( + "crypto/rand" + "fmt" + "testing" + + "github.com/cosmos/iavl/cache" + "github.com/stretchr/testify/require" +) + +// expectedResult represents the expected result of each add/remove operation. +// It can be noneRemoved or the index of the removed node in testNodes +type expectedResult int + +const ( + noneRemoved expectedResult = -1 + // The rest represent the index of the removed node +) + +// testNode is the node used for testing cache implementation +type testNode struct { + key []byte +} + +type cacheOp struct { + testNodexIdx int + expectedResult expectedResult +} + +type testcase struct { + setup func(cache.Cache) + cacheLimit int + cacheOps []cacheOp + expectedNodeIndexes []int // contents of the cache once test case completes represent by indexes in testNodes +} + +func (tn *testNode) GetKey() []byte { + return tn.key +} + +const ( + testKey = "key" +) + +var _ cache.Node = (*testNode)(nil) + +var ( + testNodes = []cache.Node{ + &testNode{ + key: []byte(fmt.Sprintf("%s%d", testKey, 1)), + }, + &testNode{ + key: []byte(fmt.Sprintf("%s%d", testKey, 2)), + }, + &testNode{ + key: []byte(fmt.Sprintf("%s%d", testKey, 3)), + }, + } +) + +func Test_Cache_Add(t *testing.T) { + testcases := map[string]testcase{ + "add 1 node with 1 limit - added": { + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{0}, + }, + "add 1 node twice, cache limit 2 - only one added": { + cacheLimit: 2, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 0, + expectedResult: 0, + }, + }, + expectedNodeIndexes: []int{0}, + }, + "add 1 node with 0 limit - not added and return itself": { + cacheLimit: 0, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 0, + }, + }, + }, + "add 3 nodes with 1 limit - first 2 removed": { + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 1, + expectedResult: 0, + }, + { + testNodexIdx: 2, + expectedResult: 1, + }, + }, + expectedNodeIndexes: []int{2}, + }, + "add 3 nodes with 2 limit - first removed": { + cacheLimit: 2, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 1, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 2, + expectedResult: 0, + }, + }, + expectedNodeIndexes: []int{1, 2}, + }, + "add 3 nodes with 10 limit - non removed": { + cacheLimit: 10, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 1, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 2, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{0, 1, 2}, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + cache := cache.New(tc.cacheLimit) + + expectedCurSize := 0 + + for _, op := range tc.cacheOps { + + actualResult := cache.Add(testNodes[op.testNodexIdx]) + + expectedResult := op.expectedResult + + if expectedResult == noneRemoved { + require.Nil(t, actualResult) + expectedCurSize++ + } else { + require.NotNil(t, actualResult) + + // Here, op.expectedResult represents the index of the removed node in tc.cacheOps + require.Equal(t, testNodes[int(op.expectedResult)], actualResult) + } + require.Equal(t, expectedCurSize, cache.Len()) + } + + validateCacheContentsAfterTest(t, tc, cache) + }) + } +} + +func Test_Cache_Remove(t *testing.T) { + testcases := map[string]testcase{ + "remove non-existent key, cache limit 0 - nil returned": { + cacheLimit: 0, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + }, + "remove non-existent key, cache limit 1 - nil returned": { + setup: func(c cache.Cache) { + require.Nil(t, c.Add(testNodes[1])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{1}, + }, + "remove existent key, cache limit 1 - removed": { + setup: func(c cache.Cache) { + require.Nil(t, c.Add(testNodes[0])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 0, + }, + }, + }, + "remove twice, cache limit 1 - removed first time, then nil": { + setup: func(c cache.Cache) { + require.Nil(t, c.Add(testNodes[0])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 0, + }, + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + }, + "remove all, cache limit 3": { + setup: func(c cache.Cache) { + require.Nil(t, c.Add(testNodes[0])) + require.Nil(t, c.Add(testNodes[1])) + require.Nil(t, c.Add(testNodes[2])) + require.Equal(t, 3, c.Len()) + }, + cacheLimit: 3, + cacheOps: []cacheOp{ + { + testNodexIdx: 2, + expectedResult: 2, + }, + { + testNodexIdx: 0, + expectedResult: 0, + }, + { + testNodexIdx: 1, + expectedResult: 1, + }, + }, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + cache := cache.New(tc.cacheLimit) + + if tc.setup != nil { + tc.setup(cache) + } + + expectedCurSize := cache.Len() + + for _, op := range tc.cacheOps { + + actualResult := cache.Remove(testNodes[op.testNodexIdx].GetKey()) + + expectedResult := op.expectedResult + + if expectedResult == noneRemoved { + require.Nil(t, actualResult) + } else { + expectedCurSize-- + require.NotNil(t, actualResult) + + // Here, op.expectedResult represents the index of the removed node in tc.cacheOps + require.Equal(t, testNodes[int(op.expectedResult)], actualResult) + } + require.Equal(t, expectedCurSize, cache.Len()) + } + + validateCacheContentsAfterTest(t, tc, cache) + }) + } +} + +func validateCacheContentsAfterTest(t *testing.T, tc testcase, cache cache.Cache) { + require.Equal(t, len(tc.expectedNodeIndexes), cache.Len()) + for _, idx := range tc.expectedNodeIndexes { + expectedNode := testNodes[idx] + require.True(t, cache.Has(expectedNode.GetKey())) + require.Equal(t, expectedNode, cache.Get(expectedNode.GetKey())) + } +} + +func randBytes(length int) []byte { + key := make([]byte, length) + // math.rand.Read always returns err=nil + // we do not need cryptographic randomness for this test: + //nolint:gosec + rand.Read(key) + return key +} diff --git a/fast_node.go b/fast_node.go index a116b8efb..2d03287ab 100644 --- a/fast_node.go +++ b/fast_node.go @@ -3,6 +3,7 @@ package iavl import ( "io" + "github.com/cosmos/iavl/cache" "github.com/cosmos/iavl/internal/encoding" "github.com/pkg/errors" ) @@ -16,6 +17,8 @@ type FastNode struct { value []byte } +var _ cache.Node = (*FastNode)(nil) + // NewFastNode returns a new fast node from a value and version. func NewFastNode(key []byte, value []byte, version int64) *FastNode { return &FastNode{ @@ -47,6 +50,10 @@ func DeserializeFastNode(key []byte, buf []byte) (*FastNode, error) { return fastNode, nil } +func (fn *FastNode) GetKey() []byte { + return fn.key +} + func (node *FastNode) encodedSize() int { n := encoding.EncodeVarintSize(node.versionLastUpdatedAt) + encoding.EncodeBytesSize(node.value) return n diff --git a/node.go b/node.go index 740cde71b..607a164d2 100644 --- a/node.go +++ b/node.go @@ -10,6 +10,7 @@ import ( "io" "math" + "github.com/cosmos/iavl/cache" "github.com/pkg/errors" "github.com/cosmos/iavl/internal/encoding" @@ -30,6 +31,8 @@ type Node struct { persisted bool } +var _ cache.Node = (*Node)(nil) + // NewNode returns a new node from a key, value and version. func NewNode(key []byte, value []byte, version int64) *Node { return &Node{ @@ -107,6 +110,10 @@ func MakeNode(buf []byte) (*Node, error) { return node, nil } +func (n *Node) GetKey() []byte { + return n.hash +} + // String returns a string representation of the node. func (node *Node) String() string { hashstr := "" diff --git a/nodedb.go b/nodedb.go index 7d58ad926..3737a65ba 100644 --- a/nodedb.go +++ b/nodedb.go @@ -2,7 +2,6 @@ package iavl import ( "bytes" - "container/list" "crypto/sha256" "fmt" "math" @@ -11,6 +10,7 @@ import ( "strings" "sync" + "github.com/cosmos/iavl/cache" "github.com/cosmos/iavl/internal/logger" "github.com/pkg/errors" dbm "github.com/tendermint/tm-db" @@ -30,6 +30,7 @@ const ( // Using semantic versioning: https://semver.org/ defaultStorageVersionValue = "1.0.0" fastStorageVersionValue = "1.1.0" + fastNodeCacheLimit = 100000 ) var ( @@ -74,15 +75,9 @@ type nodeDB struct { opts Options // Options to customize for pruning/writing versionReaders map[int64]uint32 // Number of active version readers storageVersion string // Storage version - latestVersion int64 - nodeCache map[string]*list.Element // Node cache. - nodeCacheSize int // Node cache size limit in elements. - nodeCacheQueue *list.List // LRU queue of cache elements. Used for deletion. - - fastNodeCache map[string]*list.Element // FastNode cache. - fastNodeCacheSize int // FastNode cache size limit in elements. - fastNodeCacheQueue *list.List // LRU queue of cache elements. Used for deletion. + nodeCache cache.Cache + fastNodeCache cache.Cache } func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB { @@ -98,18 +93,14 @@ func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB { } return &nodeDB{ - db: db, - batch: db.NewBatch(), - opts: *opts, - latestVersion: 0, // initially invalid - nodeCache: make(map[string]*list.Element), - nodeCacheSize: cacheSize, - nodeCacheQueue: list.New(), - fastNodeCache: make(map[string]*list.Element), - fastNodeCacheSize: 100000, - fastNodeCacheQueue: list.New(), - versionReaders: make(map[int64]uint32, 8), - storageVersion: string(storeVersion), + db: db, + batch: db.NewBatch(), + opts: *opts, + latestVersion: 0, // initially invalid + nodeCache: cache.New(cacheSize), + fastNodeCache: cache.New(fastNodeCacheLimit), + versionReaders: make(map[int64]uint32, 8), + storageVersion: string(storeVersion), } } @@ -124,10 +115,8 @@ func (ndb *nodeDB) GetNode(hash []byte) (*Node, error) { } // Check the cache. - if elem, ok := ndb.nodeCache[string(hash)]; ok { - // Already exists. Move to back of nodeCacheQueue. - ndb.nodeCacheQueue.MoveToBack(elem) - return elem.Value.(*Node), nil + if cachedNode := ndb.nodeCache.Get(hash); cachedNode != nil { + return cachedNode.(*Node), nil } // Doesn't exist, load. @@ -146,7 +135,7 @@ func (ndb *nodeDB) GetNode(hash []byte) (*Node, error) { node.hash = hash node.persisted = true - ndb.cacheNode(node) + ndb.nodeCache.Add(node) return node, nil } @@ -163,11 +152,8 @@ func (ndb *nodeDB) GetFastNode(key []byte) (*FastNode, error) { return nil, fmt.Errorf("nodeDB.GetFastNode() requires key, len(key) equals 0") } - // Check the cache. - if elem, ok := ndb.fastNodeCache[string(key)]; ok { - // Already exists. Move to back of fastNodeCacheQueue. - ndb.fastNodeCacheQueue.MoveToBack(elem) - return elem.Value.(*FastNode), nil + if cachedFastNode := ndb.fastNodeCache.Get(key); cachedFastNode != nil { + return cachedFastNode.(*FastNode), nil } // Doesn't exist, load. @@ -184,7 +170,7 @@ func (ndb *nodeDB) GetFastNode(key []byte) (*FastNode, error) { return nil, fmt.Errorf("error reading FastNode. bytes: %x, error: %w", buf, err) } - ndb.cacheFastNode(fastNode) + ndb.fastNodeCache.Add(fastNode) return fastNode, nil } @@ -213,7 +199,7 @@ func (ndb *nodeDB) SaveNode(node *Node) error { } logger.Debug("BATCH SAVE %X %p\n", node.hash, node) node.persisted = true - ndb.cacheNode(node) + ndb.nodeCache.Add(node) return nil } @@ -310,7 +296,7 @@ func (ndb *nodeDB) saveFastNodeUnlocked(node *FastNode, shouldAddToCache bool) e return fmt.Errorf("error while writing key/val to nodedb batch. Err: %w", err) } if shouldAddToCache { - ndb.cacheFastNode(node) + ndb.fastNodeCache.Add(node) } return nil } @@ -469,7 +455,7 @@ func (ndb *nodeDB) DeleteVersionsFrom(version int64) error { if err = ndb.batch.Delete(ndb.nodeKey(hash)); err != nil { return err } - ndb.uncacheNode(hash) + ndb.nodeCache.Remove(hash) } else if toVersion >= version-1 { if err = ndb.batch.Delete(key); err != nil { return err @@ -507,7 +493,7 @@ func (ndb *nodeDB) DeleteVersionsFrom(version int64) error { if err = ndb.batch.Delete(keyWithPrefix); err != nil { return err } - ndb.uncacheFastNode(key) + ndb.fastNodeCache.Remove(key) } return nil }) @@ -563,7 +549,7 @@ func (ndb *nodeDB) DeleteVersionsRange(fromVersion, toVersion int64) error { if err := ndb.batch.Delete(ndb.nodeKey(hash)); err != nil { return err } - ndb.uncacheNode(hash) + ndb.nodeCache.Remove(hash) } else { if err := ndb.saveOrphan(hash, from, predecessor); err != nil { return err @@ -596,7 +582,7 @@ func (ndb *nodeDB) DeleteFastNode(key []byte) error { if err := ndb.batch.Delete(ndb.fastNodeKey(key)); err != nil { return err } - ndb.uncacheFastNode(key) + ndb.fastNodeCache.Remove(key) return nil } @@ -628,7 +614,7 @@ func (ndb *nodeDB) deleteNodesFrom(version int64, hash []byte) error { return err } - ndb.uncacheNode(hash) + ndb.nodeCache.Remove(hash) } return nil @@ -701,7 +687,7 @@ func (ndb *nodeDB) deleteOrphans(version int64) error { if err := ndb.batch.Delete(ndb.nodeKey(hash)); err != nil { return err } - ndb.uncacheNode(hash) + ndb.nodeCache.Remove(hash) } else { logger.Debug("MOVE predecessor:%v fromVersion:%v toVersion:%v %X\n", predecessor, fromVersion, toVersion, hash) ndb.saveOrphan(hash, fromVersion, predecessor) @@ -870,46 +856,6 @@ func (ndb *nodeDB) getFastIterator(start, end []byte, ascending bool) (dbm.Itera return ndb.db.ReverseIterator(startFormatted, endFormatted) } -func (ndb *nodeDB) uncacheNode(hash []byte) { - if elem, ok := ndb.nodeCache[string(hash)]; ok { - ndb.nodeCacheQueue.Remove(elem) - delete(ndb.nodeCache, string(hash)) - } -} - -// Add a node to the cache and pop the least recently used node if we've -// reached the cache size limit. -func (ndb *nodeDB) cacheNode(node *Node) { - elem := ndb.nodeCacheQueue.PushBack(node) - ndb.nodeCache[string(node.hash)] = elem - - if ndb.nodeCacheQueue.Len() > ndb.nodeCacheSize { - oldest := ndb.nodeCacheQueue.Front() - hash := ndb.nodeCacheQueue.Remove(oldest).(*Node).hash - delete(ndb.nodeCache, string(hash)) - } -} - -func (ndb *nodeDB) uncacheFastNode(key []byte) { - if elem, ok := ndb.fastNodeCache[string(key)]; ok { - ndb.fastNodeCacheQueue.Remove(elem) - delete(ndb.fastNodeCache, string(key)) - } -} - -// Add a node to the cache and pop the least recently used node if we've -// reached the cache size limit. -func (ndb *nodeDB) cacheFastNode(node *FastNode) { - elem := ndb.fastNodeCacheQueue.PushBack(node) - ndb.fastNodeCache[string(node.key)] = elem - - if ndb.fastNodeCacheQueue.Len() > ndb.fastNodeCacheSize { - oldest := ndb.fastNodeCacheQueue.Front() - key := ndb.fastNodeCacheQueue.Remove(oldest).(*FastNode).key - delete(ndb.fastNodeCache, string(key)) - } -} - // Write to disk. func (ndb *nodeDB) Commit() error { ndb.mtx.Lock() diff --git a/tree_random_test.go b/tree_random_test.go index 7fee3b173..8816657bf 100644 --- a/tree_random_test.go +++ b/tree_random_test.go @@ -428,11 +428,11 @@ func assertFastNodeCacheIsLive(t *testing.T, tree *MutableTree, mirror map[strin return } - for key, cacheElem := range tree.ndb.fastNodeCache { - liveFastNode, ok := mirror[key] - - require.True(t, ok, "cached fast node must be in the live tree") - require.Equal(t, liveFastNode, string(cacheElem.Value.(*FastNode).value), "cached fast node's value must be equal to live state value") + require.Equal(t, len(mirror), tree.ndb.fastNodeCache.Len()) + for k, v := range mirror { + require.True(t, tree.ndb.fastNodeCache.Has([]byte(k)), "cached fast node must be in live tree") + mirrorNode := tree.ndb.fastNodeCache.Get([]byte(k)) + require.Equal(t, []byte(v), mirrorNode.(*FastNode).value, "cached fast node's value must be equal to live state value") } } From 48c3a5e0e5247cd82792019ed7f97ea13a76c978 Mon Sep 17 00:00:00 2001 From: Roman Date: Mon, 13 Jun 2022 21:57:37 -0400 Subject: [PATCH 2/9] fix bench --- cache/cache_bench_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cache/cache_bench_test.go b/cache/cache_bench_test.go index a8bd7fa15..117ca0957 100644 --- a/cache/cache_bench_test.go +++ b/cache/cache_bench_test.go @@ -56,7 +56,7 @@ func BenchmarkRemove(b *testing.B) { }) } - randSeed := 49872768940 // For deterministic tests + randSeed := 498727689 // For deterministic tests r := rand.New(rand.NewSource(int64(randSeed))) for i := 0; i < b.N; i++ { From 1c4c28853fafdfb4d62e05084eb6fc650941d13c Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 16 Jun 2022 18:46:43 -0400 Subject: [PATCH 3/9] comment about the reasons for implementing cache --- cache/cache.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cache/cache.go b/cache/cache.go index 82dfed20b..568881ef5 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -10,6 +10,8 @@ type Node interface { } // Cache is an in-memory structure to persist nodes for quick access. +// Please see lruCache for more details about why we need a custom +// cache implementation. type Cache interface { // Adds node to cache. If full and had to remove the oldest element, // returns the oldest, otherwise nil. @@ -30,6 +32,15 @@ type Cache interface { } // lruCache is an LRU cache implementation. +// The motivation for using a custom cache implementation is to +// allow for a custom limit policy. +// +// Currently, the cache limit is implemented in terms of the +// number of nodes which is not intuitive to configure. +// Instead, we are planning to add a byte limit. +// The alternative implementations do not allow for +// customization and the ability to estimate the byte +// size of the cache. type lruCache struct { dict map[string]*list.Element // FastNode cache. cacheLimit int // FastNode cache size limit in elements. From d696f75d7e085aede83d297cd1f2ccf678345c96 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 16 Jun 2022 19:33:15 -0400 Subject: [PATCH 4/9] expand test cases in TestNodeCacheStatisic for readability --- tree_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tree_test.go b/tree_test.go index 455d132c7..1605a3b83 100644 --- a/tree_test.go +++ b/tree_test.go @@ -1994,8 +1994,20 @@ func TestNodeCacheStatisic(t *testing.T) { expectCacheHitCnt int expectCacheMissCnt int }{ - "with_cache": {numKeyVals, numKeyVals, 0, 1, 0}, - "without_cache": {0, 0, numKeyVals, 0, 1}, + "with_cache": { + cacheSize: numKeyVals, + expectFastCacheHitCnt: numKeyVals, + expectFastCacheMissCnt: 0, + expectCacheHitCnt: 1, + expectCacheMissCnt: 0, + }, + "without_cache": { + cacheSize: 0, + expectFastCacheHitCnt: 0, + expectFastCacheMissCnt: numKeyVals, + expectCacheHitCnt: 0, + expectCacheMissCnt: 1, + }, } for name, tc := range testcases { From 8bf11a37589968c2de7d41d236bf98d1f0ce7ed4 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 6 Jul 2022 11:20:55 -0400 Subject: [PATCH 5/9] contract comment for adding nil --- cache/cache.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cache/cache.go b/cache/cache.go index 568881ef5..3313273e8 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -15,6 +15,7 @@ type Node interface { type Cache interface { // Adds node to cache. If full and had to remove the oldest element, // returns the oldest, otherwise nil. + // CONTRACT: node can never be nil. Otherwise, cache panics. Add(node Node) Node // Returns Node for the key, if exists. nil otherwise. From 5a5f34b06208acbe3ba22d2b4845558ed2f020b3 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 6 Jul 2022 11:37:45 -0400 Subject: [PATCH 6/9] rename limit to max / size --- cache/cache.go | 22 ++++++++--------- cache/cache_bench_test.go | 24 +++++++++---------- cache/cache_test.go | 50 +++++++++++++++++++-------------------- nodedb.go | 4 ++-- 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/cache/cache.go b/cache/cache.go index 3313273e8..c0f36c63d 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -34,27 +34,27 @@ type Cache interface { // lruCache is an LRU cache implementation. // The motivation for using a custom cache implementation is to -// allow for a custom limit policy. +// allow for a custom max policy. // -// Currently, the cache limit is implemented in terms of the +// Currently, the cache maximum is implemented in terms of the // number of nodes which is not intuitive to configure. -// Instead, we are planning to add a byte limit. +// Instead, we are planning to add a byte maximum. // The alternative implementations do not allow for // customization and the ability to estimate the byte // size of the cache. type lruCache struct { - dict map[string]*list.Element // FastNode cache. - cacheLimit int // FastNode cache size limit in elements. - ll *list.List // LRU queue of cache elements. Used for deletion. + dict map[string]*list.Element // FastNode cache. + maxElementCount int // FastNode the maximum number of nodes in the cache. + ll *list.List // LRU queue of cache elements. Used for deletion. } var _ Cache = (*lruCache)(nil) -func New(cacheLimit int) Cache { +func New(maxElementCount int) Cache { return &lruCache{ - dict: make(map[string]*list.Element), - cacheLimit: cacheLimit, - ll: list.New(), + dict: make(map[string]*list.Element), + maxElementCount: maxElementCount, + ll: list.New(), } } @@ -69,7 +69,7 @@ func (c *lruCache) Add(node Node) Node { elem := c.ll.PushFront(node) c.dict[string(node.GetKey())] = elem - if c.ll.Len() > c.cacheLimit { + if c.ll.Len() > c.maxElementCount { oldest := c.ll.Back() return c.remove(oldest) diff --git a/cache/cache_bench_test.go b/cache/cache_bench_test.go index 117ca0957..29ec12087 100644 --- a/cache/cache_bench_test.go +++ b/cache/cache_bench_test.go @@ -10,25 +10,25 @@ import ( func BenchmarkAdd(b *testing.B) { b.ReportAllocs() testcases := map[string]struct { - cacheLimit int - keySize int + cacheMax int + keySize int }{ - "small - limit: 10K, key size - 10b": { - cacheLimit: 10000, - keySize: 10, + "small - max: 10K, key size - 10b": { + cacheMax: 10000, + keySize: 10, }, - "med - limit: 100K, key size 20b": { - cacheLimit: 100000, - keySize: 20, + "med - max: 100K, key size 20b": { + cacheMax: 100000, + keySize: 20, }, - "large - limit: 1M, key size 30b": { - cacheLimit: 1000000, - keySize: 30, + "large - max: 1M, key size 30b": { + cacheMax: 1000000, + keySize: 30, }, } for name, tc := range testcases { - cache := cache.New(tc.cacheLimit) + cache := cache.New(tc.cacheMax) b.Run(name, func(b *testing.B) { for i := 0; i < b.N; i++ { _ = cache.Add(&testNode{ diff --git a/cache/cache_test.go b/cache/cache_test.go index 4f0b40ab6..7b2413ef3 100644 --- a/cache/cache_test.go +++ b/cache/cache_test.go @@ -30,7 +30,7 @@ type cacheOp struct { type testcase struct { setup func(cache.Cache) - cacheLimit int + cacheMax int cacheOps []cacheOp expectedNodeIndexes []int // contents of the cache once test case completes represent by indexes in testNodes } @@ -61,8 +61,8 @@ var ( func Test_Cache_Add(t *testing.T) { testcases := map[string]testcase{ - "add 1 node with 1 limit - added": { - cacheLimit: 1, + "add 1 node with 1 max - added": { + cacheMax: 1, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -71,8 +71,8 @@ func Test_Cache_Add(t *testing.T) { }, expectedNodeIndexes: []int{0}, }, - "add 1 node twice, cache limit 2 - only one added": { - cacheLimit: 2, + "add 1 node twice, cache max 2 - only one added": { + cacheMax: 2, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -85,8 +85,8 @@ func Test_Cache_Add(t *testing.T) { }, expectedNodeIndexes: []int{0}, }, - "add 1 node with 0 limit - not added and return itself": { - cacheLimit: 0, + "add 1 node with 0 max - not added and return itself": { + cacheMax: 0, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -94,8 +94,8 @@ func Test_Cache_Add(t *testing.T) { }, }, }, - "add 3 nodes with 1 limit - first 2 removed": { - cacheLimit: 1, + "add 3 nodes with 1 max - first 2 removed": { + cacheMax: 1, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -112,8 +112,8 @@ func Test_Cache_Add(t *testing.T) { }, expectedNodeIndexes: []int{2}, }, - "add 3 nodes with 2 limit - first removed": { - cacheLimit: 2, + "add 3 nodes with 2 max - first removed": { + cacheMax: 2, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -130,8 +130,8 @@ func Test_Cache_Add(t *testing.T) { }, expectedNodeIndexes: []int{1, 2}, }, - "add 3 nodes with 10 limit - non removed": { - cacheLimit: 10, + "add 3 nodes with 10 max - non removed": { + cacheMax: 10, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -152,7 +152,7 @@ func Test_Cache_Add(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { - cache := cache.New(tc.cacheLimit) + cache := cache.New(tc.cacheMax) expectedCurSize := 0 @@ -181,8 +181,8 @@ func Test_Cache_Add(t *testing.T) { func Test_Cache_Remove(t *testing.T) { testcases := map[string]testcase{ - "remove non-existent key, cache limit 0 - nil returned": { - cacheLimit: 0, + "remove non-existent key, cache max 0 - nil returned": { + cacheMax: 0, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -190,12 +190,12 @@ func Test_Cache_Remove(t *testing.T) { }, }, }, - "remove non-existent key, cache limit 1 - nil returned": { + "remove non-existent key, cache max 1 - nil returned": { setup: func(c cache.Cache) { require.Nil(t, c.Add(testNodes[1])) require.Equal(t, 1, c.Len()) }, - cacheLimit: 1, + cacheMax: 1, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -204,12 +204,12 @@ func Test_Cache_Remove(t *testing.T) { }, expectedNodeIndexes: []int{1}, }, - "remove existent key, cache limit 1 - removed": { + "remove existent key, cache max 1 - removed": { setup: func(c cache.Cache) { require.Nil(t, c.Add(testNodes[0])) require.Equal(t, 1, c.Len()) }, - cacheLimit: 1, + cacheMax: 1, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -217,12 +217,12 @@ func Test_Cache_Remove(t *testing.T) { }, }, }, - "remove twice, cache limit 1 - removed first time, then nil": { + "remove twice, cache max 1 - removed first time, then nil": { setup: func(c cache.Cache) { require.Nil(t, c.Add(testNodes[0])) require.Equal(t, 1, c.Len()) }, - cacheLimit: 1, + cacheMax: 1, cacheOps: []cacheOp{ { testNodexIdx: 0, @@ -234,14 +234,14 @@ func Test_Cache_Remove(t *testing.T) { }, }, }, - "remove all, cache limit 3": { + "remove all, cache max 3": { setup: func(c cache.Cache) { require.Nil(t, c.Add(testNodes[0])) require.Nil(t, c.Add(testNodes[1])) require.Nil(t, c.Add(testNodes[2])) require.Equal(t, 3, c.Len()) }, - cacheLimit: 3, + cacheMax: 3, cacheOps: []cacheOp{ { testNodexIdx: 2, @@ -261,7 +261,7 @@ func Test_Cache_Remove(t *testing.T) { for name, tc := range testcases { t.Run(name, func(t *testing.T) { - cache := cache.New(tc.cacheLimit) + cache := cache.New(tc.cacheMax) if tc.setup != nil { tc.setup(cache) diff --git a/nodedb.go b/nodedb.go index 2ed12b8da..3ccae3178 100644 --- a/nodedb.go +++ b/nodedb.go @@ -30,7 +30,7 @@ const ( // Using semantic versioning: https://semver.org/ defaultStorageVersionValue = "1.0.0" fastStorageVersionValue = "1.1.0" - fastNodeCacheLimit = 100000 + fastNodeCacheSize = 100000 ) var ( @@ -98,7 +98,7 @@ func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB { opts: *opts, latestVersion: 0, // initially invalid nodeCache: cache.New(cacheSize), - fastNodeCache: cache.New(fastNodeCacheLimit), + fastNodeCache: cache.New(fastNodeCacheSize), versionReaders: make(map[int64]uint32, 8), storageVersion: string(storeVersion), } From e935d8f6d80282ed69f85099d042ce1fedd33a52 Mon Sep 17 00:00:00 2001 From: Roman Date: Wed, 6 Jul 2022 11:45:30 -0400 Subject: [PATCH 7/9] fix benchmarks --- cache/cache_bench_test.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cache/cache_bench_test.go b/cache/cache_bench_test.go index 29ec12087..7332b6869 100644 --- a/cache/cache_bench_test.go +++ b/cache/cache_bench_test.go @@ -31,8 +31,12 @@ func BenchmarkAdd(b *testing.B) { cache := cache.New(tc.cacheMax) b.Run(name, func(b *testing.B) { for i := 0; i < b.N; i++ { + b.StopTimer() + key := randBytes(tc.keySize) + b.StartTimer() + _ = cache.Add(&testNode{ - key: randBytes(tc.keySize), + key: key, }) } }) @@ -42,7 +46,6 @@ func BenchmarkAdd(b *testing.B) { func BenchmarkRemove(b *testing.B) { b.ReportAllocs() - b.StopTimer() cache := cache.New(1000) existentKeyMirror := [][]byte{} // Populate cache @@ -58,10 +61,9 @@ func BenchmarkRemove(b *testing.B) { randSeed := 498727689 // For deterministic tests r := rand.New(rand.NewSource(int64(randSeed))) - + b.ResetTimer() for i := 0; i < b.N; i++ { key := existentKeyMirror[r.Intn(len(existentKeyMirror))] - b.ResetTimer() _ = cache.Remove(key) } } From 6eac72aa7e584f590d54fadec46cdeecb177ec46 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 7 Jul 2022 13:56:24 -0400 Subject: [PATCH 8/9] comment for nodeCache vs fastNodeCache --- nodedb.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nodedb.go b/nodedb.go index 3ccae3178..45548d3a0 100644 --- a/nodedb.go +++ b/nodedb.go @@ -75,9 +75,9 @@ type nodeDB struct { opts Options // Options to customize for pruning/writing versionReaders map[int64]uint32 // Number of active version readers storageVersion string // Storage version - latestVersion int64 - nodeCache cache.Cache - fastNodeCache cache.Cache + latestVersion int64 // Latest version of nodeDB. + nodeCache cache.Cache // Cache for nodes in the regular tree that consists of key-value pairs at any version. + fastNodeCache cache.Cache // Cache for nodes in the fast index that represents only key-value pairs at the latest version. } func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB { From 77dce5b6fc68b17e80fa1fa173fdd68a2ea3e234 Mon Sep 17 00:00:00 2001 From: Roman Date: Thu, 7 Jul 2022 14:30:17 -0400 Subject: [PATCH 9/9] attempt to fix failing stats test --- tree_test.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tree_test.go b/tree_test.go index 1605a3b83..f5eaa21cf 100644 --- a/tree_test.go +++ b/tree_test.go @@ -1984,8 +1984,6 @@ func Benchmark_GetByIndex(b *testing.B) { } func TestNodeCacheStatisic(t *testing.T) { - db, err := db.NewDB("test", db.MemDBBackend, "") - require.NoError(t, err) const numKeyVals = 100000 testcases := map[string]struct { cacheSize int @@ -2003,18 +2001,20 @@ func TestNodeCacheStatisic(t *testing.T) { }, "without_cache": { cacheSize: 0, - expectFastCacheHitCnt: 0, - expectFastCacheMissCnt: numKeyVals, + expectFastCacheHitCnt: 100000, // this value is hardcoded in nodedb for fast cache. + expectFastCacheMissCnt: 0, expectCacheHitCnt: 0, expectCacheMissCnt: 1, }, } for name, tc := range testcases { - funcT := func(sub *testing.T) { - + tc := tc + t.Run(name, func(sub *testing.T) { stat := &Statistics{} opts := &Options{Stat: stat} + db, err := db.NewDB("test", db.MemDBBackend, "") + require.NoError(t, err) mt, err := NewMutableTreeWithOpts(db, tc.cacheSize, opts) require.NoError(t, err) @@ -2038,8 +2038,7 @@ func TestNodeCacheStatisic(t *testing.T) { require.Equal(t, tc.expectFastCacheMissCnt, int(opts.Stat.GetFastCacheMissCnt())) require.Equal(t, tc.expectCacheHitCnt, int(opts.Stat.GetCacheHitCnt())) require.Equal(t, tc.expectCacheMissCnt, int(opts.Stat.GetCacheMissCnt())) - } - t.Run(name, funcT) + }) } }