Skip to content

Commit

Permalink
perf: use fast unsafe bytes->string convertion (#525)
Browse files Browse the repository at this point in the history
* perf: use fast unsafe bytes->string convertion

* code style

* update changelog

* add fast convertions to cache

* Update CHANGELOG.md

Co-authored-by: Roman <ackhtariev@gmail.com>

* one more tiny value cache

Co-authored-by: Roman <ackhtariev@gmail.com>
  • Loading branch information
robert-zaremba and p0mvn committed Aug 3, 2022
1 parent 76cec48 commit 3e26f26
Show file tree
Hide file tree
Showing 11 changed files with 133 additions and 47 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Improvements

- [#525](https://github.com/cosmos/iavl/pull/525) Optimization: use fast unsafe bytes->string conversion.
- [#506](https://github.com/cosmos/iavl/pull/506) Implement cache abstraction.

### Bug Fixes
Expand Down
16 changes: 9 additions & 7 deletions cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package cache

import (
"container/list"

ibytes "github.com/cosmos/iavl/internal/bytes"
)

// Node represents a node eligible for caching.
Expand Down Expand Up @@ -59,34 +61,34 @@ func New(maxElementCount int) Cache {
}

func (c *lruCache) Add(node Node) Node {
if e, exists := c.dict[string(node.GetKey())]; exists {
keyStr := ibytes.UnsafeBytesToStr(node.GetKey())
if e, exists := c.dict[keyStr]; exists {
c.ll.MoveToFront(e)
old := e.Value
e.Value = node
return old.(Node)
}

elem := c.ll.PushFront(node)
c.dict[string(node.GetKey())] = elem
c.dict[keyStr] = elem

if c.ll.Len() > c.maxElementCount {
oldest := c.ll.Back()

return c.remove(oldest)
}
return nil
}

func (nc *lruCache) Get(key []byte) Node {
if ele, hit := nc.dict[string(key)]; hit {
if ele, hit := nc.dict[ibytes.UnsafeBytesToStr(key)]; hit {
nc.ll.MoveToFront(ele)
return ele.Value.(Node)
}
return nil
}

func (c *lruCache) Has(key []byte) bool {
_, exists := c.dict[string(key)]
_, exists := c.dict[ibytes.UnsafeBytesToStr(key)]
return exists
}

Expand All @@ -95,14 +97,14 @@ func (nc *lruCache) Len() int {
}

func (c *lruCache) Remove(key []byte) Node {
if elem, exists := c.dict[string(key)]; exists {
if elem, exists := c.dict[ibytes.UnsafeBytesToStr(key)]; exists {
return c.remove(elem)
}
return nil
}

func (c *lruCache) remove(e *list.Element) Node {
removed := c.ll.Remove(e).(Node)
delete(c.dict, string(removed.GetKey()))
delete(c.dict, ibytes.UnsafeBytesToStr(removed.GetKey()))
return removed
}
3 changes: 2 additions & 1 deletion cmd/iaviewer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
dbm "github.com/tendermint/tm-db"

"github.com/cosmos/iavl"
ibytes "github.com/cosmos/iavl/internal/bytes"
)

// TODO: make this configurable?
Expand Down Expand Up @@ -94,7 +95,7 @@ func PrintDBStats(db dbm.DB) {

defer itr.Close()
for ; itr.Valid(); itr.Next() {
key := string(itr.Key()[:1])
key := ibytes.UnsafeBytesToStr(itr.Key()[:1])
prefix[key]++
count++
}
Expand Down
6 changes: 4 additions & 2 deletions internal/bytes/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ func (bz *HexBytes) UnmarshalJSON(data []byte) error {
if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' {
return fmt.Errorf("invalid hex string: %s", data)
}
bz2, err := hex.DecodeString(string(data[1 : len(data)-1]))
data = data[1 : len(data)-1]
dest := make([]byte, hex.DecodedLen(len(data)))
_, err := hex.Decode(dest, data)
if err != nil {
return err
}
*bz = bz2
*bz = dest
return nil
}

Expand Down
26 changes: 26 additions & 0 deletions internal/bytes/string.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package common

import (
"reflect"
"unsafe"
)

// UnsafeStrToBytes uses unsafe to convert string into byte array. Returned bytes
// must not be altered after this function is called as it will cause a segmentation fault.
func UnsafeStrToBytes(s string) []byte {
var buf []byte
sHdr := (*reflect.StringHeader)(unsafe.Pointer(&s))
bufHdr := (*reflect.SliceHeader)(unsafe.Pointer(&buf))
bufHdr.Data = sHdr.Data
bufHdr.Cap = sHdr.Len
bufHdr.Len = sHdr.Len
return buf
}

// UnsafeBytesToStr is meant to make a zero allocation conversion
// from []byte -> string to speed up operations, it is not meant
// to be used generally, but for a specific pattern to delete keys
// from a map.
func UnsafeBytesToStr(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}
54 changes: 54 additions & 0 deletions internal/bytes/string_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package common

import (
"runtime"
"strconv"
"testing"
"time"

"github.com/stretchr/testify/suite"
)

func TestStringSuite(t *testing.T) {
suite.Run(t, new(StringSuite))
}

type StringSuite struct{ suite.Suite }

func unsafeConvertStr() []byte {
return UnsafeStrToBytes("abc")
}

func (s *StringSuite) TestUnsafeStrToBytes() {
// we convert in other function to trigger GC. We want to check that
// the underlying array in []bytes is accessible after GC will finish swapping.
for i := 0; i < 5; i++ {
b := unsafeConvertStr()
runtime.GC()
<-time.NewTimer(2 * time.Millisecond).C
b2 := append(b, 'd')
s.Equal("abc", string(b))
s.Equal("abcd", string(b2))
}
}

func unsafeConvertBytes() string {
return UnsafeBytesToStr([]byte("abc"))
}

func (s *StringSuite) TestUnsafeBytesToStr() {
// we convert in other function to trigger GC. We want to check that
// the underlying array in []bytes is accessible after GC will finish swapping.
for i := 0; i < 5; i++ {
str := unsafeConvertBytes()
runtime.GC()
<-time.NewTimer(2 * time.Millisecond).C
s.Equal("abc", str)
}
}

func BenchmarkUnsafeStrToBytes(b *testing.B) {
for i := 0; i < b.N; i++ {
UnsafeStrToBytes(strconv.Itoa(i))
}
}
17 changes: 9 additions & 8 deletions mutable_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"time"

"github.com/pkg/errors"

dbm "github.com/tendermint/tm-db"

"github.com/cosmos/iavl/internal/logger"
Expand Down Expand Up @@ -147,7 +146,7 @@ func (tree *MutableTree) Get(key []byte) ([]byte, error) {
return nil, nil
}

if fastNode, ok := tree.unsavedFastNodeAdditions[string(key)]; ok {
if fastNode, ok := tree.unsavedFastNodeAdditions[unsafeToStr(key)]; ok {
return fastNode.value, nil
}
// check if node was deleted
Expand Down Expand Up @@ -919,8 +918,9 @@ func (tree *MutableTree) getUnsavedFastNodeRemovals() map[string]interface{} {
}

func (tree *MutableTree) addUnsavedAddition(key []byte, node *FastNode) {
delete(tree.unsavedFastNodeRemovals, string(key))
tree.unsavedFastNodeAdditions[string(key)] = node
skey := unsafeToStr(key)
delete(tree.unsavedFastNodeRemovals, skey)
tree.unsavedFastNodeAdditions[skey] = node
}

func (tree *MutableTree) saveFastNodeAdditions() error {
Expand All @@ -939,8 +939,9 @@ func (tree *MutableTree) saveFastNodeAdditions() error {
}

func (tree *MutableTree) addUnsavedRemoval(key []byte) {
delete(tree.unsavedFastNodeAdditions, string(key))
tree.unsavedFastNodeRemovals[string(key)] = true
skey := unsafeToStr(key)
delete(tree.unsavedFastNodeAdditions, skey)
tree.unsavedFastNodeRemovals[skey] = true
}

func (tree *MutableTree) saveFastNodeRemovals() error {
Expand All @@ -951,7 +952,7 @@ func (tree *MutableTree) saveFastNodeRemovals() error {
sort.Strings(keysToSort)

for _, key := range keysToSort {
if err := tree.ndb.DeleteFastNode([]byte(key)); err != nil {
if err := tree.ndb.DeleteFastNode(unsafeToBz(key)); err != nil {
return err
}
}
Expand Down Expand Up @@ -1231,7 +1232,7 @@ func (tree *MutableTree) addOrphans(orphans []*Node) error {
if len(node.hash) == 0 {
return fmt.Errorf("expected to find node hash, but was empty")
}
tree.orphans[string(node.hash)] = node.version
tree.orphans[unsafeToStr(node.hash)] = node.version
}
return nil
}
7 changes: 4 additions & 3 deletions nodedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ import (
"strings"
"sync"

"github.com/cosmos/iavl/cache"
"github.com/cosmos/iavl/internal/logger"
"github.com/pkg/errors"
dbm "github.com/tendermint/tm-db"

"github.com/cosmos/iavl/cache"
"github.com/cosmos/iavl/internal/logger"
)

const (
Expand Down Expand Up @@ -86,7 +87,7 @@ func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB {
opts = &o
}

storeVersion, err := db.Get(metadataKeyFormat.Key([]byte(storageVersionKey)))
storeVersion, err := db.Get(metadataKeyFormat.Key(unsafeToBz(storageVersionKey)))

if err != nil || storeVersion == nil {
storeVersion = []byte(defaultStorageVersionValue)
Expand Down
4 changes: 2 additions & 2 deletions tree_dotgraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ func WriteDOTGraph(w io.Writer, tree *ImmutableTree, paths []PathToLeaf) {
}
shortHash := graphNode.Hash[:7]

graphNode.Label = mkLabel(string(node.key), 16, "sans-serif")
graphNode.Label = mkLabel(unsafeToStr(node.key), 16, "sans-serif")
graphNode.Label += mkLabel(shortHash, 10, "monospace")
graphNode.Label += mkLabel(fmt.Sprintf("version=%d", node.version), 10, "monospace")

if node.value != nil {
graphNode.Label += mkLabel(string(node.value), 10, "sans-serif")
graphNode.Label += mkLabel(unsafeToStr(node.value), 10, "sans-serif")
}

if node.height == 0 {
Expand Down
8 changes: 8 additions & 0 deletions unsafe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package iavl

import ibytes "github.com/cosmos/iavl/internal/bytes"

var (
unsafeToStr = ibytes.UnsafeBytesToStr
unsafeToBz = ibytes.UnsafeStrToBytes
)
38 changes: 14 additions & 24 deletions unsaved_fast_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,19 @@ var (
// it iterates over the latest state via fast nodes,
// taking advantage of keys being located in sequence in the underlying database.
type UnsavedFastIterator struct {
start, end []byte

valid bool

ascending bool

err error

ndb *nodeDB
start, end []byte
valid bool
ascending bool
err error
ndb *nodeDB
nextKey []byte
nextVal []byte
fastIterator dbm.Iterator

nextUnsavedNodeIdx int
unsavedFastNodeAdditions map[string]*FastNode

unsavedFastNodeRemovals map[string]interface{}

unsavedFastNodesToSort []string

nextKey []byte

nextVal []byte

nextUnsavedNodeIdx int

fastIterator dbm.Iterator
unsavedFastNodeRemovals map[string]interface{}
unsavedFastNodesToSort []string
}

var _ dbm.Iterator = (*UnsavedFastIterator)(nil)
Expand Down Expand Up @@ -71,7 +61,7 @@ func NewUnsavedFastIterator(start, end []byte, ascending bool, ndb *nodeDB, unsa
continue
}

iter.unsavedFastNodesToSort = append(iter.unsavedFastNodesToSort, string(fastNode.key))
iter.unsavedFastNodesToSort = append(iter.unsavedFastNodesToSort, unsafeToStr(fastNode.key))
}

sort.Slice(iter.unsavedFastNodesToSort, func(i, j int) bool {
Expand Down Expand Up @@ -142,8 +132,8 @@ func (iter *UnsavedFastIterator) Next() {
return
}

diskKeyStr := unsafeToStr(iter.fastIterator.Key())
if iter.fastIterator.Valid() && iter.nextUnsavedNodeIdx < len(iter.unsavedFastNodesToSort) {
diskKeyStr := string(iter.fastIterator.Key())

if iter.unsavedFastNodeRemovals[diskKeyStr] != nil {
// If next fast node from disk is to be removed, skip it.
Expand Down Expand Up @@ -186,7 +176,7 @@ func (iter *UnsavedFastIterator) Next() {

// if only nodes on disk are left, we return them
if iter.fastIterator.Valid() {
if iter.unsavedFastNodeRemovals[string(iter.fastIterator.Key())] != nil {
if iter.unsavedFastNodeRemovals[diskKeyStr] != nil {
// If next fast node from disk is to be removed, skip it.
iter.fastIterator.Next()
iter.Next()
Expand Down

0 comments on commit 3e26f26

Please sign in to comment.