hashdb cap and commit batch size config options #452

Open · wants to merge 15 commits into base: master

12 changes: 11 additions & 1 deletion core/blockchain.go
@@ -167,10 +167,14 @@ type CacheConfig struct {
SnapshotRestoreMaxGas uint64 // Rollback up to this much gas to restore snapshot (otherwise snapshot recalculated from nothing)
HeadRewindBlocksLimit uint64 // Rollback up to this many blocks to restore chain head (0 = preserve default upstream behaviour), only for HashScheme

// Arbitrum: configure GC window
// Arbitrum:
// configure GC window
TriesInMemory uint64 // Height difference before which a trie may not be garbage-collected
TrieRetention time.Duration // Time limit before which a trie may not be garbage-collected
TrieTimeLimitRandomOffset time.Duration // Range of random offset of each commit due to TrieTimeLimit period
// configure write batch size thresholds
TrieCapBatchSize uint32 // threshold used during capping triedb size
TrieCommitBatchSize uint32 // threshold used during committing triedb to disk

MaxNumberOfBlocksToSkipStateSaving uint32
MaxAmountOfGasToSkipStateSaving uint64
@@ -196,6 +200,10 @@ func (c *CacheConfig) triedbConfig(isVerkle bool) *triedb.Config {
}
if c.StateScheme == rawdb.HashScheme {
config.HashDB = &hashdb.Config{
// Arbitrum:
IdealCapBatchSize: c.TrieCapBatchSize,
IdealCommitBatchSize: c.TrieCommitBatchSize,

CleanCacheSize: c.TrieCleanLimit * 1024 * 1024,
}
}
Expand All @@ -222,6 +230,8 @@ var defaultCacheConfig = &CacheConfig{
TriesInMemory: state.DefaultTriesInMemory,
TrieRetention: 30 * time.Minute,
TrieTimeLimitRandomOffset: 0,
TrieCapBatchSize: 0,
TrieCommitBatchSize: 0,
MaxNumberOfBlocksToSkipStateSaving: 0,
MaxAmountOfGasToSkipStateSaving: 0,

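The two new CacheConfig fields are plain byte thresholds that flow into hashdb.Config via triedbConfig above; leaving them at 0 keeps the upstream ethdb.IdealBatchSize behaviour. Below is a minimal sketch of how a node operator might set them, assuming the usual go-ethereum CacheConfig fields; the concrete values and the helper name are illustrative only, not part of this PR.

package example

import (
	"time"

	"github.com/ethereum/go-ethereum/core"
	"github.com/ethereum/go-ethereum/core/rawdb"
)

// cacheConfigWithBatchLimits is a hypothetical helper showing the new knobs.
func cacheConfigWithBatchLimits() *core.CacheConfig {
	return &core.CacheConfig{
		TrieCleanLimit: 256,              // MB of clean-node cache, illustrative
		TrieDirtyLimit: 256,              // MB of dirty-node cache, illustrative
		TrieTimeLimit:  5 * time.Minute,  // flush interval, illustrative
		StateScheme:    rawdb.HashScheme, // the new thresholds only apply to HashScheme

		// Arbitrum: write batch size thresholds used while capping / committing
		// the triedb; 0 falls back to ethdb.IdealBatchSize (100 KiB).
		TrieCapBatchSize:    4 * 1024 * 1024, // flush Cap() batches at ~4 MiB
		TrieCommitBatchSize: 4 * 1024 * 1024, // flush Commit() batches at ~4 MiB
	}
}
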
8 changes: 8 additions & 0 deletions core/rawdb/accessors_trie.go
@@ -147,6 +147,14 @@ func WriteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash, node []byte)
}
}

// Arbitrum: WriteLegacyTrieNodeWithError is a version of WriteLegacyTrieNode that returns an error instead of exiting the process via log.Crit.
func WriteLegacyTrieNodeWithError(db ethdb.KeyValueWriter, hash common.Hash, node []byte) error {
if err := db.Put(hash.Bytes(), node); err != nil {
return fmt.Errorf("Failed to store legacy trie node, err: %w", err)
}
return nil
}

// DeleteLegacyTrieNode deletes the specified legacy trie node from database.
func DeleteLegacyTrieNode(db ethdb.KeyValueWriter, hash common.Hash) {
if err := db.Delete(hash.Bytes()); err != nil {
4 changes: 4 additions & 0 deletions ethdb/batch.go
@@ -16,6 +16,10 @@

package ethdb

import "errors"

var ErrBatchTooLarge = errors.New("batch too large")

// IdealBatchSize defines the size of the data batches should ideally add in one
// write.
const IdealBatchSize = 100 * 1024
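The caller-side contract for the new error is: when a Put or Delete reports ErrBatchTooLarge, write out the current batch, reset it, and retry the same operation. The sketch below mirrors how the hashdb Cap and commit paths later in this diff handle it; the helper name is illustrative and not part of this PR.

package example

import (
	"errors"

	"github.com/ethereum/go-ethereum/ethdb"
)

// putWithFlush is a hypothetical helper: it retries a single Put after flushing
// once the batch has hit the backend's hard size limit.
func putWithFlush(batch ethdb.Batch, key, value []byte) error {
	err := batch.Put(key, value)
	if !errors.Is(err, ethdb.ErrBatchTooLarge) {
		return err // nil, or an unrelated failure
	}
	// The entry did not fit: persist what we have, start fresh, retry once.
	if err := batch.Write(); err != nil {
		return err
	}
	batch.Reset()
	return batch.Put(key, value)
}
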
36 changes: 36 additions & 0 deletions ethdb/pebble/pebble.go
@@ -21,6 +21,7 @@ package pebble

import (
"bytes"
"encoding/binary"
"fmt"
"runtime"
"sync"
@@ -51,6 +52,15 @@ const (
// degradationWarnInterval specifies how often warning should be printed if the
// leveldb database cannot keep up with requested writes.
degradationWarnInterval = time.Minute

// The max size of an internal pebble batch is limited by its uint32 offsets.
// Pebble limits the size to MaxUint32 (just short of 4GB) so that the exclusive
// end of an allocation fits in uint32.
// On 32-bit systems, slices are naturally limited to MaxInt (just short of
// 2GB).
// see: cockroachdb/pebble.maxBatchSize
oneIf64Bit = ^uint(0) >> 63
maxBatchSize = (1<<31)<<oneIf64Bit - 1 // MaxUint32 on 64-bit platform, MaxInt on 32-bit platform
)

// Database is a persistent key-value store based on the pebble storage engine.
@@ -633,7 +643,21 @@ type batch struct {
}

// Put inserts the given value into the batch for later committing.
// If ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry putting the key/value pair.
func (b *batch) Put(key, value []byte) error {
// The size increase is the argument passed to cockroachdb/pebble.Batch.grow by
// cockroachdb/pebble.Batch.prepareDeferredKeyValueRecord, which is called internally by cockroachdb/pebble.Batch.Set.
// pebble.Batch.grow panics when the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize.
// The increase consists of:
// 1 byte for batch entry kind
// + MaxVarintLen32 for encoding of key length
// + MaxVarintLen32 for encoding of value length
// + key length
// + value length
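// e.g. a 32-byte key with a 100-byte value adds 1 + 5 + 5 + 32 + 100 = 143 bytes (binary.MaxVarintLen32 is 5)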
sizeIncrease := 1 + uint64(2*binary.MaxVarintLen32) + uint64(len(key)) + uint64(len(value))
// check if we fit within maxBatchSize
if uint64(b.b.Len())+sizeIncrease >= maxBatchSize {
// return an error instead of letting b.b.Set panic
return ethdb.ErrBatchTooLarge
}
if err := b.b.Set(key, value, nil); err != nil {
return err
}
@@ -642,7 +666,19 @@ func (b *batch) Put(key, value []byte) error {
}

// Delete inserts the key removal into the batch for later committing.
// If ethdb.ErrBatchTooLarge is returned, it is safe to flush the batch and retry deleting the key.
func (b *batch) Delete(key []byte) error {
// The size increase is the argument passed to cockroachdb/pebble.Batch.grow by
// cockroachdb/pebble.Batch.prepareDeferredKeyRecord, which is called internally by cockroachdb/pebble.Batch.Delete.
// pebble.Batch.grow panics when the batch data size plus the increase reaches cockroachdb/pebble.maxBatchSize.
// The increase consists of:
// 1 byte for batch entry kind
// + MaxVarintLen32 for encoding of key length
// + key length
sizeIncrease := 1 + uint64(binary.MaxVarintLen32) + uint64(len(key))
// check if we fit within maxBatchSize
if uint64(b.b.Len())+sizeIncrease >= maxBatchSize {
// return an error instead of letting b.b.Delete panic
return ethdb.ErrBatchTooLarge
}
if err := b.b.Delete(key, nil); err != nil {
return err
}
31 changes: 31 additions & 0 deletions ethdb/pebble/pebble_test.go
@@ -19,6 +19,8 @@
package pebble

import (
"encoding/binary"
"errors"
"testing"

"github.com/cockroachdb/pebble"
@@ -43,6 +45,35 @@ func TestPebbleDB(t *testing.T) {
})
}

func TestPebbleBatchBatchTooLargeError(t *testing.T) {
pebbleDb, err := pebble.Open("", &pebble.Options{
FS: vfs.NewMem(),
})
if err != nil {
t.Fatal(err)
}
var db ethdb.KeyValueStore = &Database{
db: pebbleDb,
}
batch := db.NewBatch()
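// Delete grows the batch by 1 + binary.MaxVarintLen32 + len(key) bytes, so this
// key length makes the increase reach exactly maxBatchSize and must be rejected.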
data := make([]byte, maxBatchSize-1-binary.MaxVarintLen32)
err = batch.Delete(data)
if err == nil {
t.Fatal("batch.Delete shouldn't succeed")
}
if !errors.Is(err, ethdb.ErrBatchTooLarge) {
t.Fatalf("batch.Delete returned unexpected error: %v", err)
}
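// Trimming binary.MaxVarintLen32 more bytes makes the Put increase
// (1 + 2*binary.MaxVarintLen32 + len(key) + len(value)) reach exactly maxBatchSize again.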
data = data[:len(data)-binary.MaxVarintLen32]
err = batch.Put(data[0:len(data)/2], data[len(data)/2:])
if err == nil {
t.Fatal("batch.Put shouldn't succeed")
}
if !errors.Is(err, ethdb.ErrBatchTooLarge) {
t.Fatalf("batch.Put returned unexpected error: %v", err)
}
}

func BenchmarkPebbleDB(b *testing.B) {
dbtest.BenchDatabaseSuite(b, func() ethdb.KeyValueStore {
db, err := pebble.Open("", &pebble.Options{
61 changes: 57 additions & 4 deletions triedb/hashdb/database.go
@@ -62,12 +62,20 @@

// Config contains the settings for database.
type Config struct {
// Arbitrum:
IdealCapBatchSize uint32 // write batch size threshold used during capping triedb size (if 0, ethdb.IdealBatchSize will be used)
IdealCommitBatchSize uint32 // write batch size threshold used during committing trie nodes to disk (if 0, ethdb.IdealBatchSize will be used)

CleanCacheSize int // Maximum memory allowance (in bytes) for caching clean nodes
}

// Defaults is the default setting for database if it's not specified.
// Notably, clean cache is disabled explicitly,
var Defaults = &Config{
// Arbitrum:
IdealCapBatchSize: 0, // 0 = ethdb.IdealBatchSize will be used
IdealCommitBatchSize: 0, // 0 = ethdb.IdealBatchSize will be used

// Explicitly set clean cache size to 0 to avoid creating fastcache,
// otherwise database must be closed when it's no longer needed to
// prevent memory leak.
@@ -78,6 +86,10 @@
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
type Database struct {
// Arbitrum:
idealCapBatchSize uint
idealCommitBatchSize uint

diskdb ethdb.Database // Persistent storage for matured trie nodes
cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs
dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes
@@ -132,7 +144,16 @@ func New(diskdb ethdb.Database, config *Config) *Database {
if config.CleanCacheSize > 0 {
cleans = fastcache.New(config.CleanCacheSize)
}
sanitizeBatchSize := func(size uint32) uint {
if size > 0 {
return uint(size)
}
return ethdb.IdealBatchSize
}
return &Database{
idealCapBatchSize: sanitizeBatchSize(config.IdealCapBatchSize),
idealCommitBatchSize: sanitizeBatchSize(config.IdealCommitBatchSize),

diskdb: diskdb,
cleans: cleans,
dirties: make(map[common.Hash]*cachedNode),
@@ -347,10 +368,25 @@ func (db *Database) Cap(limit common.StorageSize) error {
for size > limit && oldest != (common.Hash{}) {
// Fetch the oldest referenced node and push into the batch
node := db.dirties[oldest]
rawdb.WriteLegacyTrieNode(batch, oldest, node.node)

err := rawdb.WriteLegacyTrieNodeWithError(batch, oldest, node.node)
if err != nil {
if errors.Is(err, ethdb.ErrBatchTooLarge) {
log.Warn("Pebble batch limit reached in hashdb Cap operation, flushing batch. Consider setting ideal cap batch size to a lower value.", "pebbleError", err)
// flush batch & retry the write
if err = batch.Write(); err != nil {
log.Error("Failed to write flush list to disk", "err", err)
return err
}
batch.Reset()
rawdb.WriteLegacyTrieNode(batch, oldest, node.node)
} else {
log.Crit("Failure in hashdb Cap operation", "err", err)
}
}

// If we exceeded the ideal batch size, commit and reset
if batch.ValueSize() >= ethdb.IdealBatchSize {
if uint(batch.ValueSize()) >= db.idealCapBatchSize {
if err := batch.Write(); err != nil {
log.Error("Failed to write flush list to disk", "err", err)
return err
@@ -474,8 +510,25 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane
return err
}
// If we've reached an optimal batch size, commit and start over
rawdb.WriteLegacyTrieNode(batch, hash, node.node)
if batch.ValueSize() >= ethdb.IdealBatchSize {
err = rawdb.WriteLegacyTrieNodeWithError(batch, hash, node.node)
if err != nil {
if errors.Is(err, ethdb.ErrBatchTooLarge) {
log.Warn("Pebble batch limit reached in hashdb Commit operation, flushing batch. Consider setting ideal commit batch size to a lower value.", "pebbleError", err)
// flush batch & retry the write
if err = batch.Write(); err != nil {
return err
}
err = batch.Replay(uncacher)
if err != nil {
return err
}
batch.Reset()
rawdb.WriteLegacyTrieNode(batch, hash, node.node)
} else {
log.Crit("Failure in hashdb Commit operation", "err", err)
}
}
if uint(batch.ValueSize()) >= db.idealCommitBatchSize {
if err := batch.Write(); err != nil {
return err
}