diff --git a/.github/workflows/c-chain-reexecution-benchmark.yml b/.github/workflows/c-chain-reexecution-benchmark.yml new file mode 100644 index 000000000000..856ab8210af2 --- /dev/null +++ b/.github/workflows/c-chain-reexecution-benchmark.yml @@ -0,0 +1,73 @@ +name: C-Chain Re-Execution Benchmark + +on: + pull_request: + workflow_dispatch: + inputs: + start-block: + description: 'The start block for the benchmark.' + required: false + default: 101 + end-block: + description: 'The end block for the benchmark.' + required: false + default: 250000 + source-block-dir: + description: 'The source block directory. Supports S3 directory/zip and local directories.' + required: false + default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip + current-state-dir: + description: 'The current state directory. Supports S3 directory/zip and local directories.' + required: false + default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip + schedule: + - cron: '0 6 * * 0' # Runs every Sunday at 06:00 UTC + +jobs: + c-chain-reexecution: + permissions: + id-token: write + contents: write + runs-on: ubuntu-latest + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_S3_READ_ONLY_ROLE }} + aws-region: us-east-2 + - name: Set task env via GITHUB_ENV + id: set-params + run: | + { + echo "START_BLOCK=${{ github.event.inputs.start-block || 101 }}" + echo "END_BLOCK=${{ github.event.inputs.end-block || 250000 }}" + echo "SOURCE_BLOCK_DIR=${{ github.event.inputs.source-block-dir || 's3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip' }}" + echo "CURRENT_STATE_DIR=${{ github.event.inputs.current-state-dir || 's3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip' }}" + } >> "$GITHUB_ENV" + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-go-for-project + - name: Run C-Chain Re-Execution + uses: 
./.github/actions/run-monitored-tmpnet-cmd + with: + run: ./scripts/run_task.sh reexecute-cchain-range-with-copied-data EXECUTION_DATA_DIR=${{ github.workspace }}/reexecution-data BENCHMARK_OUTPUT_FILE=${{ github.workspace }}/reexecute-cchain-range-benchmark-res.txt + prometheus_username: ${{ secrets.PROMETHEUS_ID || '' }} + prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }} + grafana_dashboard_id: 'Gl1I20mnk/c-chain' + loki_username: ${{ secrets.LOKI_ID || '' }} + loki_password: ${{ secrets.LOKI_PASSWORD || '' }} + runtime: "" # Set runtime input to empty string to disable log collection + - name: Download Previous Benchmark Result + uses: actions/cache@v4 + with: + path: ./cache + key: ${{ runner.os }}-reexecute-cchain-range-benchmark.json + - name: Compare Benchmark Results + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: 'go' + output-file-path: ${{ github.workspace }}/reexecute-cchain-range-benchmark-res.txt + external-data-json-path: ./cache/${{ runner.os }}-reexecute-cchain-range-benchmark.json + fail-on-alert: true + github-token: ${{ secrets.GITHUB_TOKEN }} + summary-always: true + comment-on-alert: true diff --git a/Taskfile.yml b/Taskfile.yml index 5775f4f24762..5a5a2e053974 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -85,6 +85,24 @@ tasks: - cmd: go mod tidy - task: check-clean-branch + export-cchain-block-range: + desc: Export range of C-Chain blocks from source to target directory. 
+ vars: + SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR}}' + TARGET_BLOCK_DIR: '{{.TARGET_BLOCK_DIR}}' + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + cmds: + - cmd: go test -timeout=0 -run=TestExportBlockRange github.com/ava-labs/avalanchego/tests/reexecute/c --source-block-dir={{.SOURCE_BLOCK_DIR}} --target-block-dir={{.TARGET_BLOCK_DIR}} --start-block={{.START_BLOCK}} --end-block={{.END_BLOCK}} + + export-dir-to-s3: + desc: Copies a directory to s3 + vars: + LOCAL_SRC: '{{.LOCAL_SRC}}' + S3_DST: '{{.S3_DST}}' + cmds: + - cmd: s5cmd cp {{.LOCAL_SRC}} {{.S3_DST}} + generate-mocks: desc: Generates testing mocks cmds: @@ -109,6 +127,30 @@ tasks: desc: Runs ginkgo against the current working directory cmd: ./bin/ginkgo build {{.USER_WORKING_DIR}} + import-cchain-reexecute-range: + desc: Imports the C-Chain block and state data to re-execute. Defaults to import the first 200 and the current state created with the default config of the C-Chain (hashdb). + vars: + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-200.zip"}}' + CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip"}}' + cmds: + - task: import-s3-to-dir + vars: + SRC: '{{.SOURCE_BLOCK_DIR}}' + DST: '{{.EXECUTION_DATA_DIR}}/blocks' + - task: import-s3-to-dir + vars: + SRC: '{{.CURRENT_STATE_DIR}}' + DST: '{{.EXECUTION_DATA_DIR}}/current-state' + + import-s3-to-dir: + desc: Imports an S3 path to a local directory. Unzipping if needed. 
+ vars: + SRC: '{{.SRC}}' + DST: '{{.DST}}' + cmds: + - cmd: bash -x ./scripts/copy_dir.sh {{.SRC}} {{.DST}} + install-nix: desc: Installs nix with the determinate systems installer cmd: curl --proto '=https' --tlsv1.2 -sSf -L https://install.determinate.systems/nix | sh -s -- install @@ -139,6 +181,49 @@ tasks: desc: Runs shellcheck to check sanity of shell scripts cmd: ./scripts/shellcheck.sh + reexecute-cchain-range: + desc: Re-execute a range of C-Chain blocks. + vars: + CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' + SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR}}' + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + LABELS: '{{.LABELS | default ""}}' + BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' + cmd: | + CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ + SOURCE_BLOCK_DIR={{.SOURCE_BLOCK_DIR}} \ + START_BLOCK={{.START_BLOCK}} \ + END_BLOCK={{.END_BLOCK}} \ + LABELS={{.LABELS}} \ + BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ + bash -x ./scripts/benchmark_cchain_range.sh + + reexecute-cchain-range-with-copied-data: + desc: Combines import-cchain-reexecute-range and reexecute-cchain-range + vars: + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb.zip"}}' + CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100.zip"}}' + START_BLOCK: '{{.START_BLOCK | default "101"}}' + END_BLOCK: '{{.END_BLOCK | default "250000"}}' + LABELS: '{{.LABELS | default ""}}' + BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' + cmds: + - task: import-cchain-reexecute-range + vars: + SOURCE_BLOCK_DIR: '{{.SOURCE_BLOCK_DIR}}' + CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + - task: reexecute-cchain-range + vars: + SOURCE_BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' + CURRENT_STATE_DIR: 
'{{.EXECUTION_DATA_DIR}}/current-state' + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + LABELS: '{{.LABELS}}' + BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' + test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests cmd: bash -x ./scripts/tests.e2e.bootstrap_monitor.sh diff --git a/flake.lock b/flake.lock index 98a552d749c5..bb003457f77a 100644 --- a/flake.lock +++ b/flake.lock @@ -2,12 +2,12 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1751211869, - "narHash": "sha256-1Cu92i1KSPbhPCKxoiVG5qnoRiKTgR5CcGSRyLpOd7Y=", - "rev": "b43c397f6c213918d6cfe6e3550abfe79b5d1c51", - "revCount": 805252, + "lastModified": 1752620740, + "narHash": "sha256-f3pO+9lg66mV7IMmmIqG4PL3223TYMlnlw+pnpelbss=", + "rev": "32a4e87942101f1c9f9865e04dc3ddb175f5f32e", + "revCount": 806427, "type": "tarball", - "url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.2505.805252%2Brev-b43c397f6c213918d6cfe6e3550abfe79b5d1c51/0197c007-f18c-7f97-aeb6-aed117320dd2/source.tar.gz" + "url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.2505.806427%2Brev-32a4e87942101f1c9f9865e04dc3ddb175f5f32e/019816e8-f0d0-7f8f-866a-81065956761d/source.tar.gz" }, "original": { "type": "tarball", diff --git a/flake.nix b/flake.nix index 1bbd86dd45c5..f384066994b9 100644 --- a/flake.nix +++ b/flake.nix @@ -61,6 +61,9 @@ # Solidity compiler solc + + # s5cmd for rapid s3 interactions + s5cmd ] ++ lib.optionals stdenv.isDarwin [ # macOS-specific frameworks darwin.apple_sdk.frameworks.Security diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh new file mode 100755 index 000000000000..bd84e45908c5 --- /dev/null +++ b/scripts/benchmark_cchain_range.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# This script runs the C-Chain re-execution benchmark with a single iteration. +# It expects the following environment variables to be set: +# SOURCE_BLOCK_DIR: Path or S3 URL to the source block directory or zip. 
+# CURRENT_STATE_DIR: Path or S3 URL to the current state directory or zip. +# START_BLOCK: The starting block height (exclusive). +# END_BLOCK: The ending block height (inclusive). +# LABELS (optional): Comma-separated key=value pairs for metric labels. +# BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file. + +: "${SOURCE_BLOCK_DIR:?SOURCE_BLOCK_DIR must be set}" +: "${CURRENT_STATE_DIR:?CURRENT_STATE_DIR must be set}" +: "${START_BLOCK:?START_BLOCK must be set}" +: "${END_BLOCK:?END_BLOCK must be set}" + +cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ github.com/ava-labs/avalanchego/tests/reexecute/c --source-block-dir=\"${SOURCE_BLOCK_DIR}\" --target-dir=\"${CURRENT_STATE_DIR}\" --start-block=\"${START_BLOCK}\" --end-block=\"${END_BLOCK}\" ${LABELS:+--labels=\"${LABELS}\"}" + +if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then + eval "$cmd" | tee "${BENCHMARK_OUTPUT_FILE}" +else + eval "$cmd" +fi \ No newline at end of file diff --git a/scripts/build_test.sh b/scripts/build_test.sh index 2f4c534c76cf..81e7a0afa776 100755 --- a/scripts/build_test.sh +++ b/scripts/build_test.sh @@ -7,7 +7,7 @@ AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. 
&& pwd ) # Load the constants source "$AVALANCHE_PATH"/scripts/constants.sh -EXCLUDED_TARGETS="| grep -v /mocks | grep -v proto | grep -v tests/e2e | grep -v tests/load/c | grep -v tests/upgrade | grep -v tests/fixture/bootstrapmonitor/e2e" +EXCLUDED_TARGETS="| grep -v /mocks | grep -v proto | grep -v tests/e2e | grep -v tests/load/c | grep -v tests/upgrade | grep -v tests/fixture/bootstrapmonitor/e2e | grep -v tests/reexecute" if [[ "$(go env GOOS)" == "windows" ]]; then # Test discovery for the antithesis test setups is broken due to diff --git a/scripts/copy_dir.sh b/scripts/copy_dir.sh new file mode 100755 index 000000000000..4dad1c5a4e1d --- /dev/null +++ b/scripts/copy_dir.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Usage: ./scripts/copy_dir.sh source_directory destination_directory +# Sources can be S3 URLs (s3://bucket/path) or a local file path +# Assumes s5cmd has been installed and is available in the PATH. +# s5cmd is included in the nix dev shell. + +if [ $# -ne 2 ]; then + echo "Usage: $0 " + echo "S3 Example: $0 's3://bucket1/path1' /dest/dir" + echo "Local Example: $0 '/local/path1' /dest/dir" + exit 1 +fi + +SRC="$1" +DST="$2" + +# Ensure destination directory exists +mkdir -p "$DST" + +# Function to copy from a single source to destination +copy_source() { + local source="$1" + local dest="$2" + + # Check if source starts with s3:// + if [[ "$source" == s3://* ]]; then + echo "Copying from S3: $source -> $dest" + # Use s5cmd to copy from S3 + time s5cmd cp "$source" "$dest" + + # If we copied a zip, extract it in place + if [[ "$source" == *.zip ]]; then + echo "Extracting zip file in place" + time unzip "$dest"/*.zip -d "$dest" + rm "$dest"/*.zip + fi + else + echo "Copying from local filesystem: $source -> $dest" + # Use cp for local filesystem with recursive support + if [ -d "$source" ]; then + time cp -r "$source"/* "$dest/" + elif [ -f "$source" ]; then + time cp "$source" "$dest/" + else + echo "Warning: Source not 
found: $source" + return 1 + fi + fi +} + +copy_source "$SRC" "$DST" diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md new file mode 100644 index 000000000000..ba0e075261bf --- /dev/null +++ b/tests/reexecute/c/README.md @@ -0,0 +1,107 @@ +# C-Chain Re-Execution Benchmark + +The C-Chain benchmarks support re-executing a range of mainnet C-Chain blocks against a provided snapshot of the current state as of some initial block height. + +AvalancheGo provides a [Taskfile](https://taskfile.dev/) with commands to manage the import/export of data required for re-execution (block range and current state) and to trigger a benchmark run. + +## Metrics + +The C-Chain benchmarks export VM metrics to the same Grafana instance as AvalancheGo CI: https://grafana-poc.avax-dev.network/. + +You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s). + +Note: to ensure Prometheus gets a final scrape at the end of a run, the test will sleep for 2s greater than the 10s Prometheus scrape interval, which will cause short-running tests to appear to take much longer than expected. Additionally, the linked dashboard displays most metrics using a 1min rate, which means that very short running tests will not produce a very useful visualization. + +For a realistic view, run the default C-Chain benchmark in the [final step](#run-default-c-chain-benchmark) or view the preview URL printed by the [c-chain-benchmark](../../../.github/workflows/c-chain-reexecution-benchmark.yml) job, which executes the block range [101, 250k]. 
+ +## Configure Dev Environment + +To set up your dev environment to run C-Chain benchmarks, run: + +```bash +nix develop +``` + +If using AWS to push/pull S3 buckets, configure your AWS profile with the required access. The instructions here utilize the S3 bucket `s3://avalanchego-bootstrap-testing` in `us-east-2` under the Ava Labs Experimental AWS account. + +To authenticate metrics collection (enabled by default), provide the Prometheus credentials referenced in the e2e [README](../../e2e/README.md#monitoring). + +## Import Blocks + +To import the first 10k blocks for re-execution, you can fetch the following ZIP from S3: `s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb.zip`: + +```bash +task import-s3-to-dir SRC=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb.zip DST=$HOME/exec-data/blocks +``` + +## Create C-Chain State Snapshot + +To execute a range of blocks [N, N+K], we need an initial current state with the last accepted block of N-1. To generate this from scratch, simply execute the range of blocks [1, N-1] (genesis is "executed" vacuously, so do not provide 0 as the start block) locally starting from an empty state: + +```bash +task reexecute-cchain-range CURRENT_STATE_DIR=$HOME/exec-data/current-state SOURCE_BLOCK_DIR=$HOME/exec-data/blocks START_BLOCK=1 END_BLOCK=100 +``` + +This initializes a `current-state` subdirectory inside of `$HOME/exec-data`, which will contain two subdirectories `chain-data-dir` and `db`. + +The `chain-data-dir` is the path passed in via `*snow.Context` to the VM as `snowContext.ChainDataDir`. +If the VM does not populate it, it may remain empty after a run. + +The `db` directory is used to initialize the leveldb instance used to create two nested PrefixDBs: the database passed into `vm.Initialize(...)` and the database used by shared memory. 
+These two databases must be built on top of the same base database as documented in the shared memory [README](../../../chains/atomic/README.md#shared-database). + +For reference, the expected directory structure is: + +``` +$HOME/exec-data +├── blocks +│ ├── 000001.log +│ ├── CURRENT +│ ├── LOCK +│ ├── LOG +│ └── MANIFEST-000000 +└── current-state + ├── chain-data-dir + └── db + ├── 000002.ldb + ├── 000003.log + ├── CURRENT + ├── CURRENT.bak + ├── LOCK + ├── LOG + └── MANIFEST-000004 +``` + +After generating the `$HOME/exec-data/current-state` directory from executing the first segment of blocks, we can take a snapshot of the current state and push to S3 (or copy to another location locally if preferred) for re-use. + +Run the export task: + +```bash +task export-dir-to-s3 LOCAL_SRC=$HOME/exec-data/current-state/ S3_DST=s3://avalanchego-bootstrap-testing/cchain-current-state-test/ +``` + +## Run C-Chain Benchmark + +Now that we've pushed the current-state back to S3, we can run the target range of blocks [101, 200] either re-using the data we already have locally or copying all of the data including both the blocks and current state for a completely fresh run. + +First, to run the block range using our locally available data, run: + +```bash +task reexecute-cchain-range CURRENT_STATE_DIR=$HOME/exec-data/current-state SOURCE_BLOCK_DIR=$HOME/exec-data/blocks START_BLOCK=101 END_BLOCK=200 +``` + +Note: if you attempt to re-execute a second time on the same data set, it will fail because the current state has been updated to block 200. 
+ +Second, to copy all of the data for a completely fresh run, provide the parameters explicitly that we have just used locally: + +```bash +task reexecute-cchain-range-with-copied-data EXECUTION_DATA_DIR=$HOME/reexec-data-params SOURCE_BLOCK_DIR=s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb.zip CURRENT_STATE_DIR=s3://avalanchego-bootstrap-testing/cchain-current-state-test/** START_BLOCK=101 END_BLOCK=10000 +``` + +## Run Default C-Chain Benchmark + +To re-execute with a fresh copy, use the defaults provided in [Taskfile.yml](../../../Taskfile.yml) to execute the range [101, 250k]: + +```bash +task reexecute-cchain-range-with-copied-data EXECUTION_DATA_DIR=$HOME/reexec-data-defaults +``` diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go new file mode 100644 index 000000000000..262383e154b5 --- /dev/null +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -0,0 +1,517 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package vm + +import ( + "context" + "encoding/binary" + "errors" + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/ava-labs/coreth/plugin/factory" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/ava-labs/avalanchego/api/metrics" + "github.com/ava-labs/avalanchego/chains/atomic" + "github.com/ava-labs/avalanchego/database" + "github.com/ava-labs/avalanchego/database/leveldb" + "github.com/ava-labs/avalanchego/database/prefixdb" + "github.com/ava-labs/avalanchego/genesis" + "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/snow" + "github.com/ava-labs/avalanchego/snow/engine/enginetest" + "github.com/ava-labs/avalanchego/snow/engine/snowman/block" + "github.com/ava-labs/avalanchego/snow/validators/validatorstest" + "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/tests/fixture/tmpnet" + 
"github.com/ava-labs/avalanchego/upgrade" + "github.com/ava-labs/avalanchego/utils/constants" + "github.com/ava-labs/avalanchego/utils/crypto/bls/signer/localsigner" + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/ava-labs/avalanchego/utils/units" + "github.com/ava-labs/avalanchego/vms/platformvm/warp" +) + +var ( + mainnetXChainID = ids.FromStringOrPanic("2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM") + mainnetCChainID = ids.FromStringOrPanic("2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5") + mainnetAvaxAssetID = ids.FromStringOrPanic("FvwEAhmxKfeiG8SnEvq42hc6whRyY3EFYAvebMqDNDGCgxN5Z") +) + +var ( + sourceBlockDirArg string + targetBlockDirArg string + targetDirArg string + startBlockArg uint64 + endBlockArg uint64 + chanSizeArg int + metricsEnabledArg bool + executionTimeout time.Duration + labelsArg string + + labels map[string]string = map[string]string{ + "job": "c-chain-reexecution", + "is_ephemeral_node": "false", + "chain": "C", + } +) + +func TestMain(m *testing.M) { + // Source directory must be a leveldb dir with the required blocks accessible via rawdb.ReadBlock. + flag.StringVar(&sourceBlockDirArg, "source-block-dir", sourceBlockDirArg, "DB directory storing executable block range.") + // Target block directory to write blocks into when executing TestExportBlockRange. + flag.StringVar(&targetBlockDirArg, "target-block-dir", targetBlockDirArg, "DB directory to write blocks into when executing TestExportBlockRange.") + + // Target directory assumes the current-state directory contains a db directory and a chain-data-dir directory. 
+ // - db/ + // - chain-data-dir/ + flag.StringVar(&targetDirArg, "target-dir", targetDirArg, "Target directory for the current state including VM DB and Chain Data Directory.") + flag.Uint64Var(&startBlockArg, "start-block", 101, "Start block to begin execution (exclusive).") + flag.Uint64Var(&endBlockArg, "end-block", 200, "End block to end execution (inclusive).") + flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.") + flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. After this timeout has elapsed, terminate the benchmark without error. If 0, no timeout is applied.") + + flag.BoolVar(&metricsEnabledArg, "metrics-enabled", true, "Enable metrics collection.") + flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. \"owner=tim,runner=snoopy\"") + + flag.Parse() + + customLabels, err := parseLabels(labelsArg) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to parse labels: %v\n", err) + os.Exit(1) + } + for customLabel, customValue := range customLabels { + labels[customLabel] = customValue + } + + m.Run() +} + +func BenchmarkReexecuteRange(b *testing.B) { + require.Equalf(b, 1, b.N, "BenchmarkReexecuteRange expects to run a single iteration because it overwrites the input current-state, but found (b.N=%d)", b.N) + benchmarkReexecuteRange(b, sourceBlockDirArg, targetDirArg, startBlockArg, endBlockArg, chanSizeArg, metricsEnabledArg) +} + +func benchmarkReexecuteRange(b *testing.B, sourceBlockDir string, targetDir string, startBlock uint64, endBlock uint64, chanSize int, metricsEnabled bool) { + r := require.New(b) + ctx := context.Background() + + // Create the prefix gatherer passed to the VM and register it with the top-level, + // labeled gatherer. 
+ prefixGatherer := metrics.NewPrefixGatherer() + + vmMultiGatherer := metrics.NewPrefixGatherer() + r.NoError(prefixGatherer.Register("avalanche_evm", vmMultiGatherer)) + + // consensusRegistry includes the chain="C" label and the prefix "avalanche_snowman". + // The consensus registry is passed to the executor to mimic a subset of consensus metrics. + consensusRegistry := prometheus.NewRegistry() + r.NoError(prefixGatherer.Register("avalanche_snowman", consensusRegistry)) + + if metricsEnabled { + collectRegistry(b, "c-chain-reexecution", time.Minute, prefixGatherer, labels) + } + + log := tests.NewDefaultLogger("c-chain-reexecution") + + var ( + targetDBDir = filepath.Join(targetDir, "db") + chainDataDir = filepath.Join(targetDir, "chain-data-dir") + ) + + log.Info("re-executing block range with params", + zap.String("source-block-dir", sourceBlockDir), + zap.String("target-db-dir", targetDBDir), + zap.String("chain-data-dir", chainDataDir), + zap.Uint64("start-block", startBlock), + zap.Uint64("end-block", endBlock), + zap.Int("chan-size", chanSize), + ) + + blockChan, err := createBlockChanFromLevelDB(b, sourceBlockDir, startBlock, endBlock, chanSize) + r.NoError(err) + + dbLogger := tests.NewDefaultLogger("db") + + db, err := leveldb.New(targetDBDir, nil, dbLogger, prometheus.NewRegistry()) + r.NoError(err) + defer func() { + log.Info("shutting down DB") + r.NoError(db.Close()) + }() + + vm, err := newMainnetCChainVM( + ctx, + db, + chainDataDir, + nil, + vmMultiGatherer, + ) + r.NoError(err) + defer func() { + log.Info("shutting down VM") + r.NoError(vm.Shutdown(ctx)) + }() + + config := vmExecutorConfig{ + Log: tests.NewDefaultLogger("vm-executor"), + Registry: consensusRegistry, + ExecutionTimeout: executionTimeout, + } + executor, err := newVMExecutor(vm, config) + r.NoError(err) + + start := time.Now() + r.NoError(executor.executeSequence(ctx, blockChan)) + elapsed := time.Since(start) + + b.ReportMetric(0, "ns/op") // Set default ns/op to 0 to hide from 
the output + getTopLevelMetrics(b, prefixGatherer, elapsed) // Report the desired top-level metrics +} + +func newMainnetCChainVM( + ctx context.Context, + vmAndSharedMemoryDB database.Database, + chainDataDir string, + configBytes []byte, + metricsGatherer metrics.MultiGatherer, +) (block.ChainVM, error) { + factory := factory.Factory{} + vmIntf, err := factory.New(logging.NoLog{}) + if err != nil { + return nil, fmt.Errorf("failed to create VM from factory: %w", err) + } + vm := vmIntf.(block.ChainVM) + + blsKey, err := localsigner.New() + if err != nil { + return nil, fmt.Errorf("failed to create BLS key: %w", err) + } + + blsPublicKey := blsKey.PublicKey() + warpSigner := warp.NewSigner(blsKey, constants.MainnetID, mainnetCChainID) + + genesisConfig := genesis.GetConfig(constants.MainnetID) + + sharedMemoryDB := prefixdb.New([]byte("sharedmemory"), vmAndSharedMemoryDB) + atomicMemory := atomic.NewMemory(sharedMemoryDB) + + chainIDToSubnetID := map[ids.ID]ids.ID{ + mainnetXChainID: constants.PrimaryNetworkID, + mainnetCChainID: constants.PrimaryNetworkID, + ids.Empty: constants.PrimaryNetworkID, + } + + if err := vm.Initialize( + ctx, + &snow.Context{ + NetworkID: constants.MainnetID, + SubnetID: constants.PrimaryNetworkID, + ChainID: mainnetCChainID, + NodeID: ids.GenerateTestNodeID(), + PublicKey: blsPublicKey, + NetworkUpgrades: upgrade.Mainnet, + + XChainID: mainnetXChainID, + CChainID: mainnetCChainID, + AVAXAssetID: mainnetAvaxAssetID, + + Log: tests.NewDefaultLogger("mainnet-vm-reexecution"), + SharedMemory: atomicMemory.NewSharedMemory(mainnetCChainID), + BCLookup: ids.NewAliaser(), + Metrics: metricsGatherer, + + WarpSigner: warpSigner, + + ValidatorState: &validatorstest.State{ + GetSubnetIDF: func(_ context.Context, chainID ids.ID) (ids.ID, error) { + subnetID, ok := chainIDToSubnetID[chainID] + if ok { + return subnetID, nil + } + return ids.Empty, fmt.Errorf("unknown chainID: %s", chainID) + }, + }, + ChainDataDir: chainDataDir, + }, + 
prefixdb.New([]byte("vm"), vmAndSharedMemoryDB), + []byte(genesisConfig.CChainGenesis), + nil, + configBytes, + nil, + &enginetest.Sender{}, + ); err != nil { + return nil, fmt.Errorf("failed to initialize VM: %w", err) + } + + return vm, nil +} + +type blockResult struct { + BlockBytes []byte + Height uint64 + Err error +} + +type vmExecutorConfig struct { + Log logging.Logger + // Registry is the registry to register the metrics with. + Registry prometheus.Registerer + // ExecutionTimeout is the maximum timeout to continue executing blocks. + // If 0, no timeout is applied. If non-zero, the executor will exit early + // WITHOUT error after hitting the timeout. + // This is useful to provide consistent duration benchmarks. + ExecutionTimeout time.Duration +} + +type vmExecutor struct { + config vmExecutorConfig + vm block.ChainVM + metrics *consensusMetrics +} + +func newVMExecutor(vm block.ChainVM, config vmExecutorConfig) (*vmExecutor, error) { + metrics, err := newConsensusMetrics(config.Registry) + if err != nil { + return nil, fmt.Errorf("failed to create consensus metrics: %w", err) + } + + return &vmExecutor{ + vm: vm, + metrics: metrics, + config: config, + }, nil +} + +func (e *vmExecutor) execute(ctx context.Context, blockBytes []byte) error { + blk, err := e.vm.ParseBlock(ctx, blockBytes) + if err != nil { + return fmt.Errorf("failed to parse block: %w", err) + } + if err := blk.Verify(ctx); err != nil { + return fmt.Errorf("failed to verify block %s at height %d: %w", blk.ID(), blk.Height(), err) + } + + if err := blk.Accept(ctx); err != nil { + return fmt.Errorf("failed to accept block %s at height %d: %w", blk.ID(), blk.Height(), err) + } + e.metrics.lastAcceptedHeight.Set(float64(blk.Height())) + + return nil +} + +func (e *vmExecutor) executeSequence(ctx context.Context, blkChan <-chan blockResult) error { + blkID, err := e.vm.LastAccepted(ctx) + if err != nil { + return fmt.Errorf("failed to get last accepted block: %w", err) + } + blk, err := 
e.vm.GetBlock(ctx, blkID) + if err != nil { + return fmt.Errorf("failed to get last accepted block by blkID %s: %w", blkID, err) + } + + start := time.Now() + e.config.Log.Info("last accepted block", zap.String("blkID", blkID.String()), zap.Uint64("height", blk.Height())) + + if e.config.ExecutionTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, e.config.ExecutionTimeout) + defer cancel() + } + + for blkResult := range blkChan { + if blkResult.Err != nil { + return blkResult.Err + } + + if blkResult.Height%1000 == 0 { + e.config.Log.Info("executing block", zap.Uint64("height", blkResult.Height)) + } + if err := e.execute(ctx, blkResult.BlockBytes); err != nil { + return err + } + + select { + case <-ctx.Done(): + e.config.Log.Info("exiting early due to context timeout", zap.Duration("elapsed", time.Since(start)), zap.Duration("execution-timeout", e.config.ExecutionTimeout)) + return nil + default: + } + } + e.config.Log.Info("finished executing sequence") + + return nil +} + +func createBlockChanFromLevelDB(tb testing.TB, sourceDir string, startBlock, endBlock uint64, chanSize int) (<-chan blockResult, error) { + r := require.New(tb) + ch := make(chan blockResult, chanSize) + + db, err := leveldb.New(sourceDir, nil, logging.NoLog{}, prometheus.NewRegistry()) + if err != nil { + return nil, fmt.Errorf("failed to create leveldb database from %q: %w", sourceDir, err) + } + tb.Cleanup(func() { + r.NoError(db.Close()) + }) + + go func() { + defer close(ch) + + for i := startBlock; i <= endBlock; i++ { + blockBytes, err := db.Get(binary.BigEndian.AppendUint64(nil, i)) + if err != nil { + ch <- blockResult{ + BlockBytes: nil, + Err: fmt.Errorf("failed to get block %d: %w", i, err), + } + return + } + + ch <- blockResult{ + BlockBytes: blockBytes, + Height: i, + Err: nil, + } + } + }() + + return ch, nil +} + +func TestExportBlockRange(t *testing.T) { + exportBlockRange(t, sourceBlockDirArg, targetBlockDirArg, startBlockArg, 
endBlockArg, chanSizeArg) +} + +func exportBlockRange(tb testing.TB, sourceDir string, targetDir string, startBlock, endBlock uint64, chanSize int) { + r := require.New(tb) + blockChan, err := createBlockChanFromLevelDB(tb, sourceDir, startBlock, endBlock, chanSize) + r.NoError(err) + + db, err := leveldb.New(targetDir, nil, logging.NoLog{}, prometheus.NewRegistry()) + r.NoError(err) + tb.Cleanup(func() { + r.NoError(db.Close()) + }) + + batch := db.NewBatch() + for blkResult := range blockChan { + r.NoError(batch.Put(binary.BigEndian.AppendUint64(nil, blkResult.Height), blkResult.BlockBytes)) + + if batch.Size() > 10*units.MiB { + r.NoError(batch.Write()) + batch = db.NewBatch() + } + } + + r.NoError(batch.Write()) +} + +type consensusMetrics struct { + lastAcceptedHeight prometheus.Gauge +} + +// newConsensusMetrics creates a subset of the metrics from snowman consensus +// [engine](../../snow/engine/snowman/metrics.go). +// +// The registry passed in is expected to be registered with the prefix +// "avalanche_snowman" and the chain label (ex. chain="C") that would be handled +// by the[chain manager](../../../chains/manager.go). +func newConsensusMetrics(registry prometheus.Registerer) (*consensusMetrics, error) { + m := &consensusMetrics{ + lastAcceptedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "last_accepted_height", + Help: "last height accepted", + }), + } + if err := registry.Register(m.lastAcceptedHeight); err != nil { + return nil, fmt.Errorf("failed to register last accepted height metric: %w", err) + } + return m, nil +} + +// collectRegistry starts prometheus and collects metrics from the provided gatherer. +// Attaches the provided labels + GitHub labels if available to the collected metrics. 
+func collectRegistry(tb testing.TB, name string, timeout time.Duration, gatherer prometheus.Gatherer, labels map[string]string) { + r := require.New(tb) + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + tb.Cleanup(cancel) + + r.NoError(tmpnet.StartPrometheus(ctx, tests.NewDefaultLogger("prometheus"))) + + server, err := tests.NewPrometheusServer(gatherer) + r.NoError(err) + + var sdConfigFilePath string + tb.Cleanup(func() { + // Ensure a final metrics scrape. + // This default delay is set above the default scrape interval used by StartPrometheus. + time.Sleep(tmpnet.NetworkShutdownDelay) + + r.NoError(errors.Join( + server.Stop(), + func() error { + if sdConfigFilePath != "" { + return os.Remove(sdConfigFilePath) + } + return nil + }(), + )) + }) + + sdConfigFilePath, err = tmpnet.WritePrometheusSDConfig(name, tmpnet.SDConfig{ + Targets: []string{server.Address()}, + Labels: labels, + }, true /* withGitHubLabels */) + r.NoError(err) +} + +func parseLabels(labelsStr string) (map[string]string, error) { + labels := make(map[string]string) + if labelsStr == "" { + return labels, nil + } + for i, label := range strings.Split(labelsStr, ",") { + parts := strings.SplitN(label, "=", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid label format at index %d: %q (expected key=value)", i, label) + } + labels[parts[0]] = parts[1] + } + return labels, nil +} + +func getTopLevelMetrics(b *testing.B, registry prometheus.Gatherer, elapsed time.Duration) { + r := require.New(b) + + gasUsed, err := getCounterMetricValue(registry, "avalanche_evm_eth_chain_block_gas_used_processed") + r.NoError(err) + mgasPerSecond := gasUsed / 1_000_000 / elapsed.Seconds() + b.ReportMetric(mgasPerSecond, "mgas/s") +} + +func getCounterMetricValue(registry prometheus.Gatherer, query string) (float64, error) { + metricFamilies, err := registry.Gather() + if err != nil { + return 0, fmt.Errorf("failed to gather metrics: %w", err) + } + + for _, mf := range 
metricFamilies { + if mf.GetName() == query { + return mf.GetMetric()[0].Counter.GetValue(), nil + } + } + + return 0, fmt.Errorf("metric %s not found", query) +}