core/filtermaps: two dimensional log filter data structure (WIP) #30370

Open · wants to merge 4 commits into base: master
596 changes: 596 additions & 0 deletions core/filtermaps/filtermaps.go

Large diffs are not rendered by default.
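The collapsed filtermaps.go diff defines everything the tests below depend on: the logValuesPerMap / valuesPerMap constants, the FilterRow type, columnIndex, and FilterRow.potentialMatches. The sketch below only illustrates that contract as the tests exercise it; the parameter values, the concrete mixing function and the modInverse32 helper are assumptions made for illustration, not the code hidden in this diff.

package filtermaps

import (
	"encoding/binary"
	"sort"

	"github.com/ethereum/go-ethereum/common"
)

// Illustrative parameters only; the real values live in filtermaps.go.
const (
	logValuesPerMap = 16
	valuesPerMap    = 1 << logValuesPerMap
)

// FilterRow is one row of a filter map: the list of column indices stored in it.
type FilterRow []uint32

// columnIndex maps a (log value index, log value hash) pair to a column by
// running the sub-index within the map through a hash-keyed bijection over
// uint32. The concrete mixing below is a placeholder, not the PR's transform.
func columnIndex(lvIndex uint64, lvHash common.Hash) uint32 {
	x := uint32(lvIndex % valuesPerMap)
	x ^= binary.LittleEndian.Uint32(lvHash[0:4])
	x *= binary.LittleEndian.Uint32(lvHash[4:8])*2 + 1 // odd multiplier, invertible mod 2^32
	x ^= binary.LittleEndian.Uint32(lvHash[8:12])
	return x
}

// potentialMatches reverses the transform for every column in the row and
// returns, in ascending order, each candidate log value index that falls into
// the map's range. A column written for a different hash decodes to an
// in-range candidate with probability valuesPerMap/2^32 = 1/valuesPerMap under
// these toy parameters, which is the false positive rate the tests measure.
func (row FilterRow) potentialMatches(mapIndex uint32, lvHash common.Hash) []uint64 {
	mulInv := modInverse32(binary.LittleEndian.Uint32(lvHash[4:8])*2 + 1)
	lvStart := uint64(mapIndex) << logValuesPerMap
	var matches []uint64
	for _, col := range row {
		x := col ^ binary.LittleEndian.Uint32(lvHash[8:12])
		x *= mulInv
		x ^= binary.LittleEndian.Uint32(lvHash[0:4])
		if x < valuesPerMap {
			matches = append(matches, lvStart+uint64(x))
		}
	}
	sort.Slice(matches, func(i, j int) bool { return matches[i] < matches[j] })
	// duplicate row entries decode to the same candidate; collapse repeats
	unique := matches[:0]
	for _, m := range matches {
		if len(unique) == 0 || unique[len(unique)-1] != m {
			unique = append(unique, m)
		}
	}
	return unique
}

// modInverse32 returns the multiplicative inverse of an odd uint32 modulo 2^32
// (Newton iteration, doubling the number of correct low bits each step).
func modInverse32(a uint32) uint32 {
	x := a
	for i := 0; i < 5; i++ {
		x *= 2 - a*x
	}
	return x
}

The essential property is that, for a fixed hash, the transform is invertible, so each stored column decodes back to at most one candidate index, while columns written for other hashes only rarely decode into the map's range.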

112 changes: 112 additions & 0 deletions core/filtermaps/filtermaps_test.go
@@ -0,0 +1,112 @@
package filtermaps

import (
	"math/rand"
	"testing"

	"github.com/ethereum/go-ethereum/common"
)

// TestSingleMatch checks that a single entry generated by columnIndex is
// reverse transformed by potentialMatches into exactly the original log
// value index.
func TestSingleMatch(t *testing.T) {
	for count := 0; count < 100000; count++ {
		// generate a row with a single random entry
		mapIndex := rand.Uint32()
		lvIndex := uint64(mapIndex)<<logValuesPerMap + uint64(rand.Intn(valuesPerMap))
		var lvHash common.Hash
		rand.Read(lvHash[:])
		row := FilterRow{columnIndex(lvIndex, lvHash)}
		matches := row.potentialMatches(mapIndex, lvHash)
		// check if it has been reverse transformed correctly
		if len(matches) != 1 {
			t.Fatalf("Invalid length of matches (got %d, expected 1)", len(matches))
		}
		if matches[0] != lvIndex {
			t.Fatalf("Incorrect match returned (got %d, expected %d)", matches[0], lvIndex)
		}
	}
}

const (
	testPmCount = 100
	testPmLen   = 1000
)

// TestPotentialMatches fills a row with many entries (unique hashes, a long
// series of entries for one hash, random duplicates, shuffled order), then
// checks that potentialMatches returns every true match in ascending order
// while keeping the false positive count close to the expected value.
func TestPotentialMatches(t *testing.T) {
	var falsePositives int
	for count := 0; count < testPmCount; count++ {
		mapIndex := rand.Uint32()
		lvStart := uint64(mapIndex) << logValuesPerMap
		var row FilterRow
		lvIndices := make([]uint64, testPmLen)
		lvHashes := make([]common.Hash, testPmLen+1)
		for i := range lvIndices {
			// add testPmLen single entries with different log value hashes at different indices
			lvIndices[i] = lvStart + uint64(rand.Intn(valuesPerMap))
			rand.Read(lvHashes[i][:])
			row = append(row, columnIndex(lvIndices[i], lvHashes[i]))
		}
		// add the same log value hash at the first testPmLen log value indices of the map's range
		rand.Read(lvHashes[testPmLen][:])
		for lvIndex := lvStart; lvIndex < lvStart+testPmLen; lvIndex++ {
			row = append(row, columnIndex(lvIndex, lvHashes[testPmLen]))
		}
		// randomly duplicate some entries
		for i := 0; i < testPmLen; i++ {
			row = append(row, row[rand.Intn(len(row))])
		}
		// randomly mix up order of elements
		for i := len(row) - 1; i > 0; i-- {
			j := rand.Intn(i)
			row[i], row[j] = row[j], row[i]
		}
		// check retrieved matches while also counting false positives
		for i, lvHash := range lvHashes {
			matches := row.potentialMatches(mapIndex, lvHash)
			if i < testPmLen {
				// check single entry match
				if len(matches) < 1 {
					t.Fatalf("Invalid length of matches (got %d, expected >=1)", len(matches))
				}
				var found bool
				for _, lvi := range matches {
					if lvi == lvIndices[i] {
						found = true
					} else {
						falsePositives++
					}
				}
				if !found {
					t.Fatalf("Expected match not found (got %v, expected %d)", matches, lvIndices[i])
				}
			} else {
				// check "long series" match
				if len(matches) < testPmLen {
					t.Fatalf("Invalid length of matches (got %d, expected >=%d)", len(matches), testPmLen)
				}
				// since results are ordered, the first testPmLen entries should always match exactly
				for j := 0; j < testPmLen; j++ {
					if matches[j] != lvStart+uint64(j) {
						t.Fatalf("Incorrect match at index %d (got %d, expected %d)", j, matches[j], lvStart+uint64(j))
					}
				}
				// the rest are false positives
				falsePositives += len(matches) - testPmLen
			}
		}
	}
	// Whenever looking for a certain log value hash, each entry in the row that
	// was generated by another log value hash (a "foreign entry") has a
	// 1 / valuesPerMap chance of yielding a false positive.
	// We have testPmLen unique hash entries and a testPmLen long series of entries
	// for the same hash. For each of the testPmLen unique hash entries there are
	// testPmLen*2-1 foreign entries, while for the long series there are testPmLen
	// foreign entries. This means that after performing all these filtering runs,
	// we have processed 2*testPmLen^2 foreign entries, which gives us an estimate
	// of how many false positives to expect.
	expFalse := testPmCount * testPmLen * testPmLen * 2 / valuesPerMap
	if falsePositives < expFalse/2 || falsePositives > expFalse*3/2 {
		t.Fatalf("False positive rate out of expected range (got %d, expected %d +-50%%)", falsePositives, expFalse)
	}
}
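As a rough sanity check of the expFalse bound above: with the illustrative valuesPerMap = 1<<16 from the earlier sketch (not necessarily the value this PR uses), expFalse = 100 * 1000 * 1000 * 2 / 65536 = 3051, so across the 100 generated maps the test would tolerate roughly 1525 to 4576 false positives before failing.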