From a8facbf476c1954bfc2fbd3d7b186cee870f723f Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 22:36:10 +0200 Subject: [PATCH 1/4] core/state/snapshot: reuse memory data instead of hitting disk when generating --- core/state/snapshot/generate.go | 19 ++++++++++++++- trie/iterator.go | 43 ++++++++++++++++++++++++++------- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index ed431fcb3d8..612bf7c188d 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -31,6 +31,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" @@ -434,6 +435,19 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, } meter.Mark(1) } + + // We use the snap data to build up a cache which can be used by the + // main account trie as a primary lookup when resolving hashes + var snapTrieDb *trie.Database + if len(result.keys) > 0 { + snapNodeCache := memorydb.New() + snapTrieDb = trie.NewDatabase(snapNodeCache) + snapTrie, _ := trie.New(common.Hash{}, snapTrieDb) + for i, key := range result.keys { + snapTrie.Update(key, result.vals[i]) + } + snapTrie.Commit(nil) + } tr := result.tr if tr == nil { tr, err = trie.New(root, dl.triedb) @@ -442,9 +456,11 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, return false, nil, errMissingTrie } } + var ( trieMore bool - iter = trie.NewIterator(tr.NodeIterator(origin)) + nodeIt = tr.NodeIterator(origin) + iter = trie.NewIterator(nodeIt) kvkeys, kvvals = result.keys, result.vals // counters @@ -458,6 +474,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, start = time.Now() internal time.Duration ) + nodeIt.AddResolver(snapTrieDb) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { trieMore = true diff --git a/trie/iterator.go b/trie/iterator.go index 4f72258a1d8..732d0d89c3d 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -102,6 +102,8 @@ type NodeIterator interface { // iterator is not positioned at a leaf. Callers must not retain references // to the value after calling Next. LeafProof() [][]byte + + AddResolver(*Database) } // nodeIteratorState represents the iteration state at one particular node of the @@ -115,10 +117,15 @@ type nodeIteratorState struct { } type nodeIterator struct { - trie *Trie // Trie being iterated - stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state - path []byte // Path to the current node - err error // Failure set in case of an internal error in the iterator + trie *Trie // Trie being iterated + stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state + path []byte // Path to the current node + err error // Failure set in case of an internal error in the iterator + primaryResolver *Database +} + +func (it *nodeIterator) AddResolver(db *Database) { + it.primaryResolver = db } // errIteratorEnd is stored in nodeIterator.err when iteration is done. @@ -262,7 +269,7 @@ func (it *nodeIterator) init() (*nodeIteratorState, error) { if root != emptyRoot { state.hash = root } - return state, state.resolve(it.trie, nil) + return state, state.resolve(it, nil) } // peek creates the next state of the iterator. @@ -286,7 +293,7 @@ func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, er } state, path, ok := it.nextChild(parent, ancestor) if ok { - if err := state.resolve(it.trie, path); err != nil { + if err := state.resolve(it, path); err != nil { return parent, &parent.index, path, err } return state, &parent.index, path, nil @@ -319,7 +326,7 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by } state, path, ok := it.nextChildAt(parent, ancestor, seekKey) if ok { - if err := state.resolve(it.trie, path); err != nil { + if err := state.resolve(it, path); err != nil { return parent, &parent.index, path, err } return state, &parent.index, path, nil @@ -330,9 +337,19 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by return nil, nil, nil, errIteratorEnd } -func (st *nodeIteratorState) resolve(tr *Trie, path []byte) error { +func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { + if it.primaryResolver != nil { + if resolved := it.primaryResolver.node(common.BytesToHash(hash)); resolved != nil { + return resolved, nil + } + } + resolved, err := it.trie.resolveHash(hash, path) + return resolved, err +} + +func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error { if hash, ok := st.node.(hashNode); ok { - resolved, err := tr.resolveHash(hash, path) + resolved, err := it.resolveHash(hash, path) if err != nil { return err } @@ -517,6 +534,10 @@ func (it *differenceIterator) Path() []byte { return it.b.Path() } +func (it *differenceIterator) AddResolver(db *Database) { + panic("Not implemented") +} + func (it *differenceIterator) Next(bool) bool { // Invariants: // - We always advance at least one element in b. @@ -624,6 +645,10 @@ func (it *unionIterator) Path() []byte { return (*it.items)[0].Path() } +func (it *unionIterator) AddResolver(db *Database) { + panic("Not implemented") +} + // Next returns the next node in the union of tries being iterated over. // // It does this by maintaining a heap of iterators, sorted by the iteration From 4897e104dd1ef0bf3741978c07b63869b25c6d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Thu, 22 Apr 2021 21:02:55 +0300 Subject: [PATCH 2/4] trie: minor nitpicks wrt the resolver optimization --- trie/iterator.go | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/trie/iterator.go b/trie/iterator.go index 732d0d89c3d..e0a842aead2 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -103,6 +103,17 @@ type NodeIterator interface { // to the value after calling Next. LeafProof() [][]byte + // AddResolver sets an intermediate database to use for looking up trie nodes + // before reaching into the real persistent layer. + // + // This is not required for normal operation, rather is an optimization for + // cases where trie nodes can be recovered from some external mechanism without + // reading from disk. In those cases, this resolver allows short circuiting + // accesses and returning them from memory. + // + // Before adding a similar mechanism to any other place in Geth, consider + // making trie.Database an interface and wrapping at that level. It's a huge + // refactor, but it could be worth it if another occurance arises. AddResolver(*Database) } @@ -117,15 +128,16 @@ type nodeIteratorState struct { } type nodeIterator struct { - trie *Trie // Trie being iterated - stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state - path []byte // Path to the current node - err error // Failure set in case of an internal error in the iterator - primaryResolver *Database + trie *Trie // Trie being iterated + stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state + path []byte // Path to the current node + err error // Failure set in case of an internal error in the iterator + + resolver *Database // Optional intermediate resolver above the disk layer } -func (it *nodeIterator) AddResolver(db *Database) { - it.primaryResolver = db +func (it *nodeIterator) AddResolver(resolver *Database) { + it.resolver = resolver } // errIteratorEnd is stored in nodeIterator.err when iteration is done. @@ -338,8 +350,8 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by } func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { - if it.primaryResolver != nil { - if resolved := it.primaryResolver.node(common.BytesToHash(hash)); resolved != nil { + if it.resolver != nil { + if resolved := it.resolver.node(common.BytesToHash(hash)); resolved != nil { return resolved, nil } } @@ -535,7 +547,7 @@ func (it *differenceIterator) Path() []byte { } func (it *differenceIterator) AddResolver(db *Database) { - panic("Not implemented") + panic("not implemented") } func (it *differenceIterator) Next(bool) bool { @@ -646,7 +658,7 @@ func (it *unionIterator) Path() []byte { } func (it *unionIterator) AddResolver(db *Database) { - panic("Not implemented") + panic("not implemented") } // Next returns the next node in the union of tries being iterated over. From 1acfe92b95fe8061b083959b5c0b697525a4ef14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 23 Apr 2021 08:54:00 +0300 Subject: [PATCH 3/4] core/state/snapshot, trie: use key/value store for resolver --- core/state/snapshot/generate.go | 11 ++++++----- trie/iterator.go | 23 +++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 612bf7c188d..13b34f4d694 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -438,15 +438,16 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, // We use the snap data to build up a cache which can be used by the // main account trie as a primary lookup when resolving hashes - var snapTrieDb *trie.Database + var snapNodeCache ethdb.KeyValueStore if len(result.keys) > 0 { - snapNodeCache := memorydb.New() - snapTrieDb = trie.NewDatabase(snapNodeCache) + snapNodeCache = memorydb.New() + snapTrieDb := trie.NewDatabase(snapNodeCache) snapTrie, _ := trie.New(common.Hash{}, snapTrieDb) for i, key := range result.keys { snapTrie.Update(key, result.vals[i]) } - snapTrie.Commit(nil) + root, _ := snapTrie.Commit(nil) + snapTrieDb.Commit(root, false, nil) } tr := result.tr if tr == nil { @@ -474,7 +475,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, start = time.Now() internal time.Duration ) - nodeIt.AddResolver(snapTrieDb) + nodeIt.AddResolver(snapNodeCache) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { trieMore = true diff --git a/trie/iterator.go b/trie/iterator.go index e0a842aead2..d2adf42da89 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -22,6 +22,7 @@ import ( "errors" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" ) @@ -114,7 +115,7 @@ type NodeIterator interface { // Before adding a similar mechanism to any other place in Geth, consider // making trie.Database an interface and wrapping at that level. It's a huge // refactor, but it could be worth it if another occurance arises. - AddResolver(*Database) + AddResolver(ethdb.KeyValueStore) } // nodeIteratorState represents the iteration state at one particular node of the @@ -133,11 +134,7 @@ type nodeIterator struct { path []byte // Path to the current node err error // Failure set in case of an internal error in the iterator - resolver *Database // Optional intermediate resolver above the disk layer -} - -func (it *nodeIterator) AddResolver(resolver *Database) { - it.resolver = resolver + resolver ethdb.KeyValueStore // Optional intermediate resolver above the disk layer } // errIteratorEnd is stored in nodeIterator.err when iteration is done. @@ -162,6 +159,10 @@ func newNodeIterator(trie *Trie, start []byte) NodeIterator { return it } +func (it *nodeIterator) AddResolver(resolver ethdb.KeyValueStore) { + it.resolver = resolver +} + func (it *nodeIterator) Hash() common.Hash { if len(it.stack) == 0 { return common.Hash{} @@ -351,8 +352,10 @@ func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []by func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) { if it.resolver != nil { - if resolved := it.resolver.node(common.BytesToHash(hash)); resolved != nil { - return resolved, nil + if blob, err := it.resolver.Get(hash); err == nil && len(blob) > 0 { + if resolved, err := decodeNode(hash, blob); err == nil { + return resolved, nil + } } } resolved, err := it.trie.resolveHash(hash, path) @@ -546,7 +549,7 @@ func (it *differenceIterator) Path() []byte { return it.b.Path() } -func (it *differenceIterator) AddResolver(db *Database) { +func (it *differenceIterator) AddResolver(resolver ethdb.KeyValueStore) { panic("not implemented") } @@ -657,7 +660,7 @@ func (it *unionIterator) Path() []byte { return (*it.items)[0].Path() } -func (it *unionIterator) AddResolver(db *Database) { +func (it *unionIterator) AddResolver(resolver ethdb.KeyValueStore) { panic("not implemented") } From b32edf9e40b4fd2947dfcd4206318797a7c2b4d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Fri, 23 Apr 2021 12:51:10 +0300 Subject: [PATCH 4/4] trie: fix linter --- trie/iterator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/iterator.go b/trie/iterator.go index d2adf42da89..406f216c229 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -114,7 +114,7 @@ type NodeIterator interface { // // Before adding a similar mechanism to any other place in Geth, consider // making trie.Database an interface and wrapping at that level. It's a huge - // refactor, but it could be worth it if another occurance arises. + // refactor, but it could be worth it if another occurrence arises. AddResolver(ethdb.KeyValueStore) }