From c17ee5d8203bcc2b7e1ae18fa4a9793083e236cd Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Fri, 22 Mar 2024 10:11:53 +0800 Subject: [PATCH 1/7] core, eth/protocols/snap, trie: implement gentrie --- core/state/snapshot/conversion.go | 10 +- core/state/statedb.go | 4 +- eth/protocols/snap/gentrie.go | 283 ++++++++++++++++++++++++ eth/protocols/snap/gentrie_test.go | 341 +++++++++++++++++++++++++++++ eth/protocols/snap/metrics.go | 31 ++- eth/protocols/snap/sync.go | 165 +++++--------- trie/stacktrie.go | 147 +++---------- trie/stacktrie_fuzzer_test.go | 16 +- trie/stacktrie_test.go | 87 -------- trie/trie_test.go | 13 +- 10 files changed, 743 insertions(+), 354 deletions(-) create mode 100644 eth/protocols/snap/gentrie.go create mode 100644 eth/protocols/snap/gentrie_test.go diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index 681be7ebc01f..8a0fd1989af4 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -362,15 +362,15 @@ func generateTrieRoot(db ethdb.KeyValueWriter, scheme string, it Iterator, accou } func stackTrieGenerate(db ethdb.KeyValueWriter, scheme string, owner common.Hash, in chan trieKV, out chan common.Hash) { - options := trie.NewStackTrieOptions() + var onTrieNode trie.OnTrieNode if db != nil { - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + onTrieNode = func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(db, owner, path, hash, blob, scheme) - }) + } } - t := trie.NewStackTrie(options) + t := trie.NewStackTrie(onTrieNode) for leaf := range in { t.Update(leaf.key[:], leaf.value) } - out <- t.Commit() + out <- t.Hash() } diff --git a/core/state/statedb.go b/core/state/statedb.go index f2c2e7a798ef..d3d383389c23 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -981,12 +981,10 @@ func (s *StateDB) fastDeleteStorage(addrHash common.Hash, root common.Hash) (com nodes = trienode.NewNodeSet(addrHash) slots = make(map[common.Hash][]byte) ) - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { nodes.AddNode(path, trienode.NewDeleted()) size += common.StorageSize(len(path)) }) - stack := trie.NewStackTrie(options) for iter.Next() { slot := common.CopyBytes(iter.Slot()) if err := iter.Error(); err != nil { // error might occur after Slot function diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go new file mode 100644 index 000000000000..d452cbc8b24f --- /dev/null +++ b/eth/protocols/snap/gentrie.go @@ -0,0 +1,283 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
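An illustrative, self-contained sketch of the API shape introduced by the hunks above — trie.NewStackTrie now takes a single OnTrieNode callback in place of a StackTrieOptions value, and Hash subsumes the old Commit. The keys and values below are made up; this is not part of the patch itself:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/trie"
)

func main() {
	// Collect every node the stack trie emits. The path and blob slices are
	// volatile, so they are copied before being retained.
	nodes := make(map[string][]byte)
	st := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
		nodes[string(path)] = common.CopyBytes(blob)
	})
	// Keys must be inserted in strictly ascending order.
	if err := st.Update([]byte{0x01, 0x02}, []byte("v1")); err != nil {
		panic(err)
	}
	if err := st.Update([]byte{0x01, 0x03}, []byte("v2")); err != nil {
		panic(err)
	}
	// Hash finalizes the trie: the remaining right-boundary nodes are pushed
	// through the callback and the root hash is returned; there is no
	// separate Commit step anymore.
	root := st.Hash()
	fmt.Println("root", root, "nodes", len(nodes))
}
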
+ +package snap + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" +) + +// genTrie interface is used by the trie to generate merkle tree nodes based +// on a received batch of states. +type genTrie interface { + // update inserts the state into generator trie. + update(key, value []byte) error + + // commit flushes the leftover nodes produced in the trie into database. + // The nodes on right boundary won't be committed unless this function + // is called. The flag complete should be set to true if there are more + // items on the right side. + // + // This function must be called before flushing database batch. + commit(complete bool) common.Hash +} + +// pathTrie is a wrapper over the stackTrie, incorporating numerous additional +// logics to handle the semi-completed trie and potential leftover dangling +// nodes in the database. It is utilized for constructing the merkle tree nodes +// in path mode during the snap sync process. +type pathTrie struct { + owner common.Hash // identifier of trie owner, empty for account trie + tr *trie.StackTrie // underlying raw stack trie + first []byte // the path of first written node + last []byte // the path of last written node + + // Flag whether the nodes on the left boundary are skipped for committing. + // If it's set, then nodes on the left boundary are regarded as incomplete + // due to potentially missing left children. + noLeftBound bool + db ethdb.KeyValueReader + batch ethdb.Batch +} + +// newPathTrie initializes the path trie. +func newPathTrie(owner common.Hash, noLeftBound bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { + tr := &pathTrie{ + owner: owner, + noLeftBound: noLeftBound, + db: db, + batch: batch, + } + tr.tr = trie.NewStackTrie(tr.onTrieNode) + return tr +} + +// onTrieNode is invoked whenever a new node is produced by the stackTrie. +// +// As the produced nodes might be incomplete if they are on the boundaries +// (left or right), this function has the ability to detect the incomplete +// ones and filter them out for committing. Namely, only the nodes belonging +// to completed subtries will be committed. +// +// Additionally, the assumption is made that there may exist leftover dangling +// nodes in the database. This function has the ability to detect all the +// dangling nodes that fall within the committed subtries (on the path covered +// by internal extension nodes) and remove them from the database. This property +// ensures that the entire path space is uniquely occupied by committed subtries. +// +// Furthermore, all leftover dangling nodes along the path from committed tries +// to the root node should be removed as well; otherwise, they might potentially +// disrupt the state healing process, leaving behind an inconsistent state. +func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { + // Filter out the nodes on the left boundary if noLeftBound is configured. + // Nodes are considered to be on the left boundary if it's the first one + // produced, or on the path of the first produced one. + if t.noLeftBound && (t.first == nil || bytes.HasPrefix(t.first, path)) { + if t.first == nil { + // Memorize the path of first produced node, which is regarded + // as left boundary. Deep-copy is necessary as the path given + // is volatile. + t.first = append([]byte{}, path...) 
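			// The stack trie hashes subtrees bottom-up, so the first node
			// reported here lies on the left edge of the trie; any node
			// reported later whose path is a prefix of t.first is an
			// ancestor of it and may likewise be missing children on its
			// left side, which is why the HasPrefix check above treats it
			// as a boundary node as well.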
+ + // The position of first complete sub trie (e.g. N_3) can be determined + // by the first produced node(e.g. N_1) correctly, with a branch node + // (e.g. N_2) as the common parent for shared path prefix. Therefore, + // the nodes along the path from root to N_1 can be regarded as left + // boundary. The leftover dangling nodes on left boundary should be + // cleaned out first before committing any node. + // + // +-----+ + // | N_2 | parent for shared path prefix + // +-----+ + // /- -\ + // +-----+ +-----+ + // First produced one | N_1 | | N_3 | First completed sub trie + // +-----+ +-----+ + // + // Nodes must be cleaned from top to bottom as it's possible the procedure + // is interrupted in the middle. + // + // The node with the path of the first produced node is not removed, as + // it's a sibling of the first complete sub-trie, not the parent. There + // is no reason to remove it. + for i := 0; i < len(path); i++ { + t.delete(path[:i], false) + } + } + return + } + // If boundary filtering is not configured, or the node is not on the left + // boundary, commit it to database. + // + // Note, the nodes fall within the path between extension node and its + // **in disk** child must be cleaned out before committing the extension + // node. This is essential in snap sync to avoid leaving dangling nodes + // within this range covered by extension node which could potentially + // break the state healing. + // + // The target node is detected if its path is the prefix of last written + // one and path gap is non-zero. + // + // Nodes must be cleaned from top to bottom, including the node with the + // path of the committed extension node itself. + if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { + for i := len(path); i < len(t.last); i++ { + t.delete(t.last[:i], true) + } + } + t.write(path, blob) + + // Update the last flag. Deep-copy is necessary as the provided path is volatile. + if t.last == nil { + t.last = append([]byte{}, path...) + } else { + t.last = append(t.last[:0], path...) + } +} + +// write commits the node write to provided database batch in path mode. +func (t *pathTrie) write(path []byte, blob []byte) { + if t.owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(t.batch, path, blob) + } else { + rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob) + } +} + +// delete commits the node deletion to provided database batch in path mode. +func (t *pathTrie) delete(path []byte, inner bool) { + if t.owner == (common.Hash{}) { + if rawdb.ExistsAccountTrieNode(t.db, path) { + rawdb.DeleteAccountTrieNode(t.batch, path) + if inner { + accountInnerDeleteGauge.Inc(1) + } else { + accountOuterDeleteGauge.Inc(1) + } + } + if inner { + accountInnerLookupGauge.Inc(1) + } else { + accountOuterLookupGauge.Inc(1) + } + return + } + if rawdb.ExistsStorageTrieNode(t.db, t.owner, path) { + rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) + if inner { + storageInnerDeleteGauge.Inc(1) + } else { + storageOuterDeleteGauge.Inc(1) + } + } + if inner { + storageInnerLookupGauge.Inc(1) + } else { + storageOuterLookupGauge.Inc(1) + } +} + +// update implements genTrie interface, inserting a (key, value) pair into the +// stack trie. +func (t *pathTrie) update(key, value []byte) error { + return t.tr.Update(key, value) +} + +// commit implements genTrie interface, flushing the right boundary if it's +// regarded as complete. Otherwise, the nodes on the right boundary are discarded +// and cleaned up. 
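// A minimal usage sketch (variable names are illustrative, mirroring the
// callers in sync.go): entries are fed in ascending key order, and complete
// is passed as true only when no further entries can follow on the right
// side of this range:
//
//	tr := newPathTrie(owner, next != (common.Hash{}), db, batch)
//	for _, e := range entries {
//		tr.update(e.k, e.v)
//	}
//	root := tr.commit(last == common.MaxHash)
//	batch.Write()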
+// +// Note, this function must be called before flushing database batch, otherwise, +// dangling nodes might be left in database. +func (t *pathTrie) commit(complete bool) common.Hash { + // If the right boundary is claimed as complete, flush them out. + // The nodes on both left and right boundary will still be filtered + // out if left boundary filtering is configured. + if complete { + return t.tr.Hash() + } + // If the right boundary is claimed as incomplete, the uncommitted + // nodes should be discarded, as they might be incomplete due to + // missing children on the right side. Furthermore, previously committed + // nodes can be the children of the right boundary nodes; therefore, + // the nodes of the right boundary must be cleaned out! + // + // The position of the last complete sub-trie (e.g., N_1) can be correctly + // determined by the last produced node (e.g., N_3), with a branch node + // (e.g., N_2) as the common parent for the shared path prefix. Therefore, + // the nodes along the path from the root to N_3 can be regarded as the + // right boundary. + // + // +-----+ + // | N_2 | parent for shared path prefix + // +-----+ + // /- -\ + // +-----+ +-----+ + // Last complete subtrie | N_1 | | N_3 | Last produced node + // +-----+ +-----+ + // + // Another interesting scenario occurs when the trie is committed due to + // too many items being accumulated in the batch. To flush them out to + // the database, the path of the last inserted item is temporarily treated + // as an incomplete right boundary, and nodes on this path are removed. + // + // However, this path will be reclaimed as an internal path by inserting + // more items after the batch flush. Newly produced nodes on this path + // can be committed with no issues as they are actually complete (also + // from a database perspective, first deleting and then rewriting is + // still a valid data update). + // + // Nodes must be cleaned from top to bottom as it's possible the procedure + // is interrupted in the middle. + for i := 0; i < len(t.last); i++ { + // The node with the path of the last produced node is not removed, as + // it's a sibling of the last complete sub-trie, not the parent. There + // is no reason to remove it. + t.delete(t.last[:i], false) + } + return common.Hash{} // the hash is meaningless for incomplete commit +} + +// hashTrie is a wrapper over the stackTrie for implementing genTrie interface. +type hashTrie struct { + tr *trie.StackTrie +} + +// newHashTrie initializes the hash trie. +func newHashTrie(batch ethdb.Batch) *hashTrie { + return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + rawdb.WriteLegacyTrieNode(batch, hash, blob) + })} +} + +// update implements genTrie interface, inserting a (key, value) pair into +// the stack trie. +func (t *hashTrie) update(key, value []byte) error { + return t.tr.Update(key, value) +} + +// commit implements genTrie interface, committing the nodes on right boundary. +func (t *hashTrie) commit(complete bool) common.Hash { + if !complete { + return common.Hash{} // the hash is meaningless for incomplete commit + } + return t.tr.Hash() // return hash only if it's claimed as complete +} diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go new file mode 100644 index 000000000000..b3a97e2d01e0 --- /dev/null +++ b/eth/protocols/snap/gentrie_test.go @@ -0,0 +1,341 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snap + +import ( + "bytes" + "math/rand" + "slices" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/internal/testrand" + "github.com/ethereum/go-ethereum/trie" +) + +type replayer struct { + paths []string // sort in fifo order + hashes []common.Hash // empty for deletion + unknowns int // counter for unknown write +} + +func newBatchReplay() *replayer { + return &replayer{} +} + +func (r *replayer) decode(key []byte, value []byte) { + account := rawdb.IsAccountTrieNode(key) + storage := rawdb.IsStorageTrieNode(key) + if !account && !storage { + r.unknowns += 1 + return + } + var path []byte + if account { + _, path = rawdb.ResolveAccountTrieNodeKey(key) + } else { + _, owner, inner := rawdb.ResolveStorageTrieNode(key) + path = append(owner.Bytes(), inner...) + } + r.paths = append(r.paths, string(path)) + + if len(value) == 0 { + r.hashes = append(r.hashes, common.Hash{}) + } else { + r.hashes = append(r.hashes, crypto.Keccak256Hash(value)) + } +} + +func (r *replayer) updates() map[string]common.Hash { + set := make(map[string]common.Hash) + for i, path := range r.paths { + set[path] = r.hashes[i] + } + return set +} + +// Put inserts the given value into the key-value data store. +func (r *replayer) Put(key []byte, value []byte) error { + r.decode(key, value) + return nil +} + +// Delete removes the key from the key-value data store. +func (r *replayer) Delete(key []byte) error { + r.decode(key, nil) + return nil +} + +func byteToHex(str []byte) []byte { + l := len(str) * 2 + var nibbles = make([]byte, l) + for i, b := range str { + nibbles[i*2] = b / 16 + nibbles[i*2+1] = b % 16 + } + return nibbles +} + +// innerNodes returns the internal nodes narrowed by two boundaries along with +// the leftmost and rightmost sub-trie roots. +func innerNodes(first, last []byte, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) { + var ( + leftRoot []byte + rightRoot []byte + firstHex = byteToHex(first) + lastHex = byteToHex(last) + inner = make(map[string]common.Hash) + ) + for path, hash := range nodes { + if hash == (common.Hash{}) { + t.Fatalf("Unexpected deletion, %v", []byte(path)) + } + // Filter out the siblings on the left side or the left boundary nodes. + if bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path)) { + continue + } + // Filter out the siblings on the right side or the right boundary nodes. 
+ if bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path)) { + continue + } + inner[path] = hash + + // Track the path of the leftmost sub trie root + if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 { + leftRoot = []byte(path) + } + // Track the path of the rightmost sub trie root + if rightRoot == nil || + (bytes.Compare(rightRoot, []byte(path)) < 0) || + (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { + rightRoot = []byte(path) + } + } + return inner, leftRoot, rightRoot +} + +func buildPartial(owner common.Hash, db ethdb.KeyValueReader, batch ethdb.Batch, entries []*kv, first, last int) *replayer { + tr := newPathTrie(owner, first != 0, db, batch) + for i := first; i <= last; i++ { + tr.update(entries[i].k, entries[i].v) + } + tr.commit(last == len(entries)-1) + + replay := newBatchReplay() + batch.Replay(replay) + + return replay +} + +// TestPartialGentree verifies if the trie constructed with partial states can +// generate consistent trie nodes that match those of the full trie. +func TestPartialGentree(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(1024) + 10 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(first, last int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + ) + inner, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, nodes, t) + + // Write the junk nodes as the dangling + var injects []string + for path := range nodes { + for i := 0; i < len(path); i++ { + _, ok := nodes[path[:i]] + if ok { + continue + } + injects = append(injects, path[:i]) + } + } + for _, path := range injects { + rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32)) + } + // Build the partial tree with specific range + replay := buildPartial(common.Hash{}, db, batch, entries, first, last) + if replay.unknowns > 0 { + t.Fatalf("Unknown database write: %d", replay.unknowns) + } + + // Ensure all the internal nodes are produced + set := replay.updates() + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + + // Make sure all injected junks are correctly deleted + for _, path := range injects { + if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) { + continue + } + if bytes.Compare([]byte(path), rightRoot) > 0 { + continue + } + if hash, ok := set[path]; !ok || hash != (common.Hash{}) { + t.Fatalf("Missing delete, %v", []byte(path)) + } + } + } + for j := 0; j < 100; j++ { + var ( + first int + last int + ) + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + check(first, last) + } + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {2, len(entries) - 1}, // no left + {2, len(entries) - 2}, // no left and right + {2, 
len(entries) - 2}, // no left and right + {len(entries) / 2, len(entries) / 2}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + check(c.first, c.last) + } + } +} + +// TestFlushPartialTree tests the gentrie can produce complete inner trie nodes +// even with lots of batch flushes in the middle. +func TestFlushPartialTree(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {10, len(entries) - 1}, // no left + {10, len(entries) - 2}, // no left and right + {10, len(entries) - 10}, // no left and right + {11, 11}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + combined = db.NewBatch() + ) + inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, nodes, t) + + tr := newPathTrie(common.Hash{}, c.first != 0, db, batch) + for i := c.first; i <= c.last; i++ { + tr.update(entries[i].k, entries[i].v) + if rand.Intn(2) == 0 { + tr.commit(false) + + batch.Replay(combined) + batch.Write() + batch.Reset() + } + } + tr.commit(c.last == len(entries)-1) + + batch.Replay(combined) + batch.Write() + batch.Reset() + + r := newBatchReplay() + combined.Replay(r) + + // Ensure all the internal nodes are produced + set := r.updates() + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + } +} diff --git a/eth/protocols/snap/metrics.go b/eth/protocols/snap/metrics.go index 19e9151824cb..6878e5b28058 100644 --- a/eth/protocols/snap/metrics.go +++ b/eth/protocols/snap/metrics.go @@ -27,21 +27,28 @@ var ( IngressRegistrationErrorMeter = metrics.NewRegisteredMeter(ingressRegistrationErrorName, nil) EgressRegistrationErrorMeter = metrics.NewRegisteredMeter(egressRegistrationErrorName, nil) - // deletionGauge is the metric to track how many trie node deletions - // are performed in total during the sync process. - deletionGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete", nil) + // accountInnerDeleteGauge is the metric to track how many dangling trie nodes + // covered by extension node in account trie are deleted during the sync. + accountInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/inner", nil) - // lookupGauge is the metric to track how many trie node lookups are - // performed to determine if node needs to be deleted. - lookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/lookup", nil) + // storageInnerDeleteGauge is the metric to track how many dangling trie nodes + // covered by extension node in storage trie are deleted during the sync. 
+ storageInnerDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/inner", nil) + + // accountOuterDeleteGauge is the metric to track how many dangling trie nodes + // above the committed nodes in account trie are deleted during the sync. + accountOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/account/outer", nil) - // boundaryAccountNodesGauge is the metric to track how many boundary trie - // nodes in account trie are met. - boundaryAccountNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/account", nil) + // storageOuterDeleteGauge is the metric to track how many dangling trie nodes + // above the committed nodes in storage trie are deleted during the sync. + storageOuterDeleteGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/delete/storage/outer", nil) - // boundaryAccountNodesGauge is the metric to track how many boundary trie - // nodes in storage tries are met. - boundaryStorageNodesGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/boundary/storage", nil) + // lookupGauge is the metric to track how many trie node lookups are + // performed to determine if node needs to be deleted. + accountInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/inner", nil) + accountOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/account/lookup/outer", nil) + storageInnerLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/inner", nil) + storageOuterLookupGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/lookup/outer", nil) // smallStorageGauge is the metric to track how many storages are small enough // to retrieved in one or two request. diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index d5d6fd6d69e5..761e28ce9766 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -94,6 +94,9 @@ const ( // trienodeHealThrottleDecrease is the divisor for the throttle when the // rate of arriving data is lower than the rate of processing it. trienodeHealThrottleDecrease = 1.25 + + // batchSizeThreshold is the maximum size allowed for gentrie batch. + batchSizeThreshold = 8 * 1024 * 1024 ) var ( @@ -321,8 +324,8 @@ type accountTask struct { stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval stateCompleted map[common.Hash]struct{} // Account hashes whose storage have been completed - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots + genBatch ethdb.Batch // Batch used by the node generator + genTrie genTrie // Node generator from storage slots done bool // Flag whether the task can be removed } @@ -360,8 +363,8 @@ type storageTask struct { root common.Hash // Storage root hash for this instance req *storageRequest // Pending request to fill this task - genBatch ethdb.Batch // Batch used by the node generator - genTrie *trie.StackTrie // Node generator from storage slots + genBatch ethdb.Batch // Batch used by the node generator + genTrie genTrie // Node generator from storage slots done bool // Flag whether the task can be removed } @@ -749,19 +752,6 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { } } -// cleanPath is used to remove the dangling nodes in the stackTrie. 
-func (s *Syncer) cleanPath(batch ethdb.Batch, owner common.Hash, path []byte) { - if owner == (common.Hash{}) && rawdb.ExistsAccountTrieNode(s.db, path) { - rawdb.DeleteAccountTrieNode(batch, path) - deletionGauge.Inc(1) - } - if owner != (common.Hash{}) && rawdb.ExistsStorageTrieNode(s.db, owner, path) { - rawdb.DeleteStorageTrieNode(batch, owner, path) - deletionGauge.Inc(1) - } - lookupGauge.Inc(1) -} - // loadSyncStatus retrieves a previously aborted sync status from the database, // or generates a fresh one if none is available. func (s *Syncer) loadSyncStatus() { @@ -792,23 +782,12 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(task.genBatch, common.Hash{}, path, hash, blob, s.scheme) - }) + if s.scheme == rawdb.HashScheme { + task.genTrie = newHashTrie(task.genBatch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(task.genBatch, common.Hash{}, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(task.Next != (common.Hash{}), task.Last != common.MaxHash, boundaryAccountNodesGauge) + task.genTrie = newPathTrie(common.Hash{}, task.Next != common.Hash{}, s.db, task.genBatch) } - task.genTrie = trie.NewStackTrie(options) - // Restore leftover storage tasks for accountHash, subtasks := range task.SubTasks { for _, subtask := range subtasks { @@ -820,23 +799,12 @@ func (s *Syncer) loadSyncStatus() { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - owner := accountHash // local assignment for stacktrie writer closure - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, blob, s.scheme) - }) + if s.scheme == rawdb.HashScheme { + subtask.genTrie = newHashTrie(subtask.genBatch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(subtask.genBatch, owner, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. 
- options = options.WithSkipBoundary(subtask.Next != common.Hash{}, subtask.Last != common.MaxHash, boundaryStorageNodesGauge) + subtask.genTrie = newPathTrie(accountHash, subtask.Next != common.Hash{}, s.db, subtask.genBatch) } - subtask.genTrie = trie.NewStackTrie(options) } } } @@ -888,20 +856,12 @@ func (s *Syncer) loadSyncStatus() { s.accountBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, common.Hash{}, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, common.Hash{}, path) - }) - // Skip the left boundary if it's not the first range. - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(next != common.Hash{}, last != common.MaxHash, boundaryAccountNodesGauge) + tr = newPathTrie(common.Hash{}, next != common.Hash{}, s.db, batch) } s.tasks = append(s.tasks, &accountTask{ Next: next, @@ -909,7 +869,7 @@ func (s *Syncer) loadSyncStatus() { SubTasks: make(map[common.Hash][]*storageTask), genBatch: batch, stateCompleted: make(map[common.Hash]struct{}), - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) log.Debug("Created account sync task", "from", next, "last", last) next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) @@ -920,11 +880,13 @@ func (s *Syncer) loadSyncStatus() { func (s *Syncer) saveSyncStatus() { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { + task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist account slots", "err", err) } for _, subtasks := range task.SubTasks { for _, subtask := range subtasks { + subtask.genTrie.commit(false) if err := subtask.genBatch.Write(); err != nil { log.Error("Failed to persist storage slots", "err", err) } @@ -2155,25 +2117,20 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - owner := account // local assignment for stacktrie writer closure - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, owner, path) - }) // Keep the left boundary as it's the first range. - // Skip the right boundary if it's not the last range. 
- options = options.WithSkipBoundary(false, r.End() != common.MaxHash, boundaryStorageNodesGauge) + tr = newPathTrie(account, false, s.db, batch) } tasks = append(tasks, &storageTask{ Next: common.Hash{}, Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) for r.Next() { batch := ethdb.HookedBatch{ @@ -2182,27 +2139,19 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { s.storageBytes += common.StorageSize(len(key) + len(value)) }, } - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, owner, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner and also filter out boundary nodes - // only in the context of the path scheme. Deletion is forbidden in the - // hash scheme, as it can disrupt state completeness. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, owner, path) - }) - // Skip the left boundary as it's not the first range - // Skip the right boundary if it's not the last range. - options = options.WithSkipBoundary(true, r.End() != common.MaxHash, boundaryStorageNodesGauge) + tr = newPathTrie(account, true, s.db, batch) } tasks = append(tasks, &storageTask{ Next: r.Start(), Last: r.End(), root: acc.Root, genBatch: batch, - genTrie: trie.NewStackTrie(options), + genTrie: tr, }) } for _, task := range tasks { @@ -2248,26 +2197,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { if i < len(res.hashes)-1 || res.subTask == nil { // no need to make local reassignment of account: this closure does not outlive the loop - options := trie.NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - rawdb.WriteTrieNode(batch, account, path, hash, blob, s.scheme) - }) + var tr genTrie + if s.scheme == rawdb.HashScheme { + tr = newHashTrie(batch) + } if s.scheme == rawdb.PathScheme { - // Configure the dangling node cleaner only in the context of the - // path scheme. Deletion is forbidden in the hash scheme, as it can - // disrupt state completeness. - // - // Notably, boundary nodes can be also kept because the whole storage - // trie is complete. - options = options.WithCleaner(func(path []byte) { - s.cleanPath(batch, account, path) - }) + // Keep the left boundary as it's complete + tr = newPathTrie(account, false, s.db, batch) } - tr := trie.NewStackTrie(options) for j := 0; j < len(res.hashes[i]); j++ { - tr.Update(res.hashes[i][j][:], res.slots[i][j]) + tr.update(res.hashes[i][j][:], res.slots[i][j]) } - tr.Commit() + tr.commit(true) } // Persist the received storage segments. 
These flat state maybe // outdated during the sync, but it can be fixed later during the @@ -2278,14 +2219,14 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // If we're storing large contracts, generate the trie nodes // on the fly to not trash the gluing points if i == len(res.hashes)-1 && res.subTask != nil { - res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j]) + res.subTask.genTrie.update(res.hashes[i][j][:], res.slots[i][j]) } } } // Large contracts could have generated new trie nodes, flush them to disk if res.subTask != nil { if res.subTask.done { - root := res.subTask.genTrie.Commit() + root := res.subTask.genTrie.commit(res.subTask.Last == common.MaxHash) if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2302,8 +2243,8 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { } } } - } - if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize { + } else if res.subTask.genBatch.ValueSize() > batchSizeThreshold { + res.subTask.genTrie.commit(false) if err := res.subTask.genBatch.Write(); err != nil { log.Error("Failed to persist stack slots", "err", err) } @@ -2486,7 +2427,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { if err != nil { panic(err) // Really shouldn't ever happen } - task.genTrie.Update(hash[:], full) + task.genTrie.update(hash[:], full) } } // Flush anything written just now and update the stats @@ -2519,9 +2460,13 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // flush after finalizing task.done. It's fine even if we crash and lose this // write as it will only cause more data to be downloaded during heal. if task.done { - task.genTrie.Commit() - } - if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { + task.genTrie.commit(task.Last == common.MaxHash) + if err := task.genBatch.Write(); err != nil { + log.Error("Failed to persist stack account", "err", err) + } + task.genBatch.Reset() + } else if task.genBatch.ValueSize() > batchSizeThreshold { + task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist stack account", "err", err) } diff --git a/trie/stacktrie.go b/trie/stacktrie.go index f2f5355c49e8..32784a6c53bc 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -23,8 +23,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/metrics" ) var ( @@ -32,62 +30,31 @@ var ( _ = types.TrieHasher((*StackTrie)(nil)) ) -// StackTrieOptions contains the configured options for manipulating the stackTrie. -type StackTrieOptions struct { - Writer func(path []byte, hash common.Hash, blob []byte) // The function to commit the dirty nodes - Cleaner func(path []byte) // The function to clean up dangling nodes - - SkipLeftBoundary bool // Flag whether the nodes on the left boundary are skipped for committing - SkipRightBoundary bool // Flag whether the nodes on the right boundary are skipped for committing - boundaryGauge metrics.Gauge // Gauge to track how many boundary nodes are met -} - -// NewStackTrieOptions initializes an empty options for stackTrie. -func NewStackTrieOptions() *StackTrieOptions { return &StackTrieOptions{} } - -// WithWriter configures trie node writer within the options. 
-func (o *StackTrieOptions) WithWriter(writer func(path []byte, hash common.Hash, blob []byte)) *StackTrieOptions { - o.Writer = writer - return o -} - -// WithCleaner configures the cleaner in the option for removing dangling nodes. -func (o *StackTrieOptions) WithCleaner(cleaner func(path []byte)) *StackTrieOptions { - o.Cleaner = cleaner - return o -} - -// WithSkipBoundary configures whether the left and right boundary nodes are -// filtered for committing, along with a gauge metrics to track how many -// boundary nodes are met. -func (o *StackTrieOptions) WithSkipBoundary(skipLeft, skipRight bool, gauge metrics.Gauge) *StackTrieOptions { - o.SkipLeftBoundary = skipLeft - o.SkipRightBoundary = skipRight - o.boundaryGauge = gauge - return o -} +// OnTrieNode is a callback method to invoke when a trie node is produced +// by the stack trie. +// +// The caller should not modify the contents of the returned path and blob +// slice, and their contents may change after the call. Please deep-copy +// the slices if necessary. +type OnTrieNode func(path []byte, hash common.Hash, blob []byte) // StackTrie is a trie implementation that expects keys to be inserted // in order. Once it determines that a subtree will no longer be inserted // into, it will hash it and free up the memory it uses. type StackTrie struct { - options *StackTrieOptions - root *stNode - h *hasher - - first []byte // The (hex-encoded without terminator) key of first inserted entry, tracked as left boundary. - last []byte // The (hex-encoded without terminator) key of last inserted entry, tracked as right boundary. + root *stNode + h *hasher + last []byte + onTrieNode OnTrieNode } -// NewStackTrie allocates and initializes an empty trie. -func NewStackTrie(options *StackTrieOptions) *StackTrie { - if options == nil { - options = NewStackTrieOptions() - } +// NewStackTrie allocates and initializes an empty trie. The produced nodes will +// be discarded immediately if no callback is provided. +func NewStackTrie(onTrieNode OnTrieNode) *StackTrie { return &StackTrie{ - options: options, - root: stPool.Get().(*stNode), - h: newHasher(false), + root: stPool.Get().(*stNode), + h: newHasher(false), + onTrieNode: onTrieNode, } } @@ -101,10 +68,6 @@ func (t *StackTrie) Update(key, value []byte) error { if bytes.Compare(t.last, k) >= 0 { return errors.New("non-ascending key order") } - // track the first and last inserted entries. - if t.first == nil { - t.first = append([]byte{}, k...) - } if t.last == nil { t.last = append([]byte{}, k...) // allocate key slice } else { @@ -114,19 +77,9 @@ func (t *StackTrie) Update(key, value []byte) error { return nil } -// MustUpdate is a wrapper of Update and will omit any encountered error but -// just print out an error message. -func (t *StackTrie) MustUpdate(key, value []byte) { - if err := t.Update(key, value); err != nil { - log.Error("Unhandled trie error in StackTrie.Update", "err", err) - } -} - // Reset resets the stack trie object to empty state. func (t *StackTrie) Reset() { - t.options = NewStackTrieOptions() t.root = stPool.Get().(*stNode) - t.first = nil t.last = nil } @@ -346,10 +299,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) { // // This method also sets 'st.type' to hashedNode, and clears 'st.key'. 
func (t *StackTrie) hash(st *stNode, path []byte) { - var ( - blob []byte // RLP-encoded node blob - internal [][]byte // List of node paths covered by the extension node - ) + var blob []byte // RLP-encoded node blob switch st.typ { case hashedNode: return @@ -384,15 +334,6 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // recursively hash and commit child as the first step t.hash(st.children[0], append(path, st.key...)) - // Collect the path of internal nodes between shortNode and its **in disk** - // child. This is essential in the case of path mode scheme to avoid leaving - // danging nodes within the range of this internal path on disk, which would - // break the guarantee for state healing. - if len(st.children[0].val) >= 32 && t.options.Cleaner != nil { - for i := 1; i < len(st.key); i++ { - internal = append(internal, append(path, st.key[:i]...)) - } - } // encode the extension node n := shortNode{Key: hexToCompactInPlace(st.key)} if len(st.children[0].val) < 32 { @@ -416,11 +357,12 @@ func (t *StackTrie) hash(st *stNode, path []byte) { default: panic("invalid node type") } - + // Convert the node type to hashNode and reset the key slice. st.typ = hashedNode st.key = st.key[:0] - // Skip committing the non-root node if the size is smaller than 32 bytes. + // Skip committing the non-root node if the size is smaller than 32 bytes + // as tiny nodes are always embedded in their parent except root node. if len(blob) < 32 && len(path) > 0 { st.val = common.CopyBytes(blob) return @@ -429,51 +371,20 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // input values. st.val = t.h.hashData(blob) - // Short circuit if the stack trie is not configured for writing. - if t.options.Writer == nil { - return + // Invoke the callback it's provided. Notably, the path and blob slices are + // volatile, please deep-copy the slices in callback if the contents need + // to be saved. + if t.onTrieNode != nil { + t.onTrieNode(path, common.BytesToHash(st.val), blob) } - // Skip committing if the node is on the left boundary and stackTrie is - // configured to filter the boundary. - if t.options.SkipLeftBoundary && bytes.HasPrefix(t.first, path) { - if t.options.boundaryGauge != nil { - t.options.boundaryGauge.Inc(1) - } - return - } - // Skip committing if the node is on the right boundary and stackTrie is - // configured to filter the boundary. - if t.options.SkipRightBoundary && bytes.HasPrefix(t.last, path) { - if t.options.boundaryGauge != nil { - t.options.boundaryGauge.Inc(1) - } - return - } - // Clean up the internal dangling nodes covered by the extension node. - // This should be done before writing the node to adhere to the committing - // order from bottom to top. - for _, path := range internal { - t.options.Cleaner(path) - } - t.options.Writer(path, common.BytesToHash(st.val), blob) } // Hash will firstly hash the entire trie if it's still not hashed and then commit -// all nodes to the associated database. Actually most of the trie nodes have been -// committed already. The main purpose here is to commit the nodes on right boundary. -// -// For stack trie, Hash and Commit are functionally identical. +// all leftover nodes to the associated database. Actually most of the trie nodes +// have been committed already. The main purpose here is to commit the nodes on +// right boundary. 
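// The removed Commit was an alias of Hash, so callers now invoke Hash directly
// (see the conversion.go and trie_test.go hunks); a minimal sketch with an
// illustrative callback and placeholder key/value:
//
//	st := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
//		// persist or inspect the node; copy path/blob if retained
//	})
//	st.Update(key, value) // keys must arrive in ascending order
//	root := st.Hash()     // emits leftover right-boundary nodes, returns root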
func (t *StackTrie) Hash() common.Hash { n := t.root t.hash(n, nil) return common.BytesToHash(n.val) } - -// Commit will firstly hash the entire trie if it's still not hashed and then commit -// all nodes to the associated database. Actually most of the trie nodes have been -// committed already. The main purpose here is to commit the nodes on right boundary. -// -// For stack trie, Hash and Commit are functionally identical. -func (t *StackTrie) Commit() common.Hash { - return t.Hash() -} diff --git a/trie/stacktrie_fuzzer_test.go b/trie/stacktrie_fuzzer_test.go index 57a31d115f5a..5126e0bd07ce 100644 --- a/trie/stacktrie_fuzzer_test.go +++ b/trie/stacktrie_fuzzer_test.go @@ -46,11 +46,9 @@ func fuzz(data []byte, debugging bool) { trieA = NewEmpty(dbA) spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()} dbB = newTestDatabase(rawdb.NewDatabase(spongeB), rawdb.HashScheme) - - options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + trieB = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(spongeB, common.Hash{}, path, hash, blob, dbB.Scheme()) }) - trieB = NewStackTrie(options) vals []*kv maxElements = 10000 // operate on unique keys only @@ -99,10 +97,9 @@ func fuzz(data []byte, debugging bool) { if debugging { fmt.Printf("{\"%#x\" , \"%#x\"} // stacktrie.Update\n", kv.k, kv.v) } - trieB.MustUpdate(kv.k, kv.v) + trieB.Update(kv.k, kv.v) } rootB := trieB.Hash() - trieB.Commit() if rootA != rootB { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB)) } @@ -114,20 +111,19 @@ func fuzz(data []byte, debugging bool) { // Ensure all the nodes are persisted correctly var ( - nodeset = make(map[string][]byte) // path -> blob - optionsC = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { + nodeset = make(map[string][]byte) // path -> blob + trieC = NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { if crypto.Keccak256Hash(blob) != hash { panic("invalid node blob") } nodeset[string(path)] = common.CopyBytes(blob) }) - trieC = NewStackTrie(optionsC) checked int ) for _, kv := range vals { - trieC.MustUpdate(kv.k, kv.v) + trieC.Update(kv.k, kv.v) } - rootC := trieC.Commit() + rootC := trieC.Hash() if rootA != rootC { panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootC)) } diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go index 58115bc33a40..f053b5112d3f 100644 --- a/trie/stacktrie_test.go +++ b/trie/stacktrie_test.go @@ -19,14 +19,11 @@ package trie import ( "bytes" "math/big" - "math/rand" - "slices" "testing" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/internal/testrand" "github.com/stretchr/testify/assert" ) @@ -381,90 +378,6 @@ func TestStacktrieNotModifyValues(t *testing.T) { } } -func buildPartialTree(entries []*kv, t *testing.T) map[string]common.Hash { - var ( - options = NewStackTrieOptions() - nodes = make(map[string]common.Hash) - ) - var ( - first int - last = len(entries) - 1 - - noLeft bool - noRight bool - ) - // Enter split mode if there are at least two elements - if rand.Intn(5) != 0 { - for { - first = rand.Intn(len(entries)) - last = rand.Intn(len(entries)) - if first <= last { - break - } - } - if first != 0 { - noLeft = true - } - if last != len(entries)-1 { - noRight = true - } - } - options = options.WithSkipBoundary(noLeft, noRight, nil) 
- options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { - nodes[string(path)] = hash - }) - tr := NewStackTrie(options) - - for i := first; i <= last; i++ { - tr.MustUpdate(entries[i].k, entries[i].v) - } - tr.Commit() - return nodes -} - -func TestPartialStackTrie(t *testing.T) { - for round := 0; round < 100; round++ { - var ( - n = rand.Intn(100) + 1 - entries []*kv - ) - for i := 0; i < n; i++ { - var val []byte - if rand.Intn(3) == 0 { - val = testrand.Bytes(3) - } else { - val = testrand.Bytes(32) - } - entries = append(entries, &kv{ - k: testrand.Bytes(32), - v: val, - }) - } - slices.SortFunc(entries, (*kv).cmp) - - var ( - nodes = make(map[string]common.Hash) - options = NewStackTrieOptions().WithWriter(func(path []byte, hash common.Hash, blob []byte) { - nodes[string(path)] = hash - }) - ) - tr := NewStackTrie(options) - - for i := 0; i < len(entries); i++ { - tr.MustUpdate(entries[i].k, entries[i].v) - } - tr.Commit() - - for j := 0; j < 100; j++ { - for path, hash := range buildPartialTree(entries, t) { - if nodes[path] != hash { - t.Errorf("%v, want %x, got %x", []byte(path), nodes[path], hash) - } - } - } - } -} - func TestStackTrieErrors(t *testing.T) { s := NewStackTrie(nil) // Deletion diff --git a/trie/trie_test.go b/trie/trie_test.go index 87a0785cfb04..6ecd20c21894 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -963,11 +963,9 @@ func TestCommitSequenceStackTrie(t *testing.T) { id: "b", values: make(map[string]string), } - options := NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) - stTrie := NewStackTrie(options) // Fill the trie with elements for i := 0; i < count; i++ { @@ -993,7 +991,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { s.Flush() // And flush stacktrie -> disk - stRoot := stTrie.Commit() + stRoot := stTrie.Hash() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } @@ -1034,12 +1032,9 @@ func TestCommitSequenceSmallRoot(t *testing.T) { id: "b", values: make(map[string]string), } - options := NewStackTrieOptions() - options = options.WithWriter(func(path []byte, hash common.Hash, blob []byte) { + stTrie := NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { rawdb.WriteTrieNode(stackTrieSponge, common.Hash{}, path, hash, blob, db.Scheme()) }) - stTrie := NewStackTrie(options) - // Add a single small-element to the trie(s) key := make([]byte, 5) key[0] = 1 @@ -1053,7 +1048,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) { db.Commit(root) // And flush stacktrie -> disk - stRoot := stTrie.Commit() + stRoot := stTrie.Hash() if stRoot != root { t.Fatalf("root wrong, got %x exp %x", stRoot, root) } From 0863aea71c346b35f024de89a34ca99565da11a3 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Mon, 25 Mar 2024 15:03:48 +0800 Subject: [PATCH 2/7] eth/protocols/snap: add test --- eth/protocols/snap/gentrie_test.go | 74 ++++++++++++++++++++++++++++++ eth/protocols/snap/sync.go | 5 ++ 2 files changed, 79 insertions(+) diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go index b3a97e2d01e0..10a006f053f8 100644 --- a/eth/protocols/snap/gentrie_test.go +++ b/eth/protocols/snap/gentrie_test.go @@ -339,3 +339,77 @@ func TestFlushPartialTree(t *testing.T) { } } } + +// TestBoundSplit ensures two consecutive trie chunks are not overlapped with +// each 
other. +func TestBoundSplit(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + for j := 0; j < 100; j++ { + var ( + next int + last int + db = rawdb.NewMemoryDatabase() + + lastRightRoot []byte + ) + for { + if next == len(entries) { + break + } + last = rand.Intn(len(entries)-next) + next + + r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) + updates := r.updates() + + // Skip if the chunk is zero-size + if len(updates) == 0 { + next = last + 1 + continue + } + + // Ensure the updates in two consecutive chunks are not overlapped. + // The only overlapping part should be deletion. + if lastRightRoot != nil && len(updates) > 0 { + // Derive the path of left-most node in this chunk + var leftRoot []byte + for path, hash := range r.updates() { + if hash == (common.Hash{}) { + t.Fatalf("Unexpected deletion %v", []byte(path)) + } + if leftRoot == nil || bytes.Compare(leftRoot, []byte(path)) > 0 { + leftRoot = []byte(path) + } + } + if bytes.HasPrefix(lastRightRoot, leftRoot) || bytes.HasPrefix(leftRoot, lastRightRoot) { + t.Fatalf("Two chunks are not correctly separated, lastRight: %v, left: %v", lastRightRoot, leftRoot) + } + } + + // Track the updates as the last chunk + var rightRoot []byte + for path := range updates { + if rightRoot == nil || + (bytes.Compare(rightRoot, []byte(path)) < 0) || + (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { + rightRoot = []byte(path) + } + } + lastRightRoot = rightRoot + next = last + 1 + } + } +} diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 761e28ce9766..b0ddb8e403f7 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -880,12 +880,17 @@ func (s *Syncer) loadSyncStatus() { func (s *Syncer) saveSyncStatus() { // Serialize any partial progress to disk before spinning down for _, task := range s.tasks { + // Claim the right boundary as incomplete before flushing the + // accumulated nodes in batch, the nodes on right boundary + // will be discarded and cleaned up by this call. task.genTrie.commit(false) if err := task.genBatch.Write(); err != nil { log.Error("Failed to persist account slots", "err", err) } for _, subtasks := range task.SubTasks { for _, subtask := range subtasks { + // Same for account trie, discard and cleanup the + // incomplete right boundary. subtask.genTrie.commit(false) if err := subtask.genBatch.Write(); err != nil { log.Error("Failed to persist storage slots", "err", err) From 0befeaa302d42d15a82ca3259fc11e8127334e45 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Tue, 9 Apr 2024 15:36:07 +0800 Subject: [PATCH 3/7] eth/protocols/snap, trie: improve comments --- eth/protocols/snap/gentrie.go | 60 ++++++++++++++++++++--------------- trie/stacktrie.go | 5 +-- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go index d452cbc8b24f..5d58f20897ac 100644 --- a/eth/protocols/snap/gentrie.go +++ b/eth/protocols/snap/gentrie.go @@ -53,18 +53,18 @@ type pathTrie struct { // Flag whether the nodes on the left boundary are skipped for committing. // If it's set, then nodes on the left boundary are regarded as incomplete // due to potentially missing left children. 
- noLeftBound bool - db ethdb.KeyValueReader - batch ethdb.Batch + skipLeftBoundary bool + db ethdb.KeyValueReader + batch ethdb.Batch } // newPathTrie initializes the path trie. -func newPathTrie(owner common.Hash, noLeftBound bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { +func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie { tr := &pathTrie{ - owner: owner, - noLeftBound: noLeftBound, - db: db, - batch: batch, + owner: owner, + skipLeftBoundary: skipLeftBoundary, + db: db, + batch: batch, } tr.tr = trie.NewStackTrie(tr.onTrieNode) return tr @@ -87,30 +87,34 @@ func newPathTrie(owner common.Hash, noLeftBound bool, db ethdb.KeyValueReader, b // to the root node should be removed as well; otherwise, they might potentially // disrupt the state healing process, leaving behind an inconsistent state. func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { - // Filter out the nodes on the left boundary if noLeftBound is configured. + // Filter out the nodes on the left boundary if skipLeftBoundary is configured. // Nodes are considered to be on the left boundary if it's the first one // produced, or on the path of the first produced one. - if t.noLeftBound && (t.first == nil || bytes.HasPrefix(t.first, path)) { + if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { if t.first == nil { // Memorize the path of first produced node, which is regarded // as left boundary. Deep-copy is necessary as the path given // is volatile. t.first = append([]byte{}, path...) - // The position of first complete sub trie (e.g. N_3) can be determined + // The position of first complete sub trie (e.g. N_4) can be determined // by the first produced node(e.g. N_1) correctly, with a branch node - // (e.g. N_2) as the common parent for shared path prefix. Therefore, + // (e.g. N_5) as the common parent for shared path prefix. Therefore, // the nodes along the path from root to N_1 can be regarded as left - // boundary. The leftover dangling nodes on left boundary should be - // cleaned out first before committing any node. + // boundary and the parent of the first complete sub trie. The leftover + // dangling nodes on left boundary should be cleaned out first before + // committing any node. // - // +-----+ - // | N_2 | parent for shared path prefix - // +-----+ - // /- -\ + // +-------------+ + // | N_5 | parent for shared path prefix + // +-------------+ + // /- | -\ + // / | [ others ] // +-----+ +-----+ - // First produced one | N_1 | | N_3 | First completed sub trie + // First produced one | N_1 | | N_4 | First completed sub trie // +-----+ +-----+ + // /- -\ + // N-2 ... N-3 // // Nodes must be cleaned from top to bottom as it's possible the procedure // is interrupted in the middle. @@ -212,6 +216,10 @@ func (t *pathTrie) commit(complete bool) common.Hash { // The nodes on both left and right boundary will still be filtered // out if left boundary filtering is configured. if complete { + // The produced hash is meaningless if left side is incomplete + if t.skipLeftBoundary { + return common.Hash{} + } return t.tr.Hash() } // If the right boundary is claimed as incomplete, the uncommitted @@ -221,18 +229,20 @@ func (t *pathTrie) commit(complete bool) common.Hash { // the nodes of the right boundary must be cleaned out! 
// // The position of the last complete sub-trie (e.g., N_1) can be correctly - // determined by the last produced node (e.g., N_3), with a branch node - // (e.g., N_2) as the common parent for the shared path prefix. Therefore, - // the nodes along the path from the root to N_3 can be regarded as the - // right boundary. + // determined by the last produced node (e.g., N_4), with a branch node + // (e.g., N_5) as the common parent for the shared path prefix. Therefore, + // the nodes along the path from the root to N_4 can be regarded as the + // right boundary and the parent of the last complete subtrie. // // +-----+ - // | N_2 | parent for shared path prefix + // | N_5 | parent for shared path prefix // +-----+ // /- -\ // +-----+ +-----+ - // Last complete subtrie | N_1 | | N_3 | Last produced node + // Last complete subtrie | N_1 | | N_4 | Last produced node // +-----+ +-----+ + // /- -\ + // N-2 .. N-3 // // Another interesting scenario occurs when the trie is committed due to // too many items being accumulated in the batch. To flush them out to diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 32784a6c53bc..7bf7519cbc1f 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -34,8 +34,9 @@ var ( // by the stack trie. // // The caller should not modify the contents of the returned path and blob -// slice, and their contents may change after the call. Please deep-copy -// the slices if necessary. +// slice, and their contents may change after the call. It is up to the +// `onTrieNode` receiver function to deep-copy the data if it wants to +// retain it after the call ends. type OnTrieNode func(path []byte, hash common.Hash, blob []byte) // StackTrie is a trie implementation that expects keys to be inserted From ebd5fc7070b5a4fc9671a2ff0fcdc6be42245325 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Tue, 9 Apr 2024 15:55:27 +0800 Subject: [PATCH 4/7] eth/protocols/snap: improve code comment --- eth/protocols/snap/gentrie.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go index 5d58f20897ac..9ba28b56bded 100644 --- a/eth/protocols/snap/gentrie.go +++ b/eth/protocols/snap/gentrie.go @@ -138,7 +138,9 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { // break the state healing. // // The target node is detected if its path is the prefix of last written - // one and path gap is non-zero. + // one and path gap is larger than one. The current node could be a full + // node, or a shortNode with single byte key if the path gap is one, + // unnecessary to sweep the internal path in this case. // // Nodes must be cleaned from top to bottom, including the node with the // path of the committed extension node itself. From 490a66418c184a64d48da89f45fc2b0a4fac5161 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Wed, 10 Apr 2024 14:59:15 +0800 Subject: [PATCH 5/7] eth, trie: improve code comment --- eth/protocols/snap/gentrie.go | 236 ++++++++++++++++------------------ trie/stacktrie.go | 16 +-- 2 files changed, 122 insertions(+), 130 deletions(-) diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go index 9ba28b56bded..135ce0107605 100644 --- a/eth/protocols/snap/gentrie.go +++ b/eth/protocols/snap/gentrie.go @@ -25,18 +25,14 @@ import ( "github.com/ethereum/go-ethereum/trie" ) -// genTrie interface is used by the trie to generate merkle tree nodes based -// on a received batch of states. 
+// genTrie interface is used by the snap syncer to generate merkle tree nodes +// based on a received batch of states. type genTrie interface { // update inserts the state into generator trie. update(key, value []byte) error - // commit flushes the leftover nodes produced in the trie into database. - // The nodes on right boundary won't be committed unless this function - // is called. The flag complete should be set to true if there are more - // items on the right side. - // - // This function must be called before flushing database batch. + // commit flushes the right boundary nodes if complete flag is true. This + // function must be called before flushing the associated database batch. commit(complete bool) common.Hash } @@ -47,11 +43,11 @@ type genTrie interface { type pathTrie struct { owner common.Hash // identifier of trie owner, empty for account trie tr *trie.StackTrie // underlying raw stack trie - first []byte // the path of first written node - last []byte // the path of last written node + first []byte // the path of first committed node by stackTrie + last []byte // the path of last committed node by stackTrie - // Flag whether the nodes on the left boundary are skipped for committing. - // If it's set, then nodes on the left boundary are regarded as incomplete + // This flag indicates whether nodes on the left boundary are skipped for + // committing. If set, the left boundary nodes are considered incomplete // due to potentially missing left children. skipLeftBoundary bool db ethdb.KeyValueReader @@ -70,58 +66,52 @@ func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueRead return tr } -// onTrieNode is invoked whenever a new node is produced by the stackTrie. +// onTrieNode is invoked whenever a new node is committed by the stackTrie. // -// As the produced nodes might be incomplete if they are on the boundaries +// As the committed nodes might be incomplete if they are on the boundaries // (left or right), this function has the ability to detect the incomplete -// ones and filter them out for committing. Namely, only the nodes belonging -// to completed subtries will be committed. +// ones and filter them out for committing. // // Additionally, the assumption is made that there may exist leftover dangling -// nodes in the database. This function has the ability to detect all the -// dangling nodes that fall within the committed subtries (on the path covered -// by internal extension nodes) and remove them from the database. This property -// ensures that the entire path space is uniquely occupied by committed subtries. +// nodes in the database. This function has the ability to detect the dangling +// nodes that fall within the path space of committed nodes (specifically on +// the path covered by internal extension nodes) and remove them from the +// database. This property ensures that the entire path space is uniquely +// occupied by committed nodes. // -// Furthermore, all leftover dangling nodes along the path from committed tries -// to the root node should be removed as well; otherwise, they might potentially -// disrupt the state healing process, leaving behind an inconsistent state. +// Furthermore, all leftover dangling nodes along the path from committed nodes +// to the trie root (left and right boundaries) should be removed as well; +// otherwise, they might potentially disrupt the state healing process. 
func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { - // Filter out the nodes on the left boundary if skipLeftBoundary is configured. - // Nodes are considered to be on the left boundary if it's the first one - // produced, or on the path of the first produced one. + // Filter out the nodes on the left boundary if skipLeftBoundary is + // configured. Nodes are considered to be on the left boundary if + // it's the first one to be committed, or the parent/ancestor of the + // first committed node. if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) { if t.first == nil { - // Memorize the path of first produced node, which is regarded + // Memorize the path of first committed node, which is regarded // as left boundary. Deep-copy is necessary as the path given // is volatile. t.first = append([]byte{}, path...) - // The position of first complete sub trie (e.g. N_4) can be determined - // by the first produced node(e.g. N_1) correctly, with a branch node - // (e.g. N_5) as the common parent for shared path prefix. Therefore, - // the nodes along the path from root to N_1 can be regarded as left - // boundary and the parent of the first complete sub trie. The leftover - // dangling nodes on left boundary should be cleaned out first before - // committing any node. - // - // +-------------+ - // | N_5 | parent for shared path prefix - // +-------------+ - // /- | -\ - // / | [ others ] - // +-----+ +-----+ - // First produced one | N_1 | | N_4 | First completed sub trie - // +-----+ +-----+ - // /- -\ - // N-2 ... N-3 + // The left boundary can be uniquely determined by the first committed node + // from stackTrie (e.g., N_1), as the shared path prefix between the first + // two inserted state items is deterministic (the path of N_3). The path + // from trie root towards the first committed node is considered the left + // boundary. The potential leftover dangling nodes on left boundary should + // be cleaned out. // - // Nodes must be cleaned from top to bottom as it's possible the procedure - // is interrupted in the middle. + // +-----+ + // | N_3 | shared path prefix of state_1 and state_2 + // +-----+ + // /- -\ + // +-----+ +-----+ + // First committed node | N_1 | | N_2 | latest inserted node (contain state_2) + // +-----+ +-----+ // - // The node with the path of the first produced node is not removed, as - // it's a sibling of the first complete sub-trie, not the parent. There - // is no reason to remove it. + // The node with the path of the first committed one (e.g, N_1) is not + // removed because it's a sibling of the complete nodes, not the parent + // or ancestor. for i := 0; i < len(path); i++ { t.delete(path[:i], false) } @@ -131,21 +121,21 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { // If boundary filtering is not configured, or the node is not on the left // boundary, commit it to database. // - // Note, the nodes fall within the path between extension node and its - // **in disk** child must be cleaned out before committing the extension - // node. This is essential in snap sync to avoid leaving dangling nodes - // within this range covered by extension node which could potentially - // break the state healing. + // Note: If the current committed node is an extension node, then the nodes + // falling within the path between itself and its standalone (not embedded + // in parent) child should be cleaned out for exclusively occupy the inner + // path. 
// - // The target node is detected if its path is the prefix of last written - // one and path gap is larger than one. The current node could be a full - // node, or a shortNode with single byte key if the path gap is one, - // unnecessary to sweep the internal path in this case. + // This is essential in snap sync to avoid leaving dangling nodes within + // this range covered by extension node which could potentially break the + // state healing. // - // Nodes must be cleaned from top to bottom, including the node with the - // path of the committed extension node itself. + // The extension node is detected if its path is the prefix of last written + // one and path gap is larger than one. If the path gap is only one byte, + // the current node could either be a full node, or a extension with single + // byte key. In either case, no gaps will be left in the path. if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 { - for i := len(path); i < len(t.last); i++ { + for i := len(path) + 1; i < len(t.last); i++ { t.delete(t.last[:i], true) } } @@ -168,37 +158,47 @@ func (t *pathTrie) write(path []byte, blob []byte) { } } -// delete commits the node deletion to provided database batch in path mode. -func (t *pathTrie) delete(path []byte, inner bool) { - if t.owner == (common.Hash{}) { - if rawdb.ExistsAccountTrieNode(t.db, path) { - rawdb.DeleteAccountTrieNode(t.batch, path) - if inner { - accountInnerDeleteGauge.Inc(1) - } else { - accountOuterDeleteGauge.Inc(1) - } - } - if inner { - accountInnerLookupGauge.Inc(1) - } else { - accountOuterLookupGauge.Inc(1) - } +func (t *pathTrie) deleteAccountNode(path []byte, inner bool) { + if inner { + accountInnerLookupGauge.Inc(1) + } else { + accountOuterLookupGauge.Inc(1) + } + if !rawdb.ExistsAccountTrieNode(t.db, path) { return } - if rawdb.ExistsStorageTrieNode(t.db, t.owner, path) { - rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) - if inner { - storageInnerDeleteGauge.Inc(1) - } else { - storageOuterDeleteGauge.Inc(1) - } + if inner { + accountInnerDeleteGauge.Inc(1) + } else { + accountOuterDeleteGauge.Inc(1) } + rawdb.DeleteAccountTrieNode(t.batch, path) +} + +func (t *pathTrie) deleteStorageNode(path []byte, inner bool) { if inner { storageInnerLookupGauge.Inc(1) } else { storageOuterLookupGauge.Inc(1) } + if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) { + return + } + if inner { + storageInnerDeleteGauge.Inc(1) + } else { + storageOuterDeleteGauge.Inc(1) + } + rawdb.DeleteStorageTrieNode(t.batch, t.owner, path) +} + +// delete commits the node deletion to provided database batch in path mode. +func (t *pathTrie) delete(path []byte, inner bool) { + if t.owner == (common.Hash{}) { + t.deleteAccountNode(path, inner) + } else { + t.deleteStorageNode(path, inner) + } } // update implements genTrie interface, inserting a (key, value) pair into the @@ -208,8 +208,8 @@ func (t *pathTrie) update(key, value []byte) error { } // commit implements genTrie interface, flushing the right boundary if it's -// regarded as complete. Otherwise, the nodes on the right boundary are discarded -// and cleaned up. +// considered as complete. Otherwise, the nodes on the right boundary are +// discarded and cleaned up. // // Note, this function must be called before flushing database batch, otherwise, // dangling nodes might be left in database. 
@@ -218,51 +218,43 @@ func (t *pathTrie) commit(complete bool) common.Hash { // The nodes on both left and right boundary will still be filtered // out if left boundary filtering is configured. if complete { - // The produced hash is meaningless if left side is incomplete + // Commit all inserted but not yet committed nodes(on the right + // boundary) in the stackTrie. + hash := t.tr.Hash() if t.skipLeftBoundary { - return common.Hash{} + return common.Hash{} // hash is meaningless if left side is incomplete } - return t.tr.Hash() + return hash } - // If the right boundary is claimed as incomplete, the uncommitted - // nodes should be discarded, as they might be incomplete due to - // missing children on the right side. Furthermore, previously committed - // nodes can be the children of the right boundary nodes; therefore, - // the nodes of the right boundary must be cleaned out! + // Discard nodes on the right boundary as it's claimed as incomplete. These + // nodes might be incomplete due to missing children on the right side. + // Furthermore, the potential leftover nodes on right boundary should also + // be cleaned out. // - // The position of the last complete sub-trie (e.g., N_1) can be correctly - // determined by the last produced node (e.g., N_4), with a branch node - // (e.g., N_5) as the common parent for the shared path prefix. Therefore, - // the nodes along the path from the root to N_4 can be regarded as the - // right boundary and the parent of the last complete subtrie. + // The right boundary can be uniquely determined by the last committed node + // from stackTrie (e.g., N_1), as the shared path prefix between the last + // two inserted state items is deterministic (the path of N_3). The path + // from trie root towards the last committed node is considered the right + // boundary (root to N_3). // - // +-----+ - // | N_5 | parent for shared path prefix - // +-----+ - // /- -\ - // +-----+ +-----+ - // Last complete subtrie | N_1 | | N_4 | Last produced node - // +-----+ +-----+ - // /- -\ - // N-2 .. N-3 + // +-----+ + // | N_3 | shared path prefix of last two states + // +-----+ + // /- -\ + // +-----+ +-----+ + // Last committed node | N_1 | | N_2 | latest inserted node (contain last state) + // +-----+ +-----+ // // Another interesting scenario occurs when the trie is committed due to // too many items being accumulated in the batch. To flush them out to - // the database, the path of the last inserted item is temporarily treated - // as an incomplete right boundary, and nodes on this path are removed. - // + // the database, the path of the last inserted node (N_2) is temporarily + // treated as an incomplete right boundary, and nodes on this path are + // removed (e.g. from root to N_3). // However, this path will be reclaimed as an internal path by inserting - // more items after the batch flush. Newly produced nodes on this path - // can be committed with no issues as they are actually complete (also - // from a database perspective, first deleting and then rewriting is - // still a valid data update). - // - // Nodes must be cleaned from top to bottom as it's possible the procedure - // is interrupted in the middle. + // more items after the batch flush. New nodes on this path can be committed + // with no issues as they are actually complete. Also, from a database + // perspective, first deleting and then rewriting is a valid data update. 
for i := 0; i < len(t.last); i++ { - // The node with the path of the last produced node is not removed, as - // it's a sibling of the last complete sub-trie, not the parent. There - // is no reason to remove it. t.delete(t.last[:i], false) } return common.Hash{} // the hash is meaningless for incomplete commit diff --git a/trie/stacktrie.go b/trie/stacktrie.go index 7bf7519cbc1f..9c574db0bfa5 100644 --- a/trie/stacktrie.go +++ b/trie/stacktrie.go @@ -30,13 +30,13 @@ var ( _ = types.TrieHasher((*StackTrie)(nil)) ) -// OnTrieNode is a callback method to invoke when a trie node is produced -// by the stack trie. +// OnTrieNode is a callback method invoked when a trie node is committed +// by the stack trie. The node is only committed if it's considered complete. // // The caller should not modify the contents of the returned path and blob -// slice, and their contents may change after the call. It is up to the -// `onTrieNode` receiver function to deep-copy the data if it wants to -// retain it after the call ends. +// slice, and their contents may be changed after the call. It is up to the +// `onTrieNode` receiver function to deep-copy the data if it wants to retain +// it after the call ends. type OnTrieNode func(path []byte, hash common.Hash, blob []byte) // StackTrie is a trie implementation that expects keys to be inserted @@ -49,8 +49,8 @@ type StackTrie struct { onTrieNode OnTrieNode } -// NewStackTrie allocates and initializes an empty trie. The produced nodes will -// be discarded immediately if no callback is provided. +// NewStackTrie allocates and initializes an empty trie. The committed nodes +// will be discarded immediately if no callback is configured. func NewStackTrie(onTrieNode OnTrieNode) *StackTrie { return &StackTrie{ root: stPool.Get().(*stNode), @@ -374,7 +374,7 @@ func (t *StackTrie) hash(st *stNode, path []byte) { // Invoke the callback it's provided. Notably, the path and blob slices are // volatile, please deep-copy the slices in callback if the contents need - // to be saved. + // to be retained. if t.onTrieNode != nil { t.onTrieNode(path, common.BytesToHash(st.val), blob) } From e24280f8470515fb38314c9acd754f5ec307aaf0 Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Thu, 11 Apr 2024 10:27:20 +0800 Subject: [PATCH 6/7] eth/protocols/snap: improve comment --- eth/protocols/snap/gentrie.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eth/protocols/snap/gentrie.go b/eth/protocols/snap/gentrie.go index 135ce0107605..8ef1a007530e 100644 --- a/eth/protocols/snap/gentrie.go +++ b/eth/protocols/snap/gentrie.go @@ -28,7 +28,7 @@ import ( // genTrie interface is used by the snap syncer to generate merkle tree nodes // based on a received batch of states. type genTrie interface { - // update inserts the state into generator trie. + // update inserts the state item into generator trie. update(key, value []byte) error // commit flushes the right boundary nodes if complete flag is true. This @@ -110,8 +110,8 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) { // +-----+ +-----+ // // The node with the path of the first committed one (e.g, N_1) is not - // removed because it's a sibling of the complete nodes, not the parent - // or ancestor. + // removed because it's a sibling of the nodes we want to commit, not + // the parent or ancestor. 
for i := 0; i < len(path); i++ {
 				t.delete(path[:i], false)
 			}
@@ -130,7 +130,7 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
 	// this range covered by extension node which could potentially break the
 	// state healing.
 	//
-	// The extension node is detected if its path is the prefix of last written
+	// The extension node is detected if its path is the prefix of last committed
 	// one and path gap is larger than one. If the path gap is only one byte,
 	// the current node could either be a full node, or a extension with single
 	// byte key. In either case, no gaps will be left in the path.

From e635c8c0a840cb9274e3685bb38278766b22dee9 Mon Sep 17 00:00:00 2001
From: Gary Rong 
Date: Thu, 11 Apr 2024 11:41:29 +0800
Subject: [PATCH 7/7] eth/protocols/snap: improve tests

---
 eth/protocols/snap/gentrie_test.go | 188 +++++++++++++++++++++++++----
 1 file changed, 163 insertions(+), 25 deletions(-)

diff --git a/eth/protocols/snap/gentrie_test.go b/eth/protocols/snap/gentrie_test.go
index 10a006f053f8..1fb2dbce7568 100644
--- a/eth/protocols/snap/gentrie_test.go
+++ b/eth/protocols/snap/gentrie_test.go
@@ -63,7 +63,9 @@ func (r *replayer) decode(key []byte, value []byte) {
 	}
 }
 
-func (r *replayer) updates() map[string]common.Hash {
+// modifies returns the set of effective mutations. Multiple mutations targeting
+// the same node path will be merged in FIFO order.
+func (r *replayer) modifies() map[string]common.Hash {
 	set := make(map[string]common.Hash)
 	for i, path := range r.paths {
 		set[path] = r.hashes[i]
@@ -71,6 +73,18 @@ func (r *replayer) updates() map[string]common.Hash {
 	return set
 }
 
+// updates returns the number of updates.
+func (r *replayer) updates() int {
+	var count int
+	for _, hash := range r.modifies() {
+		if hash == (common.Hash{}) {
+			continue
+		}
+		count++
+	}
+	return count
+}
+
 // Put inserts the given value into the key-value data store.
 func (r *replayer) Put(key []byte, value []byte) error {
 	r.decode(key, value)
@@ -95,7 +109,7 @@ func byteToHex(str []byte) []byte {
 
 // innerNodes returns the internal nodes narrowed by two boundaries along with
 // the leftmost and rightmost sub-trie roots.
-func innerNodes(first, last []byte, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) {
+func innerNodes(first, last []byte, includeLeft, includeRight bool, nodes map[string]common.Hash, t *testing.T) (map[string]common.Hash, []byte, []byte) {
 	var (
 		leftRoot  []byte
 		rightRoot []byte
@@ -108,11 +122,11 @@ func innerNodes(first, last []byte, nodes map[string]common.Hash, t *testing.T)
 			t.Fatalf("Unexpected deletion, %v", []byte(path))
 		}
 		// Filter out the siblings on the left side or the left boundary nodes.
-		if bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path)) {
+		if !includeLeft && (bytes.Compare(firstHex, []byte(path)) > 0 || bytes.HasPrefix(firstHex, []byte(path))) {
 			continue
 		}
 		// Filter out the siblings on the right side or the right boundary nodes. 
- if bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path)) { + if !includeRight && (bytes.Compare(lastHex, []byte(path)) < 0 || bytes.HasPrefix(lastHex, []byte(path))) { continue } inner[path] = hash @@ -180,8 +194,98 @@ func TestPartialGentree(t *testing.T) { db = rawdb.NewMemoryDatabase() batch = db.NewBatch() ) - inner, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, nodes, t) + // Build the partial tree with specific boundaries + r := buildPartial(common.Hash{}, db, batch, entries, first, last) + if r.unknowns > 0 { + t.Fatalf("Unknown database write: %d", r.unknowns) + } + + // Ensure all the internal nodes are produced + var ( + set = r.modifies() + inner, _, _ = innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t) + ) + for path, hash := range inner { + if _, ok := set[path]; !ok { + t.Fatalf("Missing nodes %v", []byte(path)) + } + if hash != set[path] { + t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) + } + } + if r.updates() != len(inner) { + t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates()) + } + } + for j := 0; j < 100; j++ { + var ( + first int + last int + ) + for { + first = rand.Intn(len(entries)) + last = rand.Intn(len(entries)) + if first <= last { + break + } + } + check(first, last) + } + var cases = []struct { + first int + last int + }{ + {0, len(entries) - 1}, // full + {1, len(entries) - 1}, // no left + {2, len(entries) - 1}, // no left + {2, len(entries) - 2}, // no left and right + {2, len(entries) - 2}, // no left and right + {len(entries) / 2, len(entries) / 2}, // single + {0, 0}, // single first + {len(entries) - 1, len(entries) - 1}, // single last + } + for _, c := range cases { + check(c.first, c.last) + } + } +} + +// TestGentreeDanglingClearing tests if the dangling nodes falling within the +// path space of constructed tree can be correctly removed. 
+func TestGentreeDanglingClearing(t *testing.T) { + for round := 0; round < 100; round++ { + var ( + n = rand.Intn(1024) + 10 + entries []*kv + ) + for i := 0; i < n; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + nodes := make(map[string]common.Hash) + tr := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) { + nodes[string(path)] = hash + }) + for i := 0; i < len(entries); i++ { + tr.Update(entries[i].k, entries[i].v) + } + tr.Hash() + + check := func(first, last int) { + var ( + db = rawdb.NewMemoryDatabase() + batch = db.NewBatch() + ) // Write the junk nodes as the dangling var injects []string for path := range nodes { @@ -193,27 +297,23 @@ func TestPartialGentree(t *testing.T) { injects = append(injects, path[:i]) } } + if len(injects) == 0 { + return + } for _, path := range injects { rawdb.WriteAccountTrieNode(db, []byte(path), testrand.Bytes(32)) } + // Build the partial tree with specific range replay := buildPartial(common.Hash{}, db, batch, entries, first, last) if replay.unknowns > 0 { t.Fatalf("Unknown database write: %d", replay.unknowns) } + set := replay.modifies() - // Ensure all the internal nodes are produced - set := replay.updates() - for path, hash := range inner { - if _, ok := set[path]; !ok { - t.Fatalf("Missing nodes %v", []byte(path)) - } - if hash != set[path] { - t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) - } - } - - // Make sure all injected junks are correctly deleted + // Make sure the injected junks falling within the path space of + // committed trie nodes are correctly deleted. + _, leftRoot, rightRoot := innerNodes(entries[first].k, entries[last].k, first == 0, last == len(entries)-1, nodes, t) for _, path := range injects { if bytes.Compare([]byte(path), leftRoot) < 0 && !bytes.HasPrefix(leftRoot, []byte(path)) { continue @@ -260,7 +360,7 @@ func TestPartialGentree(t *testing.T) { } // TestFlushPartialTree tests the gentrie can produce complete inner trie nodes -// even with lots of batch flushes in the middle. +// even with lots of batch flushes. 
func TestFlushPartialTree(t *testing.T) { var entries []*kv for i := 0; i < 1024; i++ { @@ -305,7 +405,7 @@ func TestFlushPartialTree(t *testing.T) { batch = db.NewBatch() combined = db.NewBatch() ) - inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, nodes, t) + inner, _, _ := innerNodes(entries[c.first].k, entries[c.last].k, c.first == 0, c.last == len(entries)-1, nodes, t) tr := newPathTrie(common.Hash{}, c.first != 0, db, batch) for i := c.first; i <= c.last; i++ { @@ -328,7 +428,7 @@ func TestFlushPartialTree(t *testing.T) { combined.Replay(r) // Ensure all the internal nodes are produced - set := r.updates() + set := r.modifies() for path, hash := range inner { if _, ok := set[path]; !ok { t.Fatalf("Missing nodes %v", []byte(path)) @@ -337,6 +437,9 @@ func TestFlushPartialTree(t *testing.T) { t.Fatalf("Inconsistent node, want %x, got: %x", hash, set[path]) } } + if r.updates() != len(inner) { + t.Fatalf("Unexpected node write detected, want: %d, got: %d", len(inner), r.updates()) + } } } @@ -373,20 +476,20 @@ func TestBoundSplit(t *testing.T) { last = rand.Intn(len(entries)-next) + next r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) - updates := r.updates() + set := r.modifies() // Skip if the chunk is zero-size - if len(updates) == 0 { + if r.updates() == 0 { next = last + 1 continue } // Ensure the updates in two consecutive chunks are not overlapped. // The only overlapping part should be deletion. - if lastRightRoot != nil && len(updates) > 0 { + if lastRightRoot != nil && len(set) > 0 { // Derive the path of left-most node in this chunk var leftRoot []byte - for path, hash := range r.updates() { + for path, hash := range r.modifies() { if hash == (common.Hash{}) { t.Fatalf("Unexpected deletion %v", []byte(path)) } @@ -401,7 +504,7 @@ func TestBoundSplit(t *testing.T) { // Track the updates as the last chunk var rightRoot []byte - for path := range updates { + for path := range set { if rightRoot == nil || (bytes.Compare(rightRoot, []byte(path)) < 0) || (bytes.Compare(rightRoot, []byte(path)) > 0 && bytes.HasPrefix(rightRoot, []byte(path))) { @@ -413,3 +516,38 @@ func TestBoundSplit(t *testing.T) { } } } + +// TestTinyPartialTree tests if the partial tree is too tiny(has less than two +// states), then nothing should be committed. +func TestTinyPartialTree(t *testing.T) { + var entries []*kv + for i := 0; i < 1024; i++ { + var val []byte + if rand.Intn(3) == 0 { + val = testrand.Bytes(3) + } else { + val = testrand.Bytes(32) + } + entries = append(entries, &kv{ + k: testrand.Bytes(32), + v: val, + }) + } + slices.SortFunc(entries, (*kv).cmp) + + for i := 0; i < len(entries); i++ { + next := i + last := i + 1 + if last >= len(entries) { + last = len(entries) - 1 + } + db := rawdb.NewMemoryDatabase() + r := buildPartial(common.Hash{}, db, db.NewBatch(), entries, next, last) + + if next != 0 && last != len(entries)-1 { + if r.updates() != 0 { + t.Fatalf("Unexpected data writes, got: %d", r.updates()) + } + } + } +}