Skip to content

Commit 4c981d9

Browse files
rjl493456442shekhirin
authored and committed
core: rework tx indexer (ethereum#25723)
This PR reworks the tx indexer a bit. Compared to the original version, one scenario is no longer handled: upgrading from a legacy Geth without indexer support. The tx indexer was introduced in 2020 and has been present through several hard forks, so it can be assumed that all Geth nodes already have the tx indexer. This lets us simplify the tx indexer logic a bit: - If the tail flag is not present, the node has just been initialized, with or without an ancient store attached. In this case all blocks are regarded as unindexed. - If the tail flag is present, blocks below the tail are unindexed and blocks above the tail are indexed. This change also addresses some weird corner cases that could make the indexer stop working after a crash.
1 parent b3a23f6 commit 4c981d9

File tree

3 files changed

+280
-121
lines changed

3 files changed

+280
-121
lines changed

core/blockchain.go

Lines changed: 55 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -292,22 +292,16 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
292292
bc.currentFinalizedBlock.Store(nilBlock)
293293
bc.currentSafeBlock.Store(nilBlock)
294294

295-
// Initialize the chain with ancient data if it isn't empty.
296-
var txIndexBlock uint64
297-
295+
// If Geth is initialized with an external ancient store, re-initialize the
296+
// missing chain indexes and chain flags. This procedure can survive crash
297+
// and can be resumed in next restart since chain flags are updated in last step.
298298
if bc.empty() {
299299
rawdb.InitDatabaseFromFreezer(bc.db)
300-
// If ancient database is not empty, reconstruct all missing
301-
// indices in the background.
302-
frozen, _ := bc.db.Ancients()
303-
if frozen > 0 {
304-
txIndexBlock = frozen
305-
}
306300
}
301+
// Load blockchain states from disk
307302
if err := bc.loadLastState(); err != nil {
308303
return nil, err
309304
}
310-
311305
// Make sure the state associated with the block is available
312306
head := bc.CurrentBlock()
313307
if _, err := state.New(head.Root(), bc.stateCache, bc.snaps); err != nil {
@@ -415,14 +409,6 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
415409
bc.wg.Add(1)
416410
go bc.updateFutureBlocks()
417411

418-
// Start tx indexer/unindexer.
419-
if txLookupLimit != nil {
420-
bc.txLookupLimit = *txLookupLimit
421-
422-
bc.wg.Add(1)
423-
go bc.maintainTxIndex(txIndexBlock)
424-
}
425-
426412
// If periodic cache journal is required, spin it up.
427413
if bc.cacheConfig.TrieCleanRejournal > 0 {
428414
if bc.cacheConfig.TrieCleanRejournal < time.Minute {
@@ -442,6 +428,13 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
442428
bc.SetHead(compat.RewindTo)
443429
rawdb.WriteChainConfig(db, genesisHash, chainConfig)
444430
}
431+
// Start tx indexer/unindexer if required.
432+
if txLookupLimit != nil {
433+
bc.txLookupLimit = *txLookupLimit
434+
435+
bc.wg.Add(1)
436+
go bc.maintainTxIndex()
437+
}
445438
return bc, nil
446439
}
447440

@@ -2289,72 +2282,58 @@ func (bc *BlockChain) skipBlock(err error, it *insertIterator) bool {
22892282
return false
22902283
}
22912284

2285+
// indexBlocks reindexes or unindexes transactions depending on the configured
// bc.txLookupLimit, the stored index tail (nil if the tail flag is absent) and
// the given chain head number. The done channel is closed when the function
// returns, whichever branch is taken.
// NOTE(review): the rawdb range arguments are read here as [from, to) since
// head+1 is passed as the upper bound — confirm against the rawdb helpers.
2286+
func (bc *BlockChain) indexBlocks(tail *uint64, head uint64, done chan struct{}) {
2287+
defer func() { close(done) }()
2288+
2289+
// The tail flag does not exist, which means the node was just initialized
2290+
// and no blocks (possibly coming from an ancient store) are indexed yet. Index from 0, or from head-txLookupLimit+1 when a non-zero limit does not exceed head.
2291+
if tail == nil {
2292+
from := uint64(0)
2293+
if bc.txLookupLimit != 0 && head >= bc.txLookupLimit {
2294+
from = head - bc.txLookupLimit + 1
2295+
}
2296+
rawdb.IndexTransactions(bc.db, from, head+1, bc.quit)
2297+
return
2298+
}
2299+
// The tail flag exists, but the whole chain is required to be indexed (limit is 0, or head is below the limit).
2300+
if bc.txLookupLimit == 0 || head < bc.txLookupLimit {
2301+
if *tail > 0 {
2302+
// The chain may have been rewound to a historical point that is
2303+
// even lower than the index tail; cap the indexing target at the
2304+
// new head (head+1) to avoid reading non-existent block bodies.
2305+
end := *tail
2306+
if end > head+1 {
2307+
end = head + 1
2308+
}
2309+
rawdb.IndexTransactions(bc.db, 0, end, bc.quit)
2310+
}
2311+
return
2312+
}
2313+
// A non-zero limit applies and head >= limit: move the index boundary to head-txLookupLimit+1.
2314+
if head-bc.txLookupLimit+1 < *tail {
2315+
// Target is below the current tail: reindex the missing indices between the target and the tail.
2316+
rawdb.IndexTransactions(bc.db, head-bc.txLookupLimit+1, *tail, bc.quit)
2317+
} else {
2318+
// Target is at or above the current tail: unindex the stale indices between the tail and the target.
2319+
rawdb.UnindexTransactions(bc.db, *tail, head-bc.txLookupLimit+1, bc.quit)
2320+
}
2321+
}
2322+
22922323
// maintainTxIndex is responsible for the construction and deletion of the
22932324
// transaction index.
22942325
//
22952326
// User can use flag `txlookuplimit` to specify a "recentness" block, below
22962327
// which ancient tx indices get deleted. If `txlookuplimit` is 0, it means
22972328
// all tx indices will be reserved.
22982329
//
2299-
// The user can adjust the txlookuplimit value for each launch after fast
2300-
// sync, Geth will automatically construct the missing indices and delete
2301-
// the extra indices.
2302-
func (bc *BlockChain) maintainTxIndex(ancients uint64) {
2330+
// The user can adjust the txlookuplimit value for each launch after sync,
2331+
// Geth will automatically construct the missing indices or delete the extra
2332+
// indices.
2333+
func (bc *BlockChain) maintainTxIndex() {
23032334
defer bc.wg.Done()
23042335

2305-
// Before starting the actual maintenance, we need to handle a special case,
2306-
// where user might init Geth with an external ancient database. If so, we
2307-
// need to reindex all necessary transactions before starting to process any
2308-
// pruning requests.
2309-
if ancients > 0 {
2310-
var from = uint64(0)
2311-
if bc.txLookupLimit != 0 && ancients > bc.txLookupLimit {
2312-
from = ancients - bc.txLookupLimit
2313-
}
2314-
rawdb.IndexTransactions(bc.db, from, ancients, bc.quit)
2315-
}
2316-
2317-
// indexBlocks reindexes or unindexes transactions depending on user configuration
2318-
indexBlocks := func(tail *uint64, head uint64, done chan struct{}) {
2319-
defer func() { done <- struct{}{} }()
2320-
2321-
// If the user just upgraded Geth to a new version which supports transaction
2322-
// index pruning, write the new tail and remove anything older.
2323-
if tail == nil {
2324-
if bc.txLookupLimit == 0 || head < bc.txLookupLimit {
2325-
// Nothing to delete, write the tail and return
2326-
rawdb.WriteTxIndexTail(bc.db, 0)
2327-
} else {
2328-
// Prune all stale tx indices and record the tx index tail
2329-
rawdb.UnindexTransactions(bc.db, 0, head-bc.txLookupLimit+1, bc.quit)
2330-
}
2331-
return
2332-
}
2333-
// If a previous indexing existed, make sure that we fill in any missing entries
2334-
if bc.txLookupLimit == 0 || head < bc.txLookupLimit {
2335-
if *tail > 0 {
2336-
// It can happen when chain is rewound to a historical point which
2337-
// is even lower than the indexes tail, recap the indexing target
2338-
// to new head to avoid reading non-existent block bodies.
2339-
end := *tail
2340-
if end > head+1 {
2341-
end = head + 1
2342-
}
2343-
rawdb.IndexTransactions(bc.db, 0, end, bc.quit)
2344-
}
2345-
return
2346-
}
2347-
// Update the transaction index to the new chain state
2348-
if head-bc.txLookupLimit+1 < *tail {
2349-
// Reindex a part of missing indices and rewind index tail to HEAD-limit
2350-
rawdb.IndexTransactions(bc.db, head-bc.txLookupLimit+1, *tail, bc.quit)
2351-
} else {
2352-
// Unindex a part of stale indices and forward index tail to HEAD-limit
2353-
rawdb.UnindexTransactions(bc.db, *tail, head-bc.txLookupLimit+1, bc.quit)
2354-
}
2355-
}
2356-
2357-
// Any reindexing done, start listening to chain events and moving the index window
2336+
// Listening to chain events and manipulate the transaction indexes.
23582337
var (
23592338
done chan struct{} // Non-nil if background unindexing or reindexing routine is active.
23602339
headCh = make(chan ChainHeadEvent, 1) // Buffered to avoid locking up the event feed
@@ -2370,7 +2349,7 @@ func (bc *BlockChain) maintainTxIndex(ancients uint64) {
23702349
case head := <-headCh:
23712350
if done == nil {
23722351
done = make(chan struct{})
2373-
go indexBlocks(rawdb.ReadTxIndexTail(bc.db), head.Block.NumberU64(), done)
2352+
go bc.indexBlocks(rawdb.ReadTxIndexTail(bc.db), head.Block.NumberU64(), done)
23742353
}
23752354
case <-done:
23762355
done = nil

0 commit comments

Comments
 (0)