Skip to content

Commit 0b53b29

Browse files
authored
core/rawdb: fix cornercase shutdown behaviour in freezer (#26485)
This PR does a few things. It fixes a shutdown-order flaw in the chainfreezer. Previously, the chain-freezer would shutdown the freezer backend first, and then signal for the loop to exit. This can lead to a scenario where the freezer tries to fsync closed files, which is an error-conditon that could lead to exit via log.Crit. It also makes the printout more detailed when truncating 'dangling' items, by showing the exact number instead of approximate MB. This PR also adds calls to fsync files before closing them, and also makes the `db inspect` command slightly more robust.
1 parent 450d771 commit 0b53b29

File tree

4 files changed

+62
-30
lines changed

4 files changed

+62
-30
lines changed

cmd/geth/dbcmd.go

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -551,16 +551,8 @@ func freezerInspect(ctx *cli.Context) error {
551551
return err
552552
}
553553
stack, _ := makeConfigNode(ctx)
554-
defer stack.Close()
555-
556-
db := utils.MakeChainDatabase(ctx, stack, true)
557-
defer db.Close()
558-
559-
ancient, err := db.AncientDatadir()
560-
if err != nil {
561-
log.Info("Failed to retrieve ancient root", "err", err)
562-
return err
563-
}
554+
ancient := stack.ResolveAncient("chaindata", ctx.String(utils.AncientFlag.Name))
555+
stack.Close()
564556
return rawdb.InspectFreezerTable(ancient, freezer, table, start, end)
565557
}
566558

core/rawdb/chain_freezer.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,13 @@ func newChainFreezer(datadir string, namespace string, readonly bool, maxTableSi
7070

7171
// Close closes the chain freezer instance and terminates the background thread.
7272
func (f *chainFreezer) Close() error {
73-
err := f.Freezer.Close()
7473
select {
7574
case <-f.quit:
7675
default:
7776
close(f.quit)
7877
}
7978
f.wg.Wait()
80-
return err
79+
return f.Freezer.Close()
8180
}
8281

8382
// freeze is a background thread that periodically checks the blockchain for any

core/rawdb/freezer_table.go

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ func (t *freezerTable) repair() error {
229229
lastIndex indexEntry
230230
contentSize int64
231231
contentExp int64
232+
verbose bool
232233
)
233234
// Read index zero, determine what file is the earliest
234235
// and what item offset to use
@@ -272,17 +273,18 @@ func (t *freezerTable) repair() error {
272273
// Keep truncating both files until they come in sync
273274
contentExp = int64(lastIndex.offset)
274275
for contentExp != contentSize {
276+
verbose = true
275277
// Truncate the head file to the last offset pointer
276278
if contentExp < contentSize {
277-
t.logger.Warn("Truncating dangling head", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize))
279+
t.logger.Warn("Truncating dangling head", "indexed", contentExp, "stored", contentSize)
278280
if err := truncateFreezerFile(t.head, contentExp); err != nil {
279281
return err
280282
}
281283
contentSize = contentExp
282284
}
283285
// Truncate the index to point within the head file
284286
if contentExp > contentSize {
285-
t.logger.Warn("Truncating dangling indexes", "indexed", common.StorageSize(contentExp), "stored", common.StorageSize(contentSize))
287+
t.logger.Warn("Truncating dangling indexes", "indexes", offsetsSize/indexEntrySize, "indexed", contentExp, "stored", contentSize)
286288
if err := truncateFreezerFile(t.index, offsetsSize-indexEntrySize); err != nil {
287289
return err
288290
}
@@ -343,7 +345,11 @@ func (t *freezerTable) repair() error {
343345
if err := t.preopen(); err != nil {
344346
return err
345347
}
346-
t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
348+
if verbose {
349+
t.logger.Info("Chain freezer table opened", "items", t.items, "size", t.headBytes)
350+
} else {
351+
t.logger.Debug("Chain freezer table opened", "items", t.items, "size", common.StorageSize(t.headBytes))
352+
}
347353
return nil
348354
}
349355

@@ -553,21 +559,31 @@ func (t *freezerTable) Close() error {
553559
defer t.lock.Unlock()
554560

555561
var errs []error
556-
if err := t.index.Close(); err != nil {
557-
errs = append(errs, err)
558-
}
559-
t.index = nil
560-
561-
if err := t.meta.Close(); err != nil {
562-
errs = append(errs, err)
562+
doClose := func(f *os.File, sync bool, close bool) {
563+
if sync && !t.readonly {
564+
if err := f.Sync(); err != nil {
565+
errs = append(errs, err)
566+
}
567+
}
568+
if close {
569+
if err := f.Close(); err != nil {
570+
errs = append(errs, err)
571+
}
572+
}
563573
}
564-
t.meta = nil
565-
574+
// Trying to fsync a file opened in rdonly causes "Access denied"
575+
// error on Windows.
576+
doClose(t.index, true, true)
577+
doClose(t.meta, true, true)
578+
// The preopened non-head data-files are all opened in readonly.
579+
// The head is opened in rw-mode, so we sync it here - but since it's also
580+
// part of t.files, it will be closed in the loop below.
581+
doClose(t.head, true, false) // sync but do not close
566582
for _, f := range t.files {
567-
if err := f.Close(); err != nil {
568-
errs = append(errs, err)
569-
}
583+
doClose(f, false, true) // close but do not sync
570584
}
585+
t.index = nil
586+
t.meta = nil
571587
t.head = nil
572588

573589
if errs != nil {
@@ -724,7 +740,7 @@ func (t *freezerTable) retrieveItems(start, count, maxBytes uint64) ([]byte, []i
724740
defer t.lock.RUnlock()
725741

726742
// Ensure the table and the item are accessible
727-
if t.index == nil || t.head == nil {
743+
if t.index == nil || t.head == nil || t.meta == nil {
728744
return nil, nil, errClosed
729745
}
730746
var (
@@ -872,7 +888,9 @@ func (t *freezerTable) advanceHead() error {
872888
func (t *freezerTable) Sync() error {
873889
t.lock.Lock()
874890
defer t.lock.Unlock()
875-
891+
if t.index == nil || t.head == nil || t.meta == nil {
892+
return errClosed
893+
}
876894
var err error
877895
trackError := func(e error) {
878896
if e != nil && err == nil {
@@ -903,7 +921,8 @@ func (t *freezerTable) dumpIndex(w io.Writer, start, stop int64) {
903921
fmt.Fprintf(w, "Failed to decode freezer table %v\n", err)
904922
return
905923
}
906-
fmt.Fprintf(w, "Version %d deleted %d, hidden %d\n", meta.Version, atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))
924+
fmt.Fprintf(w, "Version %d count %d, deleted %d, hidden %d\n", meta.Version,
925+
atomic.LoadUint64(&t.items), atomic.LoadUint64(&t.itemOffset), atomic.LoadUint64(&t.itemHidden))
907926

908927
buf := make([]byte, indexEntrySize)
909928

core/rawdb/freezer_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,3 +407,25 @@ func TestRenameWindows(t *testing.T) {
407407
t.Errorf("unexpected file contents. Got %v\n", buf)
408408
}
409409
}
410+
411+
func TestFreezerCloseSync(t *testing.T) {
412+
t.Parallel()
413+
f, _ := newFreezerForTesting(t, map[string]bool{"a": true, "b": true})
414+
defer f.Close()
415+
416+
// Now, close and sync. This mimics the behaviour if the node is shut down,
417+
// just as the chain freezer is writing.
418+
// 1: thread-1: chain treezer writes, via freezeRange (holds lock)
419+
// 2: thread-2: Close called, waits for write to finish
420+
// 3: thread-1: finishes writing, releases lock
421+
// 4: thread-2: obtains lock, completes Close()
422+
// 5: thread-1: calls f.Sync()
423+
if err := f.Close(); err != nil {
424+
t.Fatal(err)
425+
}
426+
if err := f.Sync(); err == nil {
427+
t.Fatalf("want error, have nil")
428+
} else if have, want := err.Error(), "[closed closed]"; have != want {
429+
t.Fatalf("want %v, have %v", have, want)
430+
}
431+
}

0 commit comments

Comments
 (0)