Skip to content

Commit 223405d

Browse files
snapshots: proper development option to disable peer selection and download checks
1 parent 30179a2 commit 223405d

File tree

1 file changed: +70 −41 lines changed

src/discof/restore/fd_snaprd_tile.c

Lines changed: 70 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,6 @@ struct fd_snaprd_tile {
9696
int malformed;
9797
long deadline_nanos;
9898
ulong ack_cnt;
99-
int peer_selection;
10099

101100
fd_ip4_port_t addr;
102101

@@ -152,6 +151,7 @@ struct fd_snaprd_tile {
152151
uint max_incremental_snapshots_to_keep;
153152
int entrypoints_enabled;
154153
int gossip_peers_enabled;
154+
int peer_selection;
155155
} config;
156156

157157
struct {
@@ -684,7 +684,7 @@ after_credit( fd_snaprd_tile_t * ctx,
684684
return;
685685
}
686686

687-
if( FD_LIKELY( ctx->peer_selection ) ) {
687+
if( FD_LIKELY( ctx->config.peer_selection ) ) {
688688
fd_ssping_advance( ctx->ssping, now, ctx->selector );
689689
fd_http_resolver_advance( ctx->ssresolver, now, ctx->selector );
690690

@@ -744,6 +744,11 @@ after_credit( fd_snaprd_tile_t * ctx,
744744
ctx->metrics.full.bytes_total = ctx->local_in.full_snapshot_size;
745745
ctx->state = FD_SNAPRD_STATE_READING_FULL_FILE;
746746
} else {
747+
if( FD_UNLIKELY( !ctx->config.do_download ) ) {
748+
FD_LOG_ERR(( "Local snapshot `%s` is too old and downloading new snapshots is disabled. "
749+
"Please enable downloading via [snapshots.download] and restart.", ctx->local_in.full_snapshot_path ) );
750+
}
751+
747752
if( FD_UNLIKELY( !ctx->config.incremental_snapshot_fetch ) ) send_expected_slot( stem, best.ssinfo.full.slot );
748753

749754
fd_sspeer_t best_incremental = fd_sspeer_selector_best( ctx->selector, 1, best.ssinfo.full.slot );
@@ -790,9 +795,14 @@ after_credit( fd_snaprd_tile_t * ctx,
790795
ctx->ack_cnt = 0UL;
791796

792797
if( FD_UNLIKELY( ctx->malformed ) ) {
793-
fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_RESET_FULL, 0UL, 0UL, 0UL, 0UL, 0UL );
794-
ctx->state = FD_SNAPRD_STATE_FLUSHING_FULL_FILE_RESET;
795-
ctx->malformed = 0;
798+
if( FD_LIKELY( ctx->config.peer_selection ) ) {
799+
fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_RESET_FULL, 0UL, 0UL, 0UL, 0UL, 0UL );
800+
ctx->state = FD_SNAPRD_STATE_FLUSHING_FULL_FILE_RESET;
801+
ctx->malformed = 0;
802+
} else {
803+
FD_LOG_ERR(( "Local incremental snapshot `%s` is malformed and no snapshot sources are configured. "
804+
"Please add snapshot sources and restart. ", ctx->local_in.incremental_snapshot_path ));
805+
}
796806
break;
797807
}
798808

@@ -822,6 +832,18 @@ after_credit( fd_snaprd_tile_t * ctx,
822832
if( FD_UNLIKELY( ctx->ack_cnt<NUM_SNAP_CONSUMERS ) ) break;
823833
ctx->ack_cnt = 0UL;
824834

835+
if( FD_UNLIKELY( ctx->malformed ) ) {
836+
if( FD_LIKELY( ctx->config.peer_selection ) ) {
837+
fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_RESET_FULL, 0UL, 0UL, 0UL, 0UL, 0UL );
838+
ctx->state = FD_SNAPRD_STATE_FLUSHING_FULL_FILE_RESET;
839+
ctx->malformed = 0;
840+
} else {
841+
FD_LOG_ERR(( "Local full snapshot `%s` is malformed and no snapshot sources are configured. "
842+
"Please add snapshot sources and restart. ", ctx->local_in.full_snapshot_path ));
843+
}
844+
break;
845+
}
846+
825847
if( FD_LIKELY( !ctx->config.incremental_snapshot_fetch ) ) {
826848
ctx->state = FD_SNAPRD_STATE_SHUTDOWN;
827849
metrics_write( ctx ); /* ensures that shutdown state is written to metrics workspace before the tile actually shuts down */
@@ -920,7 +942,7 @@ before_frag( fd_snaprd_tile_t * ctx FD_PARAM_UNUSED,
920942
return !( ( sig==FD_GOSSIP_UPDATE_TAG_CONTACT_INFO ||
921943
sig==FD_GOSSIP_UPDATE_TAG_CONTACT_INFO_REMOVE ||
922944
sig==FD_GOSSIP_UPDATE_TAG_SNAPSHOT_HASHES ) &&
923-
( ctx->config.entrypoints_enabled || ctx->config.gossip_peers_enabled ) && ctx->peer_selection );
945+
( ctx->config.entrypoints_enabled || ctx->config.gossip_peers_enabled ) && ctx->config.peer_selection );
924946
}
925947
return 0;
926948
}
@@ -1091,12 +1113,6 @@ privileged_init( fd_topo_t * topo,
10911113

10921114
fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
10931115

1094-
/* By default, the snaprd tile selects peers and its initial state is
1095-
WAITING_FOR_PEERS. */
1096-
ctx->peer_selection = 1;
1097-
ctx->state = FD_SNAPRD_STATE_WAITING_FOR_PEERS;
1098-
ctx->deadline_nanos = fd_log_wallclock() + FD_SNAPRD_WAITING_FOR_PEERS_TIMEOUT_DEADLINE_NANOS;
1099-
11001116
ctx->local_in.full_snapshot_fd = -1;
11011117
ctx->local_in.incremental_snapshot_fd = -1;
11021118
ctx->local_out.dir_fd = -1;
@@ -1152,41 +1168,54 @@ privileged_init( fd_topo_t * topo,
11521168
if( FD_UNLIKELY( !S_ISREG( incremental_stat.st_mode ) ) ) FD_LOG_ERR(( "incremental snapshot path `%s` is not a regular file", incremental_path ));
11531169
ctx->local_in.incremental_snapshot_size = (ulong)incremental_stat.st_size;
11541170
}
1171+
}
11551172

1156-
ctx->local_out.dir_fd = -1;
1157-
ctx->local_out.full_snapshot_fd = -1;
1158-
ctx->local_out.incremental_snapshot_fd = -1;
1159-
1160-
if( FD_UNLIKELY( tile->snaprd.maximum_local_snapshot_age==0U ) ) {
1161-
/* Disable peer selection if we are reading snapshots from disk
1162-
and there is no maximum local snapshot age set. Set the
1163-
initial state to READING_FULL_FILE to avoid peer selection
1164-
logic.
1165-
1166-
TODO: Why? Document in TOML. */
1167-
ctx->peer_selection = 0;
1168-
ctx->state = FD_SNAPRD_STATE_READING_FULL_FILE;
1169-
ctx->metrics.full.bytes_total = ctx->local_in.full_snapshot_size;
1170-
FD_LOG_NOTICE(( "reading full snapshot from local file `%s`", ctx->local_in.full_snapshot_path ));
1173+
if( FD_UNLIKELY( !tile->snaprd.entrypoints_enabled &&
1174+
!tile->snaprd.gossip_peers_enabled &&
1175+
!tile->snaprd.http.peers_cnt ) ) {
1176+
if( FD_UNLIKELY( ctx->local_in.full_snapshot_slot==ULONG_MAX ) ) {
1177+
FD_LOG_ERR(("No local full snapshots found in `%s` and no snapshot sources configured. "
1178+
"Please add snapshot sources and restart. ", tile->snaprd.snapshots_path ));
11711179
}
1172-
}
11731180

1174-
/* Set up download descriptors because even if we have local
1181+
if( FD_UNLIKELY( tile->snaprd.incremental_snapshot_fetch && ctx->local_in.incremental_snapshot_slot==ULONG_MAX ) ) {
1182+
FD_LOG_ERR(( "No local incremental snapshots found in `%s` and no snapshot sources configured. "
1183+
"Please add snapshot sources or set [snapshots.incremental_snapshots] to false and restart. ", tile->snaprd.snapshots_path ));
1184+
}
1185+
1186+
/* Disable peer selection when there are no configured snapshot
1187+
sources but local snapshots exist. The snaprd state machine is
1188+
configured to start in the READING_FULL_FILE state and does not
1189+
attempt to select peers to download snapshots from. */
1190+
FD_TEST( ctx->local_in.full_snapshot_slot!=ULONG_MAX );
1191+
ctx->config.peer_selection = 0;
1192+
ctx->state = FD_SNAPRD_STATE_READING_FULL_FILE;
1193+
ctx->metrics.full.bytes_total = ctx->local_in.full_snapshot_size;
1194+
FD_LOG_NOTICE(( "reading full snapshot from local file `%s`", ctx->local_in.full_snapshot_path ));
1195+
} else {
1196+
/* Set up download descriptors because even if we have local
11751197
snapshots, we may need to download new snapshots if the local
11761198
snapshots are too old. */
1177-
ctx->local_out.dir_fd = open( tile->snaprd.snapshots_path, O_DIRECTORY|O_CLOEXEC );
1178-
if( FD_UNLIKELY( -1==ctx->local_out.dir_fd ) ) FD_LOG_ERR(( "open() failed `%s` (%i-%s)", tile->snaprd.snapshots_path, errno, fd_io_strerror( errno ) ));
1179-
1180-
FD_TEST( fd_cstr_printf_check( ctx->local_out.full_snapshot_path, PATH_MAX, NULL, "%s/snapshot.tar.bz2-partial", tile->snaprd.snapshots_path ) );
1181-
ctx->local_out.full_snapshot_fd = openat( ctx->local_out.dir_fd, "snapshot.tar.bz2-partial", O_WRONLY|O_CREAT|O_TRUNC|O_NONBLOCK, S_IRUSR|S_IWUSR );
1182-
if( FD_UNLIKELY( -1==ctx->local_out.full_snapshot_fd ) ) FD_LOG_ERR(( "open() failed `%s` (%i-%s)", ctx->local_out.full_snapshot_path, errno, fd_io_strerror( errno ) ));
1199+
ctx->local_out.dir_fd = open( tile->snaprd.snapshots_path, O_DIRECTORY|O_CLOEXEC );
1200+
if( FD_UNLIKELY( -1==ctx->local_out.dir_fd ) ) FD_LOG_ERR(( "open() failed `%s` (%i-%s)", tile->snaprd.snapshots_path, errno, fd_io_strerror( errno ) ));
1201+
1202+
FD_TEST( fd_cstr_printf_check( ctx->local_out.full_snapshot_path, PATH_MAX, NULL, "%s/snapshot.tar.bz2-partial", tile->snaprd.snapshots_path ) );
1203+
ctx->local_out.full_snapshot_fd = openat( ctx->local_out.dir_fd, "snapshot.tar.bz2-partial", O_WRONLY|O_CREAT|O_TRUNC|O_NONBLOCK, S_IRUSR|S_IWUSR );
1204+
if( FD_UNLIKELY( -1==ctx->local_out.full_snapshot_fd ) ) FD_LOG_ERR(( "open() failed `%s` (%i-%s)", ctx->local_out.full_snapshot_path, errno, fd_io_strerror( errno ) ));
1205+
1206+
if( FD_LIKELY( tile->snaprd.incremental_snapshot_fetch ) ) {
1207+
FD_TEST( fd_cstr_printf_check( ctx->local_out.incremental_snapshot_path, PATH_MAX, NULL, "%s/incremental-snapshot.tar.bz2-partial", tile->snaprd.snapshots_path ) );
1208+
ctx->local_out.incremental_snapshot_fd = openat( ctx->local_out.dir_fd, "incremental-snapshot.tar.bz2-partial", O_WRONLY|O_CREAT|O_TRUNC|O_NONBLOCK, S_IRUSR|S_IWUSR );
1209+
if( FD_UNLIKELY( -1==ctx->local_out.incremental_snapshot_fd ) ) FD_LOG_ERR(( "open() failed `%s` (%i-%s)", ctx->local_out.incremental_snapshot_path, errno, fd_io_strerror( errno ) ));
1210+
} else {
1211+
ctx->local_out.incremental_snapshot_fd = -1;
1212+
}
11831213

1184-
if( FD_LIKELY( tile->snaprd.incremental_snapshot_fetch ) ) {
1185-
FD_TEST( fd_cstr_printf_check( ctx->local_out.incremental_snapshot_path, PATH_MAX, NULL, "%s/incremental-snapshot.tar.bz2-partial", tile->snaprd.snapshots_path ) );
1186-
ctx->local_out.incremental_snapshot_fd = openat( ctx->local_out.dir_fd, "incremental-snapshot.tar.bz2-partial", O_WRONLY|O_CREAT|O_TRUNC|O_NONBLOCK, S_IRUSR|S_IWUSR );
1187-
if( FD_UNLIKELY( -1==ctx->local_out.incremental_snapshot_fd ) ) FD_LOG_ERR(( "open() failed `%s` (%i-%s)", ctx->local_out.incremental_snapshot_path, errno, fd_io_strerror( errno ) ));
1188-
} else {
1189-
ctx->local_out.incremental_snapshot_fd = -1;
1214+
/* By default, the snaprd tile selects peers and its initial state
1215+
is WAITING_FOR_PEERS. */
1216+
ctx->config.peer_selection = 1;
1217+
ctx->state = FD_SNAPRD_STATE_WAITING_FOR_PEERS;
1218+
ctx->deadline_nanos = fd_log_wallclock() + FD_SNAPRD_WAITING_FOR_PEERS_TIMEOUT_DEADLINE_NANOS;
11901219
}
11911220
}
11921221

0 commit comments

Comments
 (0)