Skip to content

Commit 4ea6c22

Browse files
committed
Rewrite and separate program cache
- Reduce funk key size to 32 bytes (from 40 bytes)
- Move program cache to separate funk instance and wksp
- Restructure program cache logic / general cleanup
- Reduce verbosity in API documentation
- Clean up API naming
- Split up API into a few smaller headers
- Reduce program cache fill allocations and memory copies (directly populate calldests map, skip calldests map alloc for newer sBPF versions)
- Remove broken/unused concurrency model in previous program cache implementation
- Remove strict instruction logging requirements (no program cache tombstone entries for obviously invalid programs, such as invalid section table)
- Simplify cache invalidation logic ("queue program for re-verification"): now just inserts a tombstone
- Reuse cache entries across forks (cache entries tracked at fork graph nodes determined by program modify slot, instead of program load slot)
- Remove program pre-loading
- Make program cache fills shared memory concurrent (in exec tiles)
1 parent 8fe4ae4 commit 4ea6c22

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+3040
-2245
lines changed

book/api/metrics-generated.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,3 +1057,19 @@
10571057
| <span class="metrics-name">backt_&#8203;start_&#8203;slot</span> | gauge | The slot at which the backtest started |
10581058

10591059
</div>
1060+
1061+
## Exec Tile
1062+
1063+
<div class="metrics">
1064+
1065+
| Metric | Type | Description |
1066+
|--------|------|-------------|
1067+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;misses</span> | counter | Number of program cache misses |
1068+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;hits</span> | counter | Number of program cache hits |
1069+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;fills</span> | counter | Number of program cache insertions |
1070+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;fill_&#8203;tot_&#8203;sz</span> | counter | Total number of bytes inserted into program cache |
1071+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;fill_&#8203;fails</span> | counter | Number of program cache load fails (tombstones inserted) |
1072+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;dup_&#8203;inserts</span> | counter | Number of times two tiles raced to insert the same cache entry |
1073+
| <span class="metrics-name">exec_&#8203;progcache_&#8203;invalidations</span> | counter | Number of program cache invalidations |
1074+
1075+
</div>

src/app/firedancer-dev/commands/backtest.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "../../../ballet/lthash/fd_lthash.h"
2929
#include "../../../flamenco/runtime/context/fd_capture_ctx.h"
3030
#include "../../../disco/pack/fd_pack_cost.h"
31+
#include "../../../flamenco/progcache/fd_progcache_admin.h"
3132

3233
#include "../main.h"
3334

@@ -77,9 +78,16 @@ backtest_topo( config_t * config ) {
7778
config->firedancer.funk.max_database_transactions,
7879
config->firedancer.funk.heap_size_gib,
7980
config->firedancer.funk.lock_pages );
80-
8181
fd_topob_tile_uses( topo, replay_tile, funk_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
8282

83+
fd_topob_wksp( topo, "progcache" );
84+
fd_topo_obj_t * progcache_obj = setup_topo_progcache( topo, "progcache",
85+
fd_progcache_est_rec_max( config->firedancer.runtime.program_cache.heap_size_mib<<20,
86+
config->firedancer.runtime.program_cache.mean_cache_entry_size ),
87+
config->firedancer.funk.max_database_transactions,
88+
config->firedancer.runtime.program_cache.heap_size_mib<<20 );
89+
fd_topob_tile_uses( topo, replay_tile, progcache_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
90+
8391
/**********************************************************************/
8492
/* Add the executor tiles to topo */
8593
/**********************************************************************/

src/app/firedancer/config/default.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,17 @@ user = ""
533533
# max_fork_width, the client will crash.
534534
max_fork_width = 32
535535

536+
# The program cache keeps loaded programs (filled on demand) for
537+
# faster transaction execution.
538+
[runtime.program_cache]
539+
# The size of the loaded program cache in MiB.
540+
heap_size_mib = 2048
541+
542+
# The mean expected heap utilization of a cache entry. Controls
543+
# the size of metadata structures (e.g. cache entry table). It
544+
# is not recommended to change this setting.
545+
mean_cache_entry_size = 131072
546+
536547
# This section configures the "groove" persistent account database.
537548
# [groove]
538549
# ...

src/app/firedancer/topology.c

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#include "../../discof/restore/utils/fd_ssmsg.h"
2222
#include "../../flamenco/gossip/fd_gossip.h"
2323
#include "../../flamenco/runtime/context/fd_capture_ctx.h"
24-
#include "../../funk/fd_funk.h" /* funk_footprint() */
24+
#include "../../flamenco/progcache/fd_progcache_admin.h"
2525

2626
#include <sys/random.h>
2727
#include <sys/types.h>
@@ -99,6 +99,35 @@ setup_topo_funk( fd_topo_t * topo,
9999
return obj;
100100
}
101101

102+
fd_topo_obj_t *
103+
setup_topo_progcache( fd_topo_t * topo,
104+
char const * wksp_name,
105+
ulong max_cache_entries,
106+
ulong max_database_transactions,
107+
ulong heap_size ) {
108+
fd_topo_obj_t * obj = fd_topob_obj( topo, "funk", wksp_name );
109+
FD_TEST( fd_pod_insert_ulong( topo->props, "progcache", obj->id ) );
110+
FD_TEST( fd_pod_insertf_ulong( topo->props, max_cache_entries, "obj.%lu.rec_max", obj->id ) );
111+
FD_TEST( fd_pod_insertf_ulong( topo->props, max_database_transactions, "obj.%lu.txn_max", obj->id ) );
112+
FD_TEST( fd_pod_insertf_ulong( topo->props, heap_size, "obj.%lu.heap_max", obj->id ) );
113+
ulong funk_footprint = fd_funk_footprint( max_database_transactions, max_cache_entries );
114+
if( FD_UNLIKELY( !funk_footprint ) ) FD_LOG_ERR(( "Invalid [runtime.program_cache] parameters" ));
115+
if( FD_UNLIKELY( heap_size<(2*funk_footprint) ) ) {
116+
FD_LOG_ERR(( "Invalid [runtime.program_cache] parameters: heap_size_mib should be at least %lu",
117+
( 4*funk_footprint )>>20 ));
118+
}
119+
120+
/* Increase workspace partition count */
121+
ulong wksp_idx = fd_topo_find_wksp( topo, wksp_name );
122+
FD_TEST( wksp_idx!=ULONG_MAX );
123+
fd_topo_wksp_t * wksp = &topo->workspaces[ wksp_idx ];
124+
ulong part_max = fd_wksp_part_max_est( heap_size, 1U<<14U );
125+
if( FD_UNLIKELY( !part_max ) ) FD_LOG_ERR(( "fd_wksp_part_max_est(%lu,16KiB) failed", funk_footprint ));
126+
wksp->part_max += part_max;
127+
128+
return obj;
129+
}
130+
102131
fd_topo_obj_t *
103132
setup_topo_store( fd_topo_t * topo,
104133
char const * wksp_name,
@@ -276,8 +305,8 @@ fd_topo_initialize( config_t * config ) {
276305
fd_topob_wksp( topo, "poh_shred" );
277306
fd_topob_wksp( topo, "poh_replay" );
278307

279-
/* TODO: WTF are these for? */
280308
fd_topob_wksp( topo, "funk" );
309+
fd_topob_wksp( topo, "progcache" );
281310
fd_topob_wksp( topo, "bh_cmp" );
282311
fd_topob_wksp( topo, "fec_sets" );
283312
fd_topob_wksp( topo, "txncache" );
@@ -708,6 +737,15 @@ fd_topo_initialize( config_t * config ) {
708737
FOR(resolv_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "resolv", i ) ], banks_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
709738
FD_TEST( fd_pod_insertf_ulong( topo->props, banks_obj->id, "banks" ) );
710739

740+
fd_topo_obj_t * progcache_obj = setup_topo_progcache( topo, "progcache",
741+
fd_progcache_est_rec_max( config->firedancer.runtime.program_cache.heap_size_mib<<20,
742+
config->firedancer.runtime.program_cache.mean_cache_entry_size ),
743+
config->firedancer.funk.max_database_transactions,
744+
config->firedancer.runtime.program_cache.heap_size_mib<<20 );
745+
/**/ fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "replay", 0UL ) ], progcache_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
746+
FOR(exec_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "exec", i ) ], progcache_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
747+
FOR(bank_tile_cnt) fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "bank", i ) ], progcache_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
748+
711749
/* TODO: This should not exist in production */
712750
fd_topo_obj_t * bank_hash_cmp_obj = setup_topo_bank_hash_cmp( topo, "bh_cmp" );
713751
/**/ fd_topob_tile_uses( topo, &topo->tiles[ fd_topo_find_tile( topo, "replay", 0UL ) ], bank_hash_cmp_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
@@ -901,8 +939,9 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
901939

902940
tile->replay.tx_metadata_storage = config->rpc.extended_tx_metadata_storage;
903941

904-
tile->replay.txncache_obj_id = fd_pod_query_ulong( config->topo.props, "txncache", ULONG_MAX );
905-
tile->replay.funk_obj_id = fd_pod_query_ulong( config->topo.props, "funk", ULONG_MAX );
942+
tile->replay.txncache_obj_id = fd_pod_query_ulong( config->topo.props, "txncache", ULONG_MAX ); FD_TEST( tile->replay.txncache_obj_id !=ULONG_MAX );
943+
tile->replay.funk_obj_id = fd_pod_query_ulong( config->topo.props, "funk", ULONG_MAX ); FD_TEST( tile->replay.funk_obj_id !=ULONG_MAX );
944+
tile->replay.progcache_obj_id = fd_pod_query_ulong( config->topo.props, "progcache", ULONG_MAX ); FD_TEST( tile->replay.progcache_obj_id!=ULONG_MAX );
906945

907946
strncpy( tile->replay.cluster_version, config->tiles.replay.cluster_version, sizeof(tile->replay.cluster_version) );
908947

@@ -925,8 +964,9 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
925964

926965
} else if( FD_UNLIKELY( !strcmp( tile->name, "exec" ) ) ) {
927966

928-
tile->exec.funk_obj_id = fd_pod_query_ulong( config->topo.props, "funk", ULONG_MAX );
929-
tile->exec.txncache_obj_id = fd_pod_query_ulong( config->topo.props, "txncache", ULONG_MAX );
967+
tile->exec.funk_obj_id = fd_pod_query_ulong( config->topo.props, "funk", ULONG_MAX ); FD_TEST( tile->exec.funk_obj_id !=ULONG_MAX );
968+
tile->exec.txncache_obj_id = fd_pod_query_ulong( config->topo.props, "txncache", ULONG_MAX ); FD_TEST( tile->exec.txncache_obj_id !=ULONG_MAX );
969+
tile->exec.progcache_obj_id = fd_pod_query_ulong( config->topo.props, "progcache", ULONG_MAX ); FD_TEST( tile->exec.progcache_obj_id!=ULONG_MAX );
930970

931971
tile->exec.max_live_slots = config->firedancer.runtime.max_live_slots;
932972

@@ -999,8 +1039,9 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
9991039
}
10001040

10011041
} else if( FD_UNLIKELY( !strcmp( tile->name, "bank" ) ) ) {
1002-
tile->bank.txncache_obj_id = fd_pod_query_ulong( config->topo.props, "txncache", ULONG_MAX );
1003-
tile->bank.funk_obj_id = fd_pod_query_ulong( config->topo.props, "funk", ULONG_MAX );
1042+
tile->bank.txncache_obj_id = fd_pod_query_ulong( config->topo.props, "txncache", ULONG_MAX );
1043+
tile->bank.funk_obj_id = fd_pod_query_ulong( config->topo.props, "funk", ULONG_MAX );
1044+
tile->bank.progcache_obj_id = fd_pod_query_ulong( config->topo.props, "progcache", ULONG_MAX );
10041045

10051046
tile->bank.max_live_slots = config->firedancer.runtime.max_live_slots;
10061047

src/app/firedancer/topology.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ setup_topo_funk( fd_topo_t * topo,
3030
ulong heap_size_gib,
3131
int lock_pages );
3232

33+
fd_topo_obj_t *
34+
setup_topo_progcache( fd_topo_t * topo,
35+
char const * wksp_name,
36+
ulong max_cache_entries,
37+
ulong max_database_transactions,
38+
ulong heap_size_gib );
39+
3340
fd_topo_obj_t *
3441
setup_topo_runtime_pub( fd_topo_t * topo,
3542
char const * wksp_name,

src/app/shared/fd_config.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ struct fd_configf {
112112
ulong max_live_slots;
113113
ulong max_vote_accounts;
114114
ulong max_fork_width;
115+
116+
struct {
117+
ulong heap_size_mib;
118+
ulong mean_cache_entry_size;
119+
} program_cache;
115120
} runtime;
116121

117122
struct {

src/app/shared/fd_config_parse.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ fd_config_extract_podf( uchar * pod,
9191
CFG_POP ( ulong, runtime.max_vote_accounts );
9292
CFG_POP ( ulong, runtime.max_fork_width );
9393

94+
CFG_POP ( ulong, runtime.program_cache.heap_size_mib );
95+
CFG_POP ( ulong, runtime.program_cache.mean_cache_entry_size );
96+
9497
CFG_POP ( ulong, store.max_completed_shred_sets );
9598

9699
CFG_POP ( bool, snapshots.incremental_snapshots );
34.6 KB
Binary file not shown.

src/disco/metrics/generate/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class Tile(Enum):
3535
BANKF = 29
3636
RESOLF = 30
3737
BACKT = 31
38+
EXEC = 32
3839

3940
class MetricType(Enum):
4041
COUNTER = 0

src/disco/metrics/generated/fd_metrics_all.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ const char * FD_METRICS_TILE_KIND_NAMES[FD_METRICS_TILE_KIND_CNT] = {
6464
"bankf",
6565
"resolf",
6666
"backt",
67+
"exec",
6768
};
6869

6970
const ulong FD_METRICS_TILE_KIND_SIZES[FD_METRICS_TILE_KIND_CNT] = {
@@ -95,6 +96,7 @@ const ulong FD_METRICS_TILE_KIND_SIZES[FD_METRICS_TILE_KIND_CNT] = {
9596
FD_METRICS_BANKF_TOTAL,
9697
FD_METRICS_RESOLF_TOTAL,
9798
FD_METRICS_BACKT_TOTAL,
99+
FD_METRICS_EXEC_TOTAL,
98100
};
99101
const fd_metrics_meta_t * FD_METRICS_TILE_KIND_METRICS[FD_METRICS_TILE_KIND_CNT] = {
100102
FD_METRICS_NET,
@@ -125,4 +127,5 @@ const fd_metrics_meta_t * FD_METRICS_TILE_KIND_METRICS[FD_METRICS_TILE_KIND_CNT]
125127
FD_METRICS_BANKF,
126128
FD_METRICS_RESOLF,
127129
FD_METRICS_BACKT,
130+
FD_METRICS_EXEC,
128131
};

0 commit comments

Comments
 (0)