@@ -1041,3 +1041,297 @@ def test_no_batches_mixed_with_prompt_tokens_and_prompt_embeds():
10411041 for seq in scheduled_seq_group .seq_group .seqs :
10421042 seq .status = SequenceStatus .FINISHED_STOPPED
10431043 scheduler .free_finished_seq_groups ()
1044+
1045+
def test_remove_seq_from_computed_blocks_tracker():
    """
    Test that computed_blocks_tracker correctly removes stale sequences
    during scheduling.

    The test covers 9 scheduling branches where stale seqs are removed:
    - 1 in _schedule_swapped
    - 1 in _schedule_priority_preemption
    - 7 in _schedule_prefill

    Each branch is tested to ensure proper cleanup of
    _seq_id_to_num_tokens_computed.
    """

    def assert_tracker_evicted(scheduler, seq_id):
        # A sequence the scheduler skipped/ignored must leave no stale
        # entry behind in the prefix-caching computed-blocks tracker.
        tracker = scheduler.block_manager._computed_blocks_tracker
        assert tracker._seq_id_to_num_tokens_computed.get(seq_id) is None

    # Budget can not schedule in swapped
    block_size = 2
    max_seq_group = 3
    seq_tokens_with_swapped: list[list[int]] = []
    blocks_to_swap_out: list[tuple[int, int]] = []
    curr_loras: set[int] = set()

    scheduler = initialize_scheduler(
        block_size=block_size,
        num_cpu_blocks=64,
        num_gpu_blocks=16,
        max_num_seqs=max_seq_group,
        enable_prefix_caching=True,
    )
    # token_budget (15) is one short of seq_length (16), so swap-in fails.
    budget = create_token_budget(token_budget=15)

    seq_length = 16
    num_seqs = 3
    for i in range(num_seqs):
        seq_tokens_with_swapped.append([i] * seq_length)

    seq_and_seq_groups = [
        create_dummy_prompt(f"{i}",
                            prompt_tokens=seq_tokens_with_swapped[i],
                            block_size=block_size)
        for i in range(len(seq_tokens_with_swapped))
    ]

    for _, seq_group in seq_and_seq_groups:
        scheduler._allocate_and_set_running(seq_group)
        scheduler._swap_out(seq_group, blocks_to_swap_out)
        scheduler._add_seq_group_to_swapped(seq_group)

    scheduler._schedule_swapped(budget, curr_loras)
    assert_tracker_evicted(scheduler, 1)

    # Prefill scheduling doesn't have space for another LoRA, so
    # we ignore this request for now.
    block_size = 4
    lora_config = LoRAConfig(max_lora_rank=8, max_loras=1)
    scheduler = initialize_scheduler(lora_config=lora_config,
                                     block_size=block_size,
                                     num_cpu_blocks=64,
                                     num_gpu_blocks=64,
                                     enable_prefix_caching=True)
    budget = create_token_budget(token_budget=120)
    num_seqs = 2
    for i in range(num_seqs):
        _, seq_group = create_dummy_prompt(str(i),
                                           prompt_length=seq_length,
                                           block_size=block_size,
                                           lora_request=LoRARequest(
                                               lora_name=str(i),
                                               lora_int_id=i + 1,
                                               lora_path="abc"))
        scheduler.add_seq_group(seq_group)

    scheduler._schedule_prefills(budget, curr_loras)
    assert_tracker_evicted(scheduler, 1)

    # Priority preemption schedule
    scheduler._schedule_priority_preemption(budget)
    assert_tracker_evicted(scheduler, 1)

    # Prefill scheduler does not schedule batches with prompt tokens and
    # prompt embeddings co-mingled.
    block_size = 2
    max_seq_group = 3
    scheduler = initialize_scheduler(
        block_size=block_size,
        num_cpu_blocks=16,
        num_gpu_blocks=16,
        max_num_seqs=max_seq_group,
        max_model_len=100,
        enable_prefix_caching=True,
    )
    seq_length = 7
    embedding_size = 5
    seq_tokens_with_embedding: list[list[int]] = []
    seq_embeds: list[Optional[torch.Tensor]] = []

    # First seq is token-based, second is embedding-based: they must not
    # be batched together, so the second gets dropped from the schedule.
    seq_tokens_with_embedding.append(list(range(seq_length)))
    seq_embeds.append(None)
    seq_tokens_with_embedding.append([0] * seq_length)
    seq_embeds.append(torch.rand(embedding_size))

    seq_and_seq_groups = [
        create_dummy_prompt(f"{i}",
                            prompt_tokens=seq_tokens_with_embedding[i],
                            prompt_embeds=seq_embeds[i],
                            block_size=block_size)
        for i in range(len(seq_tokens_with_embedding))
    ]

    for _, seq_group in seq_and_seq_groups:
        scheduler.add_seq_group(seq_group)

    scheduler._schedule_default()
    assert_tracker_evicted(scheduler, 1)

    # Prefill scheduler budget num_batched_tokens
    # >= scheduler_config max_num_batched_tokens
    block_size = 2
    max_seq_group = 3
    seq_tokens_prefill_budget: list[list[int]] = []

    scheduler = initialize_scheduler(
        block_size=block_size,
        max_token_budget=8,
        num_cpu_blocks=16,
        num_gpu_blocks=16,
        max_num_seqs=max_seq_group,
        max_model_len=5,
        enable_prefix_caching=True,
    )
    seq_length = 4
    num_seqs = 3
    for i in range(num_seqs):
        seq_tokens_prefill_budget.append([i] * seq_length)

    seq_and_seq_groups = [
        create_dummy_prompt(f"{i}",
                            prompt_tokens=seq_tokens_prefill_budget[i],
                            block_size=block_size)
        for i in range(len(seq_tokens_prefill_budget))
    ]

    for _, seq_group in seq_and_seq_groups:
        scheduler.add_seq_group(seq_group)

    scheduler._schedule_default()
    # Two seqs of 4 tokens fill the 8-token budget; the third is dropped.
    assert_tracker_evicted(scheduler, 2)

    # Budget can not schedule in waiting
    block_size = 2
    max_seq_group = 3

    scheduler = initialize_scheduler(
        block_size=block_size,
        max_token_budget=30,
        num_cpu_blocks=16,
        num_gpu_blocks=16,
        max_num_seqs=max_seq_group,
        max_model_len=30,
        enable_prefix_caching=True,
    )
    seq_length = 16
    num_seqs = 3
    seq_tokens_prefill_budget_waiting: list[list[int]] = []

    for i in range(num_seqs):
        seq_tokens_prefill_budget_waiting.append(list(range(seq_length)))

    seq_and_seq_groups = [
        create_dummy_prompt(f"{i}",
                            prompt_tokens=seq_tokens_prefill_budget_waiting[i],
                            block_size=block_size)
        for i in range(len(seq_tokens_prefill_budget_waiting))
    ]

    for _, seq_group in seq_and_seq_groups:
        scheduler.add_seq_group(seq_group)

    scheduler._schedule_default()
    assert_tracker_evicted(scheduler, 1)

    # Sequence num_new_tokens > prompt_limit marked FINISHED_IGNORED
    block_size = 2
    max_seq_group = 3
    scheduler = initialize_scheduler(
        block_size=block_size,
        num_cpu_blocks=16,
        num_gpu_blocks=16,
        max_num_seqs=max_seq_group,
        max_model_len=30,
        enable_prefix_caching=True,
    )

    # 31 tokens exceeds max_model_len of 30.
    seq_length = 31
    seq_tokens_prompt_limit: list[list[int]] = []
    seq_tokens_prompt_limit.append(list(range(seq_length)))
    seq_and_seq_groups = [
        create_dummy_prompt("0",
                            prompt_tokens=seq_tokens_prompt_limit[0],
                            block_size=block_size)
    ]
    for _, seq_group in seq_and_seq_groups:
        scheduler.add_seq_group(seq_group)
    scheduler._schedule_default()
    assert_tracker_evicted(scheduler, 0)

    # Budget can not allocate, AllocStatus is NEVER marked FINISHED_IGNORED
    block_size = 2
    max_seq_group = 3
    scheduler = initialize_scheduler(
        block_size=block_size,
        num_cpu_blocks=160,
        num_gpu_blocks=160,
        max_num_seqs=max_seq_group,
        max_model_len=320,
        enable_prefix_caching=True,
    )

    seq_length = 320
    num_seqs = 1
    seq_tokens_never: list[list[int]] = []
    for i in range(num_seqs):
        seq_tokens_never.append(list(range(seq_length)))

    seq_and_seq_groups = [
        create_dummy_prompt(f"{i}",
                            prompt_tokens=seq_tokens_never[i],
                            block_size=block_size)
        for i in range(len(seq_tokens_never))
    ]

    for _, seq_group in seq_and_seq_groups:
        scheduler.add_seq_group(seq_group)

    scheduler._schedule_default()
    assert_tracker_evicted(scheduler, 0)

    # Budget can not allocate, AllocStatus is LATER
    block_size = 2
    max_seq_group = 3
    scheduler = initialize_scheduler(
        block_size=block_size,
        num_cpu_blocks=160,
        num_gpu_blocks=160,
        max_num_seqs=max_seq_group,
        max_model_len=320,
        enable_prefix_caching=True,
    )

    seq_length = 160
    num_seqs = 2
    seq_tokens_later: list[list[int]] = []
    for i in range(num_seqs):
        seq_tokens_later.append(list(range(seq_length)))

    seq_and_seq_groups = [
        create_dummy_prompt(f"{i}",
                            prompt_tokens=seq_tokens_later[i],
                            block_size=block_size)
        for i in range(len(seq_tokens_later))
    ]

    for _, seq_group in seq_and_seq_groups:
        scheduler.add_seq_group(seq_group)

    scheduler._schedule_default()
    assert_tracker_evicted(scheduler, 1)