@@ -334,20 +334,22 @@ def create_dp_placement_groups(
334334 "No nodes with resources found in Ray cluster." )
335335 assert dp_master_ip_key in nodes [0 ], (
336336 "The DP master node (ip: %s) is missing or dead" , dp_master_ip )
337+ device_str = current_platform .ray_device_key
337338 for node_resources in nodes :
338- if "GPU" not in node_resources :
339+ if device_str not in node_resources :
339340 continue
340341 # For now, each DP rank can only be assigned to one node
341342 # TODO(rui): support allocating a single DP rank
342343 # to multiple nodes
343- available_engine_count = int (node_resources ["GPU" ]) // world_size
344+ available_engine_count = int (
345+ node_resources [device_str ]) // world_size
344346 if dp_master_ip_key in node_resources :
345347 assert available_engine_count >= local_engine_count , (
346348 "Not enough resources to allocate DP ranks "
347349 f"on DP master node { dp_master_ip } " )
348350 for i in range (local_engine_count ):
349351 bundles = [{
350- "GPU" : 1.0 ,
352+ device_str : 1.0 ,
351353 "node:" + dp_master_ip : 0.001
352354 }] * world_size + [{
353355 "CPU" : 1.0
@@ -363,7 +365,7 @@ def create_dp_placement_groups(
363365 for i in range (available_engine_count ):
364366 if len (placement_groups ) == num_pg_to_create :
365367 break
366- bundles = [{"GPU" : 1.0 }] * world_size + [{"CPU" : 1.0 }]
368+ bundles = [{device_str : 1.0 }] * world_size + [{"CPU" : 1.0 }]
367369 pg = ray .util .placement_group (
368370 name = f"dp_rank_{ len (placement_groups )} " ,
369371 strategy = "STRICT_PACK" ,
@@ -415,17 +417,18 @@ def add_dp_placement_groups(
415417 local_dp_ranks = []
416418 num_pg_created = 0
417419
420+ device_str = current_platform .ray_device_key
418421 for node in nodes :
419422 if num_pg_created >= num_pg_to_create :
420423 break
421424
422425 node_ip = node .node_ip
423426 node_id = node .node_id
424- available_gpus = int (available_resources [node_id ]["GPU" ])
427+ available_gpus = int (available_resources [node_id ][device_str ])
425428
426429 # Get total GPUs on this node from the node's resources
427430 # Ray stores node resources with node ID as key
428- total_gpus = int (total_resources [node_id ]["GPU" ])
431+ total_gpus = int (total_resources [node_id ][device_str ])
429432
430433 # Calculate used GPUs and used engines on this node
431434 used_gpus = max (0 , total_gpus - available_gpus )
@@ -444,13 +447,13 @@ def add_dp_placement_groups(
444447 # Create bundles with node constraint for master node
445448 if node_ip == dp_master_ip :
446449 bundles = [{
447- "GPU" : 1.0 ,
450+ device_str : 1.0 ,
448451 "node:" + dp_master_ip : 0.001
449452 }] * world_size + [{
450453 "CPU" : 1.0
451454 }]
452455 else :
453- bundles = [{"GPU" : 1.0 }] * world_size + [{"CPU" : 1.0 }]
456+ bundles = [{device_str : 1.0 }] * world_size + [{"CPU" : 1.0 }]
454457
455458 pg = ray .util .placement_group (
456459 name = f"dp_rank_{ rank } " ,
0 commit comments