@@ -848,7 +848,7 @@ def _best_effort_node_assignment(
848848 # No instances launched at all, e.g. CreateFleet API returns no EC2 instances,
849849 # or no left instances available from a best-effort EC2 launch
850850 logger .info ("No launched instances found for nodes %s" , print_with_count (nodes_resume_list ))
851- self ._update_failed_nodes (set (nodes_resume_list ), "InsufficientInstanceCapacity" , override = False )
851+ self ._update_failed_nodes (set (nodes_resume_list ), "InsufficientInstanceCapacity(Check slurm_resume log for ec2 error codes) " , override = False )
852852
853853 def _all_or_nothing_node_assignment (
854854 self ,
@@ -903,7 +903,7 @@ def _all_or_nothing_node_assignment(
903903 # No instances launched at all, e.g. CreateFleet API returns no EC2 instances,
904904 # or no left instances available from a best-effort EC2 launch
905905 logger .info ("No launched instances found for nodes %s" , print_with_count (nodes_resume_list ))
906- self ._update_failed_nodes (set (nodes_resume_list ), "InsufficientInstanceCapacity" , override = False )
906+ self ._update_failed_nodes (set (nodes_resume_list ), "InsufficientInstanceCapacity(Check slurm_resume log for ec2 error codes) " , override = False )
907907
908908 def _launch_instances ( # noqa: C901
909909 self ,
@@ -986,7 +986,7 @@ def _launch_ec2_instances(self, batch_nodes, compute_resource, fleet_manager, in
986986 # queue_2: {cr_3: list[EC2Instance]}
987987 # }
988988 else :
989- self ._update_failed_nodes (set (batch_nodes ), "InsufficientInstanceCapacity" )
989+ self ._update_failed_nodes (set (batch_nodes ), "InsufficientInstanceCapacity(Check slurm_resume log for ec2 error codes) " )
990990 return launched_ec2_instances
991991
992992 def _get_fleet_manager (self , all_or_nothing_batch , compute_resource , queue ):
0 commit comments