@@ -231,7 +231,7 @@ def _wait_for_cluster_state(
231231 cluster_id : str ,
232232 target_state : V1ClusterState ,
233233 timeout_seconds : int = MAX_CLUSTER_WAIT_TIME ,
234- poll_duration_seconds : int = 10 ,
234+ poll_duration_seconds : int = 60 ,
235235) -> None :
236236 """_wait_for_cluster_state waits until the provided cluster has reached a desired state, or failed.
237237
@@ -307,21 +307,24 @@ def _cluster_status_long(cluster: V1GetClusterResponse, desired_state: V1Cluster
307307 duration = _format_elapsed_seconds (elapsed )
308308
309309 if current_state == V1ClusterState .FAILED :
310- return dedent (
311- f""" \
312- The requested cluster operation for cluster { cluster_id } has errors:
313- { current_reason }
310+ if not _is_retryable_error ( current_reason ):
311+ return dedent (
312+ f""" \
313+ The requested cluster operation for cluster { cluster_id } has errors:
314314
315- ---
316- We are automatically retrying, and an automated alert has been created
315+ { current_reason }
317316
318- WARNING: Any non-deleted cluster may be using resources.
319- To avoid incuring cost on your cloud provider, delete the cluster using the following command:
320- lightning delete cluster { cluster_id }
317+ --------------------------------------------------------------
321318
322- Contact [email protected] for additional help 323- """
324- )
319+ We are automatically retrying, and an automated alert has been created
320+
321+ WARNING: Any non-deleted cluster may be using resources.
322+ To avoid incuring cost on your cloud provider, delete the cluster using the following command:
323+ lightning delete cluster { cluster_id }
324+
325+ Contact [email protected] for additional help 326+ """
327+ )
325328
326329 if desired_state == current_state == V1ClusterState .RUNNING :
327330 return dedent (
@@ -352,6 +355,10 @@ def _cluster_status_long(cluster: V1GetClusterResponse, desired_state: V1Cluster
352355 raise click .ClickException (f"Unknown cluster desired state { desired_state } " )
353356
354357
358+ def _is_retryable_error (error_message : str ) -> bool :
359+ return "resources failed to delete" in error_message
360+
361+
355362def _format_elapsed_seconds (seconds : Union [float , int ]) -> str :
356363 """Turns seconds into a duration string.
357364
0 commit comments