From df2fc00ae3cd498e55b355aff5412a393808fda9 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Thu, 18 May 2017 23:35:09 +0800 Subject: [PATCH] Log the driver exit status explicitly. Close apache-spark-on-k8s/spark#276 --- .../spark/deploy/kubernetes/submit/v1/Client.scala | 10 +++++++++- .../submit/v1/LoggingPodStatusWatcher.scala | 12 ++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 8f1e356bec8ca..b370d088aa630 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -231,7 +231,15 @@ private[spark] class Client( if (waitForAppCompletion) { logInfo(s"Waiting for application $kubernetesAppId to finish...") driverPodCompletedLatch.await() - logInfo(s"Application $kubernetesAppId finished.") + + val exitMessage = loggingWatch.getDriverPodExitStatus match { + case (Some(error), _) => s"failed with error: $error" + case (None, 0) => "finished successfully" + case (None, code) => + s"failed with exit code $code. You may want to check the driver pod logs." + } + + logInfo(s"Application $kubernetesAppId $exitMessage") } else { logInfo(s"Application $kubernetesAppId successfully launched.") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala index 537bcccaa1458..94de9e754f305 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala @@ -52,6 +52,9 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL private def status: String = pod.map(_.getStatus().getContainerStatuses().toString()) .getOrElse("unknown") + private var driverPodExitCode: Int = 0 + private var driverPodErrorMessage: Option[String] = None + def start(): Unit = { if (interval > 0) { scheduler.scheduleAtFixedRate(logRunnable, 0, interval, TimeUnit.MILLISECONDS) @@ -62,14 +65,17 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL this.pod = Option(pod) action match { case Action.DELETED => + driverPodErrorMessage = Some("the driver pod was deleted") closeWatch() case Action.ERROR => + driverPodErrorMessage = Some("error happened with the driver pod") closeWatch() case _ => logLongStatus() if (hasCompleted()) { + driverPodExitCode = getDriverPodExitCode closeWatch() } } @@ -129,4 +135,10 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL s"\n\t $k: $newValue" }.mkString("") } + + def getDriverPodExitCode: Int = { + pod.get.getStatus().getContainerStatuses().asScala.last.getState.getTerminated.getExitCode + } + + def getDriverPodExitStatus: (Option[String], Int) = (driverPodErrorMessage, driverPodExitCode) }