
Commit c3336d8

add testSparkPlanMetricsWithPredicates and comments for sort time
1 parent 1e55f31 commit c3336d8

2 files changed: +55 -19 lines changed


sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala

Lines changed: 8 additions & 13 deletions
@@ -202,19 +202,14 @@ class SQLMetricsSuite extends SparkFunSuite with SQLMetricsTestUtils with Shared
     // Because of SPARK-25267, ConvertToLocalRelation is disabled in the test cases of sql/core,
     // so Project here is not collapsed into LocalTableScan.
     val df = Seq(1, 3, 2).toDF("id").sort('id)
-    val metrics = getSparkPlanMetrics(df, 2, Set(0))
-    assert(metrics.isDefined)
-    val sortMetrics = metrics.get.get(0).get
-    // Check node 0 is Sort node
-    val operatorName = sortMetrics._1
-    assert(operatorName == "Sort")
-    // Check metrics values
-    val sortTimeStr = sortMetrics._2.get("sort time total (min, med, max)").get.toString
-    assert(timingMetricStats(sortTimeStr).forall { case (sortTime, _) => sortTime >= 0 })
-    val peakMemoryStr = sortMetrics._2.get("peak memory total (min, med, max)").get.toString
-    assert(sizeMetricStats(peakMemoryStr).forall { case (peakMemory, _) => peakMemory > 0 })
-    val spillSizeStr = sortMetrics._2.get("spill size total (min, med, max)").get.toString
-    assert(sizeMetricStats(spillSizeStr).forall { case (spillSize, _) => spillSize >= 0 })
+    testSparkPlanMetricsWithPredicates(df, 2, Map(
+      0L -> (("Sort", Map(
+        // In SortExec, sort time is collected as nanoseconds, but it is converted and stored as
+        // milliseconds. So sort time may be 0 if sort is executed very fast.
+        "sort time total (min, med, max)" -> timingMetricAllStatsShould(_ >= 0),
+        "peak memory total (min, med, max)" -> sizeMetricAllStatsShould(_ > 0),
+        "spill size total (min, med, max)" -> sizeMetricAllStatsShould(_ >= 0))))
+    ))
   }

   test("SortMergeJoin metrics") {

sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala

Lines changed: 47 additions & 6 deletions
@@ -190,15 +190,34 @@ trait SQLMetricsTestUtils extends SQLTestUtils {
       df: DataFrame,
       expectedNumOfJobs: Int,
       expectedMetrics: Map[Long, (String, Map[String, Any])]): Unit = {
-    val optActualMetrics = getSparkPlanMetrics(df, expectedNumOfJobs, expectedMetrics.keySet)
+    val expectedMetricsPredicates = expectedMetrics.mapValues { case (nodeName, nodeMetrics) =>
+      (nodeName, nodeMetrics.mapValues(expectedMetricValue =>
+        (actualMetricValue: Any) => expectedMetricValue.toString === actualMetricValue)
+      )}
+    testSparkPlanMetricsWithPredicates(df, expectedNumOfJobs, expectedMetricsPredicates)
+  }
+
+  /**
+   * Call `df.collect()` and verify if the collected metrics satisfy the specified predicates.
+   * @param df `DataFrame` to run
+   * @param expectedNumOfJobs number of jobs that will run
+   * @param expectedMetricsPredicates the expected metrics predicates. The format is
+   *                                  `nodeId -> (operatorName, metric name -> metric value predicate)`.
+   */
+  protected def testSparkPlanMetricsWithPredicates(
+      df: DataFrame,
+      expectedNumOfJobs: Int,
+      expectedMetricsPredicates: Map[Long, (String, Map[String, Any => Boolean])]): Unit = {
+    val optActualMetrics =
+      getSparkPlanMetrics(df, expectedNumOfJobs, expectedMetricsPredicates.keySet)
     optActualMetrics.foreach { actualMetrics =>
-      assert(expectedMetrics.keySet === actualMetrics.keySet)
-      for (nodeId <- expectedMetrics.keySet) {
-        val (expectedNodeName, expectedMetricsMap) = expectedMetrics(nodeId)
+      assert(expectedMetricsPredicates.keySet === actualMetrics.keySet)
+      for (nodeId <- expectedMetricsPredicates.keySet) {
+        val (expectedNodeName, expectedMetricsPredicatesMap) = expectedMetricsPredicates(nodeId)
         val (actualNodeName, actualMetricsMap) = actualMetrics(nodeId)
         assert(expectedNodeName === actualNodeName)
-        for (metricName <- expectedMetricsMap.keySet) {
-          assert(expectedMetricsMap(metricName).toString === actualMetricsMap(metricName))
+        for (metricName <- expectedMetricsPredicatesMap.keySet) {
+          assert(expectedMetricsPredicatesMap(metricName)(actualMetricsMap(metricName)))
         }
       }
     }
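With this refactoring, testSparkPlanMetrics keeps its exact-match behavior by lifting each expected value into an equality predicate, while suites that need looser checks can call testSparkPlanMetricsWithPredicates directly. A hypothetical call from a suite mixing in SQLMetricsTestUtils (the DataFrame, job count, and node id below are illustrative, not taken from this commit):

    val df = Seq(4, 1, 3).toDF("id").sort('id)
    testSparkPlanMetricsWithPredicates(df, 2, Map(
      0L -> (("Sort", Map(
        // Any `Any => Boolean` can serve as the check, not only the
        // helpers added in the next hunk:
        "sort time total (min, med, max)" -> ((v: Any) => v.toString.nonEmpty))))
    ))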
@@ -248,6 +267,28 @@ trait SQLMetricsTestUtils extends SQLTestUtils {
   protected def timingMetricStats(metricStr: String): Seq[(Float, String)] = {
     metricStats(metricStr).map(stringToDuration)
   }
+
+  /**
+   * Returns a function to check whether all stats (sum, min, med and max) of a timing metric
+   * satisfy the specified predicate.
+   * @param predicate predicate to check stats
+   * @return function to check all stats of a timing metric
+   */
+  protected def timingMetricAllStatsShould(predicate: Float => Boolean): Any => Boolean = {
+    (timingMetric: Any) =>
+      timingMetricStats(timingMetric.toString).forall { case (duration, _) => predicate(duration) }
+  }
+
+  /**
+   * Returns a function to check whether all stats (sum, min, med and max) of a size metric satisfy
+   * the specified predicate.
+   * @param predicate predicate to check stats
+   * @return function to check all stats of a size metric
+   */
+  protected def sizeMetricAllStatsShould(predicate: Float => Boolean): Any => Boolean = {
+    (sizeMetric: Any) =>
+      sizeMetricStats(sizeMetric.toString).forall { case (bytes, _) => predicate(bytes) }
+  }
 }
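Both new helpers follow the same pattern: parse the rendered metric string into its component stats, then require the predicate to hold for every one of them. A self-contained sketch of that pattern, with a toy number extractor standing in for the trait's metricStats/stringToDuration machinery (the parser here is illustrative only):

    object AllStatsShouldSketch extends App {
      // Toy stand-in for timingMetricStats: pull every number out of a
      // rendered metric string such as "5 ms (0 ms, 2 ms, 3 ms)".
      def stats(metricStr: String): Seq[Float] =
        "[0-9.]+".r.findAllIn(metricStr).map(_.toFloat).toSeq

      // Same shape as timingMetricAllStatsShould: lift a predicate on a
      // single stat into a predicate on the whole rendered metric value.
      def allStatsShould(predicate: Float => Boolean): Any => Boolean =
        (metric: Any) => stats(metric.toString).forall(predicate)

      val nonNegative = allStatsShould(_ >= 0)
      println(nonNegative("5 ms (0 ms, 2 ms, 3 ms)")) // prints true: every stat is >= 0
    }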