
Commit a7e97dd

remove unintended changes
1 parent 362c5ea commit a7e97dd

File tree

3 files changed: +24 -43 lines changed


sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala

Lines changed: 4 additions & 23 deletions
@@ -69,7 +69,8 @@ case class HashAggregateExec(
     "aggTime" -> SQLMetrics.createTimingMetric(sparkContext, "time in aggregation build"),
     "avgHashProbe" ->
       SQLMetrics.createAverageMetric(sparkContext, "avg hash probe bucket list iters"),
-    "numTasksFallBacked" -> SQLMetrics.createMetric(sparkContext, "number of sort fallback tasks"))
+    "numTasksFallBacked" -> SQLMetrics.createMetric(sparkContext,
+      "number of sort fallback tasks"))
     if (skipPartialAggregateEnabled) {
       metrics ++ Map("partialAggSkipped" -> SQLMetrics.createMetric(sparkContext,
         "number of skipped records for partial aggregates"))
@@ -78,23 +79,6 @@
     }
   }
 
-  override def output: Seq[Attribute] = resultExpressions.map(_.toAttribute)
-
-  override protected def outputExpressions: Seq[NamedExpression] = resultExpressions
-
-  override def producedAttributes: AttributeSet =
-    AttributeSet(aggregateAttributes) ++
-      AttributeSet(resultExpressions.diff(groupingExpressions).map(_.toAttribute)) ++
-      AttributeSet(aggregateBufferAttributes)
-
-  override def requiredChildDistribution: List[Distribution] = {
-    requiredChildDistributionExpressions match {
-      case Some(exprs) if exprs.isEmpty => AllTuples :: Nil
-      case Some(exprs) if exprs.nonEmpty => ClusteredDistribution(exprs) :: Nil
-      case None => UnspecifiedDistribution :: Nil
-    }
-  }
-
   // This is for testing. We force TungstenAggregationIterator to fall back to the unsafe row hash
   // map and/or the sort-based aggregation once it has processed a given number of input rows.
   private val testFallbackStartsAt: Option[(Int, Int)] = {
@@ -520,8 +504,6 @@ case class HashAggregateExec(
 
       // merge the final hashMap into sorter
       numTasksFallBacked += 1
-      sorter.merge(hashMap.destructAndCreateExternalSorter())
-      hashMap.free()
       if (!skipPartialAggTerm) {
         sorter.merge(hashMap.destructAndCreateExternalSorter())
         hashMap.free()
@@ -724,8 +706,6 @@ case class HashAggregateExec(
 
   private def doProduceWithKeys(ctx: CodegenContext): String = {
     val initAgg = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "initAgg")
-    if (conf.enableTwoLevelAggMap) {
-
     var childrenConsumed: String = null
     if (skipPartialAggregateEnabled) {
       skipPartialAggTerm = ctx.
@@ -833,7 +813,8 @@ case class HashAggregateExec(
     }
 
     val finishRegularHashMap = s"$iterTerm = $thisPlan.finishAggregate(" +
-      s"$hashMapTerm, $sorterTerm, $peakMemory, $spillSize, $avgHashProbe, $numTasksFallBacked, $mapCleared);"
+      s"$hashMapTerm, $sorterTerm, $peakMemory, $spillSize, $avgHashProbe, $numTasksFallBacked," +
+      s" $mapCleared);"
     val finishHashMap = if (isFastHashMapEnabled) {
       s"""
          |$iterTermForFastHashMap = $fastHashMapTerm.rowIterator();
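For context on the metric touched by the first and last hunks: HashAggregateExec registers a named SQLMetric ("numTasksFallBacked") on the driver, and the generated code increments it when a task falls back to sort-based aggregation. Below is a minimal, standalone sketch of that create-and-increment pattern, assuming a local SparkSession; the object name and the simulated increment are illustrative and not the operator's actual code path.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.metric.SQLMetrics

object NumTasksFallBackedSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("metric-sketch").getOrCreate()

    // Register a sum-style metric against the SparkContext, as the hunk above does
    // for "numTasksFallBacked".
    val numTasksFallBacked =
      SQLMetrics.createMetric(spark.sparkContext, "number of sort fallback tasks")

    // The operator bumps the metric once per task that spills to sort-based
    // aggregation; here we simulate a single fallback.
    numTasksFallBacked += 1

    println(s"number of sort fallback tasks = ${numTasksFallBacked.value}")
    spark.stop()
  }
}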

sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala

Lines changed: 17 additions & 17 deletions
@@ -21,7 +21,7 @@ import org.apache.spark.sql.{Dataset, QueryTest, Row, SaveMode}
 import org.apache.spark.sql.catalyst.expressions.aggregate.Partial
 import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeAndComment, CodeGenerator}
 import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite
-import org.apache.spark.sql.execution.aggregate.{AggUtils, HashAggregateExec}
+import org.apache.spark.sql.execution.aggregate.HashAggregateExec
 import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec, SortMergeJoinExec}
 import org.apache.spark.sql.functions._
@@ -279,7 +279,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
       val twoJoinsDF = df1.join(df2, $"k1" < $"k2").crossJoin(df3)
       hasJoinInCodegen = twoJoinsDF.queryExecution.executedPlan.collect {
         case WholeStageCodegenExec(BroadcastNestedLoopJoinExec(
-          _: BroadcastNestedLoopJoinExec, _, _, _, _)) => true
+          _: BroadcastNestedLoopJoinExec, _, _, _, _)) => true
       }.size === 1
       assert(hasJoinInCodegen == codegenEnabled)
       checkAnswer(twoJoinsDF,
@@ -318,7 +318,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
         .join(df3, $"k1" <= $"k3", "left_outer")
       hasJoinInCodegen = twoJoinsDF.queryExecution.executedPlan.collect {
         case WholeStageCodegenExec(BroadcastNestedLoopJoinExec(
-          _: BroadcastNestedLoopJoinExec, _, _, _, _)) => true
+          _: BroadcastNestedLoopJoinExec, _, _, _, _)) => true
       }.size === 1
       assert(hasJoinInCodegen == codegenEnabled)
       checkAnswer(twoJoinsDF,
@@ -387,7 +387,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
       val plan = ds.queryExecution.executedPlan
       assert(plan.find(p =>
         p.isInstanceOf[WholeStageCodegenExec] &&
-          p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SerializeFromObjectExec]).isDefined)
+          p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[SerializeFromObjectExec]).isDefined)
       assert(ds.collect() === 0.until(10).map(_.toString).toArray)
     }
 
@@ -405,7 +405,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
       val plan = ds.queryExecution.executedPlan
       assert(plan.find(p =>
         p.isInstanceOf[WholeStageCodegenExec] &&
-          p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[FilterExec]).isDefined)
+          p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[FilterExec]).isDefined)
       assert(ds.collect() === Array(0, 6))
     }
 
@@ -418,7 +418,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
       val planInt = dsIntFilter.queryExecution.executedPlan
       assert(planInt.collect {
         case WholeStageCodegenExec(FilterExec(_,
-          ColumnarToRowExec(InputAdapter(_: InMemoryTableScanExec)))) => ()
+          ColumnarToRowExec(InputAdapter(_: InMemoryTableScanExec)))) => ()
       }.length == 1)
       assert(dsIntFilter.collect() === Array(1, 2))
 
@@ -556,7 +556,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
         .write.mode(SaveMode.Overwrite).parquet(path)
 
       withSQLConf(SQLConf.WHOLESTAGE_MAX_NUM_FIELDS.key -> "255",
-          SQLConf.WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR.key -> "true") {
+          SQLConf.WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR.key -> "true") {
         val projection = Seq.tabulate(columnNum)(i => s"c$i + c$i as newC$i")
         val df = spark.read.parquet(path).selectExpr(projection: _*)
 
@@ -642,18 +642,18 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
         .join(baseTable, "idx")
       assert(distinctWithId.queryExecution.executedPlan.collectFirst {
         case WholeStageCodegenExec(
-          ProjectExec(_, BroadcastHashJoinExec(_, _, _, _, _, _: HashAggregateExec, _, _))) => true
+          ProjectExec(_, BroadcastHashJoinExec(_, _, _, _, _, _: HashAggregateExec, _, _))) => true
       }.isDefined)
       checkAnswer(distinctWithId, Seq(Row(1, 0), Row(1, 0)))
 
       // BroadcastHashJoinExec with a HashAggregateExec child containing a Final mode aggregate
       // expression
       val groupByWithId =
-        baseTable.groupBy("idx").sum().withColumn("id", monotonically_increasing_id())
+        baseTable.groupBy("idx").sum().withColumn("id", monotonically_increasing_id())
         .join(baseTable, "idx")
       assert(groupByWithId.queryExecution.executedPlan.collectFirst {
         case WholeStageCodegenExec(
-          ProjectExec(_, BroadcastHashJoinExec(_, _, _, _, _, _: HashAggregateExec, _, _))) => true
+          ProjectExec(_, BroadcastHashJoinExec(_, _, _, _, _, _: HashAggregateExec, _, _))) => true
       }.isDefined)
       checkAnswer(groupByWithId, Seq(Row(1, 2, 0), Row(1, 2, 0)))
     }
@@ -682,17 +682,17 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
     assert(
       executedPlan.find {
         case WholeStageCodegenExec(
-          HashAggregateExec(_, _, _, _, _, _, _: LocalTableScanExec)) => true
+          HashAggregateExec(_, _, _, _, _, _, _: LocalTableScanExec)) => true
         case _ => false
       }.isDefined,
       "LocalTableScanExec should be within a WholeStageCodegen domain.")
   }
 
   test("Give up splitting aggregate code if a parameter length goes over the limit") {
     withSQLConf(
-      SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "true",
-      SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1",
-      "spark.sql.CodeGenerator.validParamLength" -> "0") {
+      SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "true",
+      SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1",
+      "spark.sql.CodeGenerator.validParamLength" -> "0") {
       withTable("t") {
         val expectedErrMsg = "Failed to split aggregate code into small functions"
         Seq(
@@ -711,9 +711,9 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession
 
   test("Give up splitting subexpression code if a parameter length goes over the limit") {
     withSQLConf(
-      SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "false",
-      SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1",
-      "spark.sql.CodeGenerator.validParamLength" -> "0") {
+      SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "false",
+      SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1",
+      "spark.sql.CodeGenerator.validParamLength" -> "0") {
       withTable("t") {
         val expectedErrMsg = "Failed to split subexpression code into small functions"
         Seq(
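The test hunks above all follow the same shape: run a query, then pattern-match the executed plan to confirm the operator of interest sits directly under a WholeStageCodegenExec node. A minimal sketch of that check outside the test harness follows, assuming a local SparkSession with adaptive execution disabled; the query and object name are illustrative.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.WholeStageCodegenExec
import org.apache.spark.sql.execution.aggregate.HashAggregateExec

object CodegenPlanCheckSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("codegen-plan-check")
      .config("spark.sql.adaptive.enabled", "false")
      .getOrCreate()
    import spark.implicits._

    // A simple aggregate that whole-stage codegen normally covers.
    val df = Seq(1, 2, 2, 3).toDF("v").groupBy($"v").count()

    // Same idea as the suite: collect plan nodes where a HashAggregateExec is the
    // direct child of a WholeStageCodegenExec.
    val aggUnderCodegen = df.queryExecution.executedPlan.collect {
      case WholeStageCodegenExec(agg: HashAggregateExec) => agg
    }
    println(s"HashAggregateExec under WholeStageCodegenExec: ${aggUnderCodegen.nonEmpty}")
    spark.stop()
  }
}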

sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala

Lines changed: 3 additions & 3 deletions
@@ -657,9 +657,9 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils
 
   test("SPARK-25602: SparkPlan.getByteArrayRdd should not consume the input when not necessary") {
     def checkFilterAndRangeMetrics(
-        df: DataFrame,
-        filterNumOutputs: Int,
-        rangeNumOutputs: Int): Unit = {
+        df: DataFrame,
+        filterNumOutputs: Int,
+        rangeNumOutputs: Int): Unit = {
       val plan = df.queryExecution.executedPlan
 
       val filters = collectNodeWithinWholeStage[FilterExec](plan)
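This hunk only reverts the indentation of the checkFilterAndRangeMetrics parameter list; the helper itself reads operator metrics from an executed plan. A minimal sketch of reading such a metric directly is below, assuming a local SparkSession with adaptive execution disabled; the query is illustrative, and "numOutputRows" is assumed to be FilterExec's standard metric key.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.FilterExec

object FilterMetricSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("filter-metric")
      .config("spark.sql.adaptive.enabled", "false")
      .getOrCreate()

    val df = spark.range(0, 100).filter("id % 2 = 0")
    df.collect() // execute the query so the metrics are populated

    // Walk the executed plan, find FilterExec nodes, and read their
    // "number of output rows" metric, much like the suite's helper does.
    val filterOutputRows = df.queryExecution.executedPlan.collect {
      case f: FilterExec => f.metrics("numOutputRows").value
    }
    println(s"Filter output rows: ${filterOutputRows.mkString(", ")}")
    spark.stop()
  }
}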
