diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/AliasAwareOutputExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/AliasAwareOutputExpression.scala
index cfe229945929c..2cca7b844cc82 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/AliasAwareOutputExpression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/AliasAwareOutputExpression.scala
@@ -40,6 +40,7 @@ trait AliasAwareOutputExpression extends SQLConfHelper {
   // more than `aliasCandidateLimit` attributes for an expression. In those cases the old logic
   // handled only the last alias so we need to make sure that we give precedence to that.
   // If the `outputExpressions` contain simple attributes we need to add those too to the map.
+  @transient
   private lazy val aliasMap = {
     val aliases = mutable.Map[Expression, mutable.ArrayBuffer[Attribute]]()
     outputExpressions.reverse.foreach {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
index 52f15cf7b650e..d43331d57c47a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
@@ -44,12 +44,13 @@ case class BatchScanExec(
     applyPartialClustering: Boolean = false,
     replicatePartitions: Boolean = false) extends DataSourceV2ScanExecBase {
 
-  @transient lazy val batch = scan.toBatch
+  @transient lazy val batch = if (scan == null) null else scan.toBatch
 
   // TODO: unify the equal/hashCode implementation for all data source v2 query plans.
   override def equals(other: Any): Boolean = other match {
     case other: BatchScanExec =>
-      this.batch == other.batch && this.runtimeFilters == other.runtimeFilters &&
+      this.batch != null && this.batch == other.batch &&
+        this.runtimeFilters == other.runtimeFilters &&
         this.commonPartitionValues == other.commonPartitionValues &&
         this.replicatePartitions == other.replicatePartitions &&
         this.applyPartialClustering == other.applyPartialClustering
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 4d76013d65935..7c15a05c5867c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -2679,4 +2679,20 @@ class SubquerySuite extends QueryTest
       Row(8, 6))
     }
   }
+
+  test("SPARK-42745: Improved AliasAwareOutputExpression works with DSv2") {
+    withSQLConf(
+      SQLConf.USE_V1_SOURCE_LIST.key -> "") {
+      withTempPath { path =>
+        spark.range(0)
+          .write
+          .mode("overwrite")
+          .parquet(path.getCanonicalPath)
+        withTempView("t1") {
+          spark.read.parquet(path.toString).createOrReplaceTempView("t1")
+          checkAnswer(sql("select (select sum(id) from t1)"), Row(null))
+        }
+      }
+    }
+  }
 }
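
The BatchScanExec hunk above guards the lazily derived `batch` against a null `scan` and makes `equals` require a non-null `batch` before comparing. The standalone sketch below illustrates that null-guard pattern in isolation; it is not Spark code (`Source`, `ScanNode`, and `NullGuardDemo` are hypothetical names) and is only a minimal approximation of keeping a copy with a nulled-out field safe to dereference and compare.

```scala
// Hypothetical stand-ins for a source object and a plan-like node that derives
// a value from it lazily and uses that value in equals().
final case class Source(name: String) {
  def toBatch: String = s"batch-of-$name"
}

final class ScanNode(val source: Source) {
  // A copy created without its source must not throw when the lazy val is forced.
  @transient lazy val batch: String =
    if (source == null) null else source.toBatch

  override def equals(other: Any): Boolean = other match {
    // Mirror the diff: only a non-null derived value can make two nodes equal.
    case that: ScanNode => this.batch != null && this.batch == that.batch
    case _ => false
  }

  override def hashCode(): Int = if (batch == null) 0 else batch.hashCode
}

object NullGuardDemo {
  def main(args: Array[String]): Unit = {
    val a = new ScanNode(Source("t1"))
    val b = new ScanNode(Source("t1"))
    val stripped = new ScanNode(null) // source dropped, e.g. on a canonicalized copy
    println(a == b)        // true: derived batches match
    println(a == stripped) // false, and forcing stripped.batch does not throw
  }
}
```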