
Commit 1a797b4: comments

committed · 1 parent d4e9015 · commit 1a797b4

8 files changed: +41 −39 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 23 additions & 21 deletions
@@ -67,8 +67,8 @@ class Analyzer(catalog: Catalog,
       extendedRules : _*),
     Batch("Check Analysis", Once,
       CheckResolution),
-    Batch("AnalysisOperators", fixedPoint,
-      EliminateAnalysisOperators)
+    Batch("Remove SubQueries", fixedPoint,
+      EliminateSubQueries)
   )
 
   /**
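
Note: the batch renamed here runs inside Catalyst's RuleExecutor, which applies each named batch of rules either once or until the plan stops changing. A minimal, self-contained sketch of that execution strategy, using simplified stand-in types rather than the actual Catalyst classes:

trait Rule[T] { def apply(t: T): T }

case class Batch[T](name: String, maxIterations: Int, rules: Rule[T]*) {
  // Apply all rules in order, repeating the pass until a full pass is a
  // no-op or the iteration cap is hit ("Once" is just maxIterations = 1).
  def execute(input: T): T = {
    var current = input
    var iteration = 0
    var changed = true
    while (changed && iteration < maxIterations) {
      val next = rules.foldLeft(current)((plan, rule) => rule(plan))
      changed = next != current
      current = next
      iteration += 1
    }
    current
  }
}

def run[T](input: T, batches: Seq[Batch[T]]): T =
  batches.foldLeft(input)((plan, batch) => batch.execute(plan))

Under this reading, running the renamed batch to fixedPoint simply keeps applying EliminateSubQueries until the plan stops changing.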
@@ -104,25 +104,27 @@ class Analyzer(catalog: Catalog,
           s"of type ${f.condition.dataType.simpleString} is not a boolean.")
 
       case aggregatePlan @ Aggregate(groupingExprs, aggregateExprs, child) =>
-        def isValidAggregateExpression(expr: Expression): Boolean = expr match {
-          case _: AggregateExpression => true
-          case e: Attribute => groupingExprs.contains(e)
-          case e if groupingExprs.contains(e) => true
-          case e if e.references.isEmpty => true
-          case e => e.children.forall(isValidAggregateExpression)
+        def checkValidAggregateExpression(expr: Expression): Unit = expr match {
+          case _: AggregateExpression => // OK
+          case e: Attribute if !groupingExprs.contains(e) =>
+            failAnalysis(
+              s"expression '${e.prettyString}' is neither present in the group by, " +
+                s"nor is it an aggregate function. " +
+                "Add to group by or wrap in first() if you don't care which value you get.")
+          case e: Attribute => // OK
+          case e if groupingExprs.contains(e) => // OK
+          case e if e.references.isEmpty => // OK
+          case e => e.children.foreach(checkValidAggregateExpression)
         }
 
-        aggregateExprs.find { e =>
-          !isValidAggregateExpression(e.transform {
-            // Should trim aliases around `GetField`s. These aliases are introduced while
-            // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
-            // (Should we just turn `GetField` into a `NamedExpression`?)
-            case Alias(g: GetField, _) => g
-          })
-        }.foreach { e =>
-          failAnalysis(
-            s"expression '${e.prettyString}' is not an aggregate function or in the group by")
-        }
+        val cleaned = aggregateExprs.map(_.transform {
+          // Should trim aliases around `GetField`s. These aliases are introduced while
+          // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
+          // (Should we just turn `GetField` into a `NamedExpression`?)
+          case Alias(g, _) => g
+        })
+
+        cleaned.foreach(checkValidAggregateExpression)
 
       case o if o.children.nonEmpty && !o.references.subsetOf(o.inputSet) =>
         val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",")
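
The behavioral change in this hunk: instead of computing a Boolean over each aggregate expression and emitting one generic error afterwards, the analyzer now fails fast at the first offending attribute, with a message that names it and suggests a fix. A standalone sketch of the pattern, with toy stand-ins for the Catalyst Expression hierarchy (not the real API):

sealed trait Expr { def children: Seq[Expr] = Nil }
case class Attr(name: String) extends Expr
case class AggFn(child: Expr) extends Expr { override def children = Seq(child) }
case class Add(left: Expr, right: Expr) extends Expr { override def children = Seq(left, right) }

def checkValidAggregate(groupingExprs: Set[Expr])(expr: Expr): Unit = expr match {
  case _: AggFn => // OK: anything under an aggregate function is allowed
  case a: Attr if !groupingExprs.contains(a) =>
    sys.error(s"expression '${a.name}' is neither present in the group by, " +
      "nor is it an aggregate function.")
  case e if groupingExprs.contains(e) => // OK: matches a grouping expression
  case e => e.children.foreach(checkValidAggregate(groupingExprs)) // recurse

}

// checkValidAggregate(Set(Attr("a")))(Add(Attr("a"), Attr("b")))
// now fails naming 'b' specifically, roughly what a query like
// SELECT a, b FROM t GROUP BY a triggers; the old code reported only
// a generic "not an aggregate function or in the group by" message.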
@@ -247,7 +249,7 @@ class Analyzer(catalog: Catalog,
     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
       case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier, alias), _, _, _) =>
         i.copy(
-          table = EliminateAnalysisOperators(catalog.lookupRelation(tableIdentifier, alias)))
+          table = EliminateSubQueries(catalog.lookupRelation(tableIdentifier, alias)))
       case UnresolvedRelation(tableIdentifier, alias) =>
         catalog.lookupRelation(tableIdentifier, alias)
     }
@@ -494,7 +496,7 @@ class Analyzer(catalog: Catalog,
  * only required to provide scoping information for attributes and can be removed once analysis is
  * complete.
  */
-object EliminateAnalysisOperators extends Rule[LogicalPlan] {
+object EliminateSubQueries extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     case Subquery(_, child) => child
   }
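
The new name reflects what the rule actually does: after analysis, Subquery nodes carry only scoping information, so the rule removes them wholesale. A self-contained illustration with toy plan nodes (not the real LogicalPlan API):

sealed trait Plan
case class Relation(name: String) extends Plan
case class Filter(condition: String, child: Plan) extends Plan
case class Subquery(alias: String, child: Plan) extends Plan

// Mirrors the rule's single rewrite, Subquery(_, child) => child,
// applied throughout the tree.
def eliminateSubQueries(plan: Plan): Plan = plan match {
  case Subquery(_, child)       => eliminateSubQueries(child)
  case Filter(condition, child) => Filter(condition, eliminateSubQueries(child))
  case r: Relation              => r
}

// eliminateSubQueries(Subquery("t", Filter("a > 1", Relation("src"))))
// == Filter("a > 1", Relation("src"))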

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.optimizer
 
-import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators
+import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -30,7 +30,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
   object Optimize extends RuleExecutor[LogicalPlan] {
     val batches =
       Batch("AnalysisNodes", Once,
-        EliminateAnalysisOperators) ::
+        EliminateSubQueries) ::
       Batch("Constant Folding", FixedPoint(50),
         NullPropagation,
         ConstantFolding,

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.optimizer
 
-import org.apache.spark.sql.catalyst.analysis.{UnresolvedGetField, EliminateAnalysisOperators}
+import org.apache.spark.sql.catalyst.analysis.{UnresolvedGetField, EliminateSubQueries}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -33,7 +33,7 @@ class ConstantFoldingSuite extends PlanTest {
   object Optimize extends RuleExecutor[LogicalPlan] {
     val batches =
       Batch("AnalysisNodes", Once,
-        EliminateAnalysisOperators) ::
+        EliminateSubQueries) ::
       Batch("ConstantFolding", Once,
         ConstantFolding,
         BooleanSimplification) :: Nil

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala

Lines changed: 6 additions & 6 deletions
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.analysis
-import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators
+import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
 import org.apache.spark.sql.catalyst.expressions.Explode
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.{PlanTest, LeftOuter, RightOuter}
@@ -32,7 +32,7 @@ class FilterPushdownSuite extends PlanTest {
   object Optimize extends RuleExecutor[LogicalPlan] {
     val batches =
       Batch("Subqueries", Once,
-        EliminateAnalysisOperators) ::
+        EliminateSubQueries) ::
       Batch("Filter Pushdown", Once,
         CombineFilters,
         PushPredicateThroughProject,
@@ -351,7 +351,7 @@ class FilterPushdownSuite extends PlanTest {
     }
     val optimized = Optimize(originalQuery.analyze)
 
-    comparePlans(analysis.EliminateAnalysisOperators(originalQuery.analyze), optimized)
+    comparePlans(analysis.EliminateSubQueries(originalQuery.analyze), optimized)
   }
 
   test("joins: conjunctive predicates") {
@@ -370,7 +370,7 @@ class FilterPushdownSuite extends PlanTest {
       left.join(right, condition = Some("x.b".attr === "y.b".attr))
         .analyze
 
-    comparePlans(optimized, analysis.EliminateAnalysisOperators(correctAnswer))
+    comparePlans(optimized, analysis.EliminateSubQueries(correctAnswer))
   }
 
   test("joins: conjunctive predicates #2") {
@@ -389,7 +389,7 @@ class FilterPushdownSuite extends PlanTest {
       left.join(right, condition = Some("x.b".attr === "y.b".attr))
         .analyze
 
-    comparePlans(optimized, analysis.EliminateAnalysisOperators(correctAnswer))
+    comparePlans(optimized, analysis.EliminateSubQueries(correctAnswer))
   }
 
   test("joins: conjunctive predicates #3") {
@@ -412,7 +412,7 @@ class FilterPushdownSuite extends PlanTest {
         condition = Some("z.a".attr === "x.b".attr))
       .analyze
 
-    comparePlans(optimized, analysis.EliminateAnalysisOperators(correctAnswer))
+    comparePlans(optimized, analysis.EliminateSubQueries(correctAnswer))
   }
 
   val testRelationWithArrayType = LocalRelation('a.int, 'b.int, 'c_arr.array(IntegerType))

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.optimizer
 
 import scala.collection.immutable.HashSet
-import org.apache.spark.sql.catalyst.analysis.{EliminateAnalysisOperators, UnresolvedAttribute}
+import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -34,7 +34,7 @@ class OptimizeInSuite extends PlanTest {
   object Optimize extends RuleExecutor[LogicalPlan] {
     val batches =
       Batch("AnalysisNodes", Once,
-        EliminateAnalysisOperators) ::
+        EliminateSubQueries) ::
       Batch("ConstantFolding", Once,
         ConstantFolding,
         BooleanSimplification,

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnionPushdownSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.analysis
-import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators
+import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.{PlanTest, LeftOuter, RightOuter}
 import org.apache.spark.sql.catalyst.rules._
@@ -29,7 +29,7 @@ class UnionPushdownSuite extends PlanTest {
   object Optimize extends RuleExecutor[LogicalPlan] {
     val batches =
       Batch("Subqueries", Once,
-        EliminateAnalysisOperators) ::
+        EliminateSubQueries) ::
       Batch("Union Pushdown", Once,
         UnionPushdown) :: Nil
   }

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 2 additions & 2 deletions
@@ -37,7 +37,7 @@ import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Experimental
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.ScalaReflection
-import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateAnalysisOperators, OverrideCatalog, OverrideFunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateSubQueries, OverrideCatalog, OverrideFunctionRegistry}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, SetCommand, QueryExecutionException}
 import org.apache.spark.sql.hive.execution.{HiveNativeCommand, DescribeHiveTableCommand}
@@ -104,7 +104,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
    */
   @Experimental
   def analyze(tableName: String) {
-    val relation = EliminateAnalysisOperators(catalog.lookupRelation(Seq(tableName)))
+    val relation = EliminateSubQueries(catalog.lookupRelation(Seq(tableName)))
 
     relation match {
       case relation: MetastoreRelation =>
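
For reference, analyze() is HiveContext's experimental method for computing statistics on a metastore table; this change only affects how it unwraps the looked-up relation. A usage sketch against the Spark 1.x API (the app and table names below are placeholders):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

val sc = new SparkContext(new SparkConf().setAppName("stats-example"))
val hiveContext = new HiveContext(sc)

// Computes table statistics; internally the relation lookup now goes
// through EliminateSubQueries to strip the Subquery wrapper first.
hiveContext.analyze("my_table") // "my_table" is a hypothetical table name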

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.hive.execution
1919

2020
import org.apache.spark.annotation.DeveloperApi
21-
import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators
21+
import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
2222
import org.apache.spark.sql.catalyst.util._
2323
import org.apache.spark.sql.sources._
2424
import org.apache.spark.sql.{DataFrame, SQLContext}
@@ -175,7 +175,7 @@ case class CreateMetastoreDataSourceAsSelect(
175175
val resolved =
176176
ResolvedDataSource(sqlContext, Some(query.schema), provider, optionsWithPath)
177177
val createdRelation = LogicalRelation(resolved.relation)
178-
EliminateAnalysisOperators(sqlContext.table(tableName).logicalPlan) match {
178+
EliminateSubQueries(sqlContext.table(tableName).logicalPlan) match {
179179
case l @ LogicalRelation(i: InsertableRelation) =>
180180
if (l.schema != createdRelation.schema) {
181181
val errorDescription =
