
Commit 6197cd5

[SQL] Better error messages for analysis failures
1 parent a38e23c commit 6197cd5


4 files changed, +121 -74 lines changed


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 64 additions & 48 deletions
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
-import org.apache.spark.sql.types.{ArrayType, StructField, StructType, IntegerType}
+import org.apache.spark.sql.types._
 
 /**
  * A trivial [[Analyzer]] with an [[EmptyCatalog]] and [[EmptyFunctionRegistry]]. Used for testing
@@ -66,9 +66,7 @@ class Analyzer(catalog: Catalog,
       typeCoercionRules ++
       extendedRules : _*),
     Batch("Check Analysis", Once,
-      CheckResolution ::
-      CheckAggregation ::
-      Nil: _*),
+      CheckResolution),
     Batch("AnalysisOperators", fixedPoint,
       EliminateAnalysisOperators)
   )
@@ -77,21 +75,70 @@ class Analyzer(catalog: Catalog,
    * Makes sure all attributes and logical plans have been resolved.
    */
   object CheckResolution extends Rule[LogicalPlan] {
+    def failAnalysis(msg: String) = { throw new AnalysisException(msg) }
+
     def apply(plan: LogicalPlan): LogicalPlan = {
-      plan.transformUp {
-        case p if p.expressions.exists(!_.resolved) =>
-          val missing = p.expressions.filterNot(_.resolved).map(_.prettyString).mkString(",")
-          val from = p.inputSet.map(_.name).mkString("{", ", ", "}")
-
-          throw new AnalysisException(s"Cannot resolve '$missing' given input columns $from")
-        case p if !p.resolved && p.childrenResolved =>
-          throw new AnalysisException(s"Unresolved operator in the query plan ${p.simpleString}")
-      } match {
-        // As a backstop, use the root node to check that the entire plan tree is resolved.
-        case p if !p.resolved =>
-          throw new AnalysisException(s"Unresolved operator in the query plan ${p.simpleString}")
-        case p => p
+      plan.foreachUp {
+        case operator: LogicalPlan =>
+          operator transformAllExpressions {
+            case a: Attribute if !a.resolved =>
+              val from = operator.inputSet.map(_.name).mkString("{", ", ", "}")
+              failAnalysis(s"cannot resolve '$a' given input columns $from")
+
+            case c: Cast if !c.resolved =>
+              failAnalysis(
+                s"invalid cast from ${c.child.dataType.simpleString} to ${c.dataType.simpleString}")
+
+            case b: BinaryExpression if !b.resolved =>
+              failAnalysis(
+                s"invalid expression ${b.prettyString} " +
+                s"between ${b.left.simpleString} and ${b.right.simpleString}")
+          }
+
+          operator match {
+            case f: Filter if f.condition.dataType != BooleanType =>
+              failAnalysis(s"filter expression '${f.condition.prettyString}' is not a boolean.")
+
+            case aggregatePlan @ Aggregate(groupingExprs, aggregateExprs, child) =>
+              def isValidAggregateExpression(expr: Expression): Boolean = expr match {
+                case _: AggregateExpression => true
+                case e: Attribute => groupingExprs.contains(e)
+                case e if groupingExprs.contains(e) => true
+                case e if e.references.isEmpty => true
+                case e => e.children.forall(isValidAggregateExpression)
+              }
+
+              aggregateExprs.find { e =>
+                !isValidAggregateExpression(e.transform {
+                  // Should trim aliases around `GetField`s. These aliases are introduced while
+                  // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
+                  // (Should we just turn `GetField` into a `NamedExpression`?)
+                  case Alias(g: GetField, _) => g
+                })
+              }.foreach { e =>
+                failAnalysis(s"expression must be aggregates or be in group by $e")
+              }
+
+              aggregatePlan
+
+            case o if o.children.nonEmpty && !o.references.subsetOf(o.inputSet) =>
+              val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(",")
+              val input = o.inputSet.map(_.prettyString).mkString(",")
+
+              failAnalysis(s"resolved attributes $missingAttributes missing from $input")
+
+            // Catch all
+            case o if !o.resolved =>
+              failAnalysis(
+                s"unresolved operator ${operator.simpleString}")
+
+            case _ => // Analysis successful!
+          }
       }
+
+      plan
     }
   }
 
@@ -192,37 +239,6 @@ class Analyzer(catalog: Catalog,
     }
   }
 
-  /**
-   * Checks for non-aggregated attributes with aggregation
-   */
-  object CheckAggregation extends Rule[LogicalPlan] {
-    def apply(plan: LogicalPlan): LogicalPlan = {
-      plan.transform {
-        case aggregatePlan @ Aggregate(groupingExprs, aggregateExprs, child) =>
-          def isValidAggregateExpression(expr: Expression): Boolean = expr match {
-            case _: AggregateExpression => true
-            case e: Attribute => groupingExprs.contains(e)
-            case e if groupingExprs.contains(e) => true
-            case e if e.references.isEmpty => true
-            case e => e.children.forall(isValidAggregateExpression)
-          }
-
-          aggregateExprs.find { e =>
-            !isValidAggregateExpression(e.transform {
-              // Should trim aliases around `GetField`s. These aliases are introduced while
-              // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
-              // (Should we just turn `GetField` into a `NamedExpression`?)
-              case Alias(g: GetField, _) => g
-            })
-          }.foreach { e =>
-            throw new TreeNodeException(plan, s"Expression not in GROUP BY: $e")
-          }
-
-          aggregatePlan
-      }
-    }
-  }
-
   /**
    * Replaces [[UnresolvedRelation]]s with concrete relations from the catalog.
    */
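For context, this is roughly what the new checks look like from calling code: analysis of a bad plan now fails with a message that pinpoints the first problem rather than a cascade reported at the root. A minimal sketch only, reusing the AnalysisSuite fixtures below (testRelation, caseSensitiveAnalyze) and the catalyst DSL; none of it is part of the commit:

    import org.apache.spark.sql.AnalysisException
    import org.apache.spark.sql.catalyst.dsl.expressions._
    import org.apache.spark.sql.catalyst.dsl.plans._

    // Select a column that does not exist in testRelation.
    try {
      caseSensitiveAnalyze(testRelation.select('abcd))
    } catch {
      case e: AnalysisException =>
        // The message now names the unresolved column ('abcd) and the
        // available input columns, e.g. "cannot resolve ... given input columns {...}".
        println(e.getMessage)
    }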

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala

Lines changed: 5 additions & 1 deletion
@@ -20,7 +20,11 @@ package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.analysis.Star
 
 protected class AttributeEquals(val a: Attribute) {
-  override def hashCode() = a.exprId.hashCode()
+  override def hashCode() = a match {
+    case ar: AttributeReference => ar.exprId.hashCode()
+    case a => a.hashCode()
+  }
+
   override def equals(other: Any) = (a, other.asInstanceOf[AttributeEquals].a) match {
     case (a1: AttributeReference, a2: AttributeReference) => a1.exprId == a2.exprId
     case (a1, a2) => a1 == a2
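The hashCode change keeps AttributeEquals consistent with its equals method for attributes that are not AttributeReferences, such as unresolved attributes, which do not carry a usable exprId. A conceptual sketch, assuming it runs inside org.apache.spark.sql.catalyst.expressions (AttributeEquals is protected) and that UnresolvedAttribute compares like a plain case class:

    import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute

    // equals already compared non-AttributeReference attributes with ==;
    // hashCode now agrees with that instead of reaching for exprId.
    val u1 = new AttributeEquals(UnresolvedAttribute("x"))
    val u2 = new AttributeEquals(UnresolvedAttribute("x"))
    assert(u1 == u2)
    assert(u1.hashCode == u2.hashCode)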

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala

Lines changed: 9 additions & 0 deletions
@@ -46,6 +46,15 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] {
     children.foreach(_.foreach(f))
   }
 
+  /**
+   * Runs the given function recursively on [[children]] then on this node.
+   * @param f the function to be applied to each node in the tree.
+   */
+  def foreachUp(f: BaseType => Unit): Unit = {
+    children.foreach(_.foreachUp(f))
+    f(this)
+  }
+
   /**
    * Returns a Seq containing the result of applying the given function to each
    * node in this tree in a preorder traversal.
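foreachUp is the bottom-up counterpart of the existing foreach: children are visited before their parent, which is what lets CheckResolution surface the deepest failure instead of one produced at the root by cascading unresolved nodes. A standalone toy illustration of the visit order (not catalyst code):

    // Minimal stand-in for TreeNode, only to show the order foreachUp visits nodes.
    case class Node(name: String, children: Seq[Node] = Nil) {
      def foreachUp(f: Node => Unit): Unit = {
        children.foreach(_.foreachUp(f))  // recurse into the children first
        f(this)                           // then visit this node
      }
    }

    val tree = Node("root", Seq(Node("a", Seq(Node("a1"))), Node("b")))
    tree.foreachUp(n => print(n.name + " "))  // prints: a1 a b root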

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 43 additions & 25 deletions
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.scalatest.{BeforeAndAfter, FunSuite}
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
+import org.apache.spark.sql.catalyst.expressions.{Literal, Alias, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.types._
 
@@ -108,24 +108,45 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter {
       testRelation)
   }
 
-  test("throw errors for unresolved attributes during analysis") {
-    val e = intercept[AnalysisException] {
-      caseSensitiveAnalyze(Project(Seq(UnresolvedAttribute("abcd")), testRelation))
+  def errorTest(
+      name: String,
+      plan: LogicalPlan,
+      errorMessages: Seq[String],
+      caseSensitive: Boolean = true) = {
+    test(name) {
+      val error = intercept[AnalysisException] {
+        if(caseSensitive) {
+          caseSensitiveAnalyze(plan)
+        } else {
+          caseInsensitiveAnalyze(plan)
+        }
+      }
+
+      errorMessages.foreach(m => assert(error.getMessage contains m))
     }
-    assert(e.getMessage().toLowerCase.contains("cannot resolve"))
   }
 
-  test("throw errors for unresolved plans during analysis") {
-    case class UnresolvedTestPlan() extends LeafNode {
-      override lazy val resolved = false
-      override def output = Nil
-    }
-    val e = intercept[AnalysisException] {
-      caseSensitiveAnalyze(UnresolvedTestPlan())
-    }
-    assert(e.getMessage().toLowerCase.contains("unresolved"))
+  errorTest(
+    "unresolved attributes",
+    testRelation.select('abcd),
+    "cannot resolve" :: "abcd" :: Nil)
+
+  errorTest(
+    "bad casts",
+    testRelation.select(Literal(1).cast(BinaryType).as('badCast)),
+    "invalid cast" :: Literal(1).dataType.simpleString :: BinaryType.simpleString :: Nil)
+
+  case class UnresolvedTestPlan() extends LeafNode {
+    override lazy val resolved = false
+    override def output = Nil
   }
 
+  errorTest(
+    "catch all unresolved plan",
+    UnresolvedTestPlan(),
+    "unresolved" :: Nil)
+
+
   test("divide should be casted into fractional types") {
     val testRelation2 = LocalRelation(
       AttributeReference("a", StringType)(),
@@ -134,18 +155,15 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter {
       AttributeReference("d", DecimalType.Unlimited)(),
       AttributeReference("e", ShortType)())
 
-    val expr0 = 'a / 2
-    val expr1 = 'a / 'b
-    val expr2 = 'a / 'c
-    val expr3 = 'a / 'd
-    val expr4 = 'e / 'e
-    val plan = caseInsensitiveAnalyze(Project(
-      Alias(expr0, s"Analyzer($expr0)")() ::
-      Alias(expr1, s"Analyzer($expr1)")() ::
-      Alias(expr2, s"Analyzer($expr2)")() ::
-      Alias(expr3, s"Analyzer($expr3)")() ::
-      Alias(expr4, s"Analyzer($expr4)")() :: Nil, testRelation2))
+    val plan = caseInsensitiveAnalyze(
+      testRelation2.select(
+        'a / Literal(2) as 'div1,
+        'a / 'b as 'div2,
+        'a / 'c as 'div3,
+        'a / 'd as 'div4,
+        'e / 'e as 'div5))
     val pl = plan.asInstanceOf[Project].projectList
+
     assert(pl(0).dataType == DoubleType)
     assert(pl(1).dataType == DoubleType)
     assert(pl(2).dataType == DoubleType)
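With the errorTest helper, each new negative case is a single call. As a hypothetical further example (not part of this commit), the non-boolean filter check added to CheckResolution could be covered the same way, assuming the catalyst DSL's where is in scope in this suite:

    // Hypothetical extra case: an integer filter condition should trip the
    // "is not a boolean" check and mention the offending filter expression.
    errorTest(
      "non-boolean filter condition",
      testRelation.where(Literal(1)),
      "filter" :: "boolean" :: Nil)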
