Skip to content

Commit 92a7efb

Browse files
committed
Cannot run intersect/except with map type
1 parent f79371a commit 92a7efb

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,18 @@ trait CheckAnalysis extends PredicateHelper {
4444
}).length > 1
4545
}
4646

47+
protected def hasMapType(dt: DataType): Boolean = {
48+
dt.existsRecursively(_.isInstanceOf[MapType])
49+
}
50+
51+
protected def mapColumnInSetOperation(plan: LogicalPlan): Option[Attribute] = plan match {
52+
case _: Intersect | _: Except | _: Distinct =>
53+
plan.output.find(a => hasMapType(a.dataType))
54+
case d: Deduplicate =>
55+
d.keys.find(a => hasMapType(a.dataType))
56+
case _ => None
57+
}
58+
4759
private def checkLimitClause(limitExpr: Expression): Unit = {
4860
limitExpr match {
4961
case e if !e.foldable => failAnalysis(
@@ -121,8 +133,7 @@ trait CheckAnalysis extends PredicateHelper {
121133
if (conditions.isEmpty && query.output.size != 1) {
122134
failAnalysis(
123135
s"Scalar subquery must return only one column, but got ${query.output.size}")
124-
}
125-
else if (conditions.nonEmpty) {
136+
} else if (conditions.nonEmpty) {
126137
// Collect the columns from the subquery for further checking.
127138
var subqueryColumns = conditions.flatMap(_.references).filter(query.output.contains)
128139

@@ -200,7 +211,7 @@ trait CheckAnalysis extends PredicateHelper {
200211
s"filter expression '${f.condition.sql}' " +
201212
s"of type ${f.condition.dataType.simpleString} is not a boolean.")
202213

203-
case f @ Filter(condition, child) =>
214+
case Filter(condition, _) =>
204215
splitConjunctivePredicates(condition).foreach {
205216
case _: PredicateSubquery | Not(_: PredicateSubquery) =>
206217
case e if PredicateSubquery.hasNullAwarePredicateWithinNot(e) =>
@@ -374,6 +385,14 @@ trait CheckAnalysis extends PredicateHelper {
374385
|Conflicting attributes: ${conflictingAttributes.mkString(",")}
375386
""".stripMargin)
376387

388+
// TODO: although map type is not orderable, technically map type should be able to be
389+
// used in equality comparison, remove this type check once we support it.
390+
case o if mapColumnInSetOperation(o).isDefined =>
391+
val mapCol = mapColumnInSetOperation(o).get
392+
failAnalysis("Cannot have map type columns in DataFrame which calls " +
393+
s"set operations(intersect, except, etc.), but the type of column ${mapCol.name} " +
394+
"is " + mapCol.dataType.simpleString)
395+
377396
case o if o.expressions.exists(!_.deterministic) &&
378397
!o.isInstanceOf[Project] && !o.isInstanceOf[Filter] &&
379398
!o.isInstanceOf[Aggregate] && !o.isInstanceOf[Window] =>

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,4 +1703,23 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
17031703
val df = spark.range(1).selectExpr("CAST(id as DECIMAL) as x").selectExpr("percentile(x, 0.5)")
17041704
checkAnswer(df, Row(BigDecimal(0.0)) :: Nil)
17051705
}
1706+
1707+
test("SPARK-19893: cannot run set operations with map type") {
1708+
val df = spark.range(1).select(map(lit("key"), $"id").as("m"))
1709+
val e = intercept[AnalysisException](df.intersect(df))
1710+
assert(e.message.contains(
1711+
"Cannot have map type columns in DataFrame which calls set operations"))
1712+
val e2 = intercept[AnalysisException](df.except(df))
1713+
assert(e2.message.contains(
1714+
"Cannot have map type columns in DataFrame which calls set operations"))
1715+
val e3 = intercept[AnalysisException](df.distinct())
1716+
assert(e3.message.contains(
1717+
"Cannot have map type columns in DataFrame which calls set operations"))
1718+
withTempView("v") {
1719+
df.createOrReplaceTempView("v")
1720+
val e4 = intercept[AnalysisException](sql("SELECT DISTINCT m FROM v"))
1721+
assert(e4.message.contains(
1722+
"Cannot have map type columns in DataFrame which calls set operations"))
1723+
}
1724+
}
17061725
}

0 commit comments

Comments
 (0)