Skip to content

Commit e7de7e5

Browse files
YanTangZhai authored and marmbrus committed
[SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references
The sql "select * from spark_test::for_test where abs(20141202) is not null" has predicates=List(IS NOT NULL HiveSimpleUdf#org.apache.hadoop.hive.ql.udf.UDFAbs(20141202)) and partitionKeyIds=AttributeSet(). PruningPredicates is List(IS NOT NULL HiveSimpleUdf#org.apache.hadoop.hive.ql.udf.UDFAbs(20141202)). Then the exception "java.lang.IllegalArgumentException: requirement failed: Partition pruning predicates only supported for partitioned tables." is thrown.

The sql "select * from spark_test::for_test_partitioned_table where abs(20141202) is not null and type_id=11 and platform = 3" with partitioned key insert_date has predicates=List(IS NOT NULL HiveSimpleUdf#org.apache.hadoop.hive.ql.udf.UDFAbs(20141202), (type_id#12 = 11), (platform#8 = 3)) and partitionKeyIds=AttributeSet(insert_date#24). PruningPredicates is List(IS NOT NULL HiveSimpleUdf#org.apache.hadoop.hive.ql.udf.UDFAbs(20141202)).

Author: YanTangZhai <[email protected]>
Author: yantangzhai <[email protected]>

Closes apache#3556 from YanTangZhai/SPARK-4693 and squashes the following commits:

620ebe3 [yantangzhai] [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references
37cfdf5 [yantangzhai] [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references
70a3544 [yantangzhai] [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references
efa9b03 [YanTangZhai] Update HiveQuerySuite.scala
72accf1 [YanTangZhai] Update HiveQuerySuite.scala
e572b9a [YanTangZhai] Update HiveStrategies.scala
6e643f8 [YanTangZhai] Merge pull request #11 from apache/master
e249846 [YanTangZhai] Merge pull request #10 from apache/master
d26d982 [YanTangZhai] Merge pull request #9 from apache/master
76d4027 [YanTangZhai] Merge pull request #8 from apache/master
03b62b0 [YanTangZhai] Merge pull request #7 from apache/master
8a00106 [YanTangZhai] Merge pull request #6 from apache/master
cbcba66 [YanTangZhai] Merge pull request #3 from apache/master
cdef539 [YanTangZhai] Merge pull request #1 from apache/master
1 parent 22ddb6e commit e7de7e5

File tree

3 files changed

+14
-2
lines changed

3 files changed

+14
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,6 @@ class AttributeSet private (val baseSet: Set[AttributeEquals])
112112
override def toSeq: Seq[Attribute] = baseSet.map(_.a).toArray.toSeq
113113

114114
override def toString = "{" + baseSet.map(_.a).mkString(", ") + "}"
115+
116+
override def isEmpty: Boolean = baseSet.isEmpty
115117
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,9 @@ private[hive] trait HiveStrategies {
210210
// Filter out all predicates that only deal with partition keys, these are given to the
211211
// hive table scan operator to be used for partition pruning.
212212
val partitionKeyIds = AttributeSet(relation.partitionKeys)
213-
val (pruningPredicates, otherPredicates) = predicates.partition {
214-
_.references.subsetOf(partitionKeyIds)
213+
val (pruningPredicates, otherPredicates) = predicates.partition { predicate =>
214+
!predicate.references.isEmpty &&
215+
predicate.references.subsetOf(partitionKeyIds)
215216
}
216217

217218
pruneFilterProject(

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,15 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
411411
createQueryTest("select null from table",
412412
"SELECT null FROM src LIMIT 1")
413413

414+
test("predicates contains an empty AttributeSet() references") {
415+
sql(
416+
"""
417+
|SELECT a FROM (
418+
| SELECT 1 AS a FROM src LIMIT 1 ) table
419+
|WHERE abs(20141202) is not null
420+
""".stripMargin).collect()
421+
}
422+
414423
test("implement identity function using case statement") {
415424
val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src")
416425
.map { case Row(i: Int) => i }

0 commit comments

Comments
 (0)