Skip to content

Commit fc3df45

Browse files
mauropalsgraaf authored and gatorsmile committed
[SPARK-24536] Validate that an evaluated limit clause cannot be null
It proposes a version in which nullable expressions are not valid in the limit clause. It was tested with unit and e2e tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Mauro Palsgraaf <[email protected]> Closes #21807 from mauropalsgraaf/SPARK-24536. (cherry picked from commit 4ac2126) Signed-off-by: Xiao Li <[email protected]>
1 parent 25ea27b commit fc3df45

File tree

4 files changed

+51
-19
lines changed

4 files changed

+51
-19
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,15 @@ trait CheckAnalysis extends PredicateHelper {
6666
limitExpr.sql)
6767
case e if e.dataType != IntegerType => failAnalysis(
6868
s"The limit expression must be integer type, but got " +
69-
e.dataType.simpleString)
70-
case e if e.eval().asInstanceOf[Int] < 0 => failAnalysis(
71-
"The limit expression must be equal to or greater than 0, but got " +
72-
e.eval().asInstanceOf[Int])
73-
case e => // OK
69+
e.dataType.catalogString)
70+
case e =>
71+
e.eval() match {
72+
case null => failAnalysis(
73+
s"The evaluated limit expression must not be null, but got ${limitExpr.sql}")
74+
case v: Int if v < 0 => failAnalysis(
75+
s"The limit expression must be equal to or greater than 0, but got $v")
76+
case _ => // OK
77+
}
7478
}
7579
}
7680

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,12 @@ class AnalysisErrorSuite extends AnalysisTest {
392392
"Generators are not supported outside the SELECT clause, but got: Sort" :: Nil
393393
)
394394

395+
errorTest(
396+
"an evaluated limit class must not be null",
397+
testRelation.limit(Literal(null, IntegerType)),
398+
"The evaluated limit expression must not be null, but got " :: Nil
399+
)
400+
395401
errorTest(
396402
"num_rows in limit clause must be equal to or greater than 0",
397403
listRelation.limit(-1),

sql/core/src/test/resources/sql-tests/inputs/limit.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ SELECT * FROM testdata LIMIT CAST(1 AS int);
1313
SELECT * FROM testdata LIMIT -1;
1414
SELECT * FROM testData TABLESAMPLE (-1 ROWS);
1515

16+
17+
SELECT * FROM testdata LIMIT CAST(1 AS INT);
18+
-- evaluated limit must not be null
19+
SELECT * FROM testdata LIMIT CAST(NULL AS INT);
20+
1621
-- limit must be foldable
1722
SELECT * FROM testdata LIMIT key > 3;
1823

sql/core/src/test/resources/sql-tests/results/limit.sql.out

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 12
2+
-- Number of queries: 14
33

44

55
-- !query 0
@@ -66,44 +66,61 @@ The limit expression must be equal to or greater than 0, but got -1;
6666

6767

6868
-- !query 7
69-
SELECT * FROM testdata LIMIT key > 3
69+
SELECT * FROM testdata LIMIT CAST(1 AS INT)
7070
-- !query 7 schema
71-
struct<>
71+
struct<key:int,value:string>
7272
-- !query 7 output
73-
org.apache.spark.sql.AnalysisException
74-
The limit expression must evaluate to a constant value, but got (testdata.`key` > 3);
73+
1 1
7574

7675

7776
-- !query 8
78-
SELECT * FROM testdata LIMIT true
77+
SELECT * FROM testdata LIMIT CAST(NULL AS INT)
7978
-- !query 8 schema
8079
struct<>
8180
-- !query 8 output
8281
org.apache.spark.sql.AnalysisException
83-
The limit expression must be integer type, but got boolean;
82+
The evaluated limit expression must not be null, but got CAST(NULL AS INT);
8483

8584

8685
-- !query 9
87-
SELECT * FROM testdata LIMIT 'a'
86+
SELECT * FROM testdata LIMIT key > 3
8887
-- !query 9 schema
8988
struct<>
9089
-- !query 9 output
9190
org.apache.spark.sql.AnalysisException
92-
The limit expression must be integer type, but got string;
91+
The limit expression must evaluate to a constant value, but got (testdata.`key` > 3);
9392

9493

9594
-- !query 10
96-
SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3
95+
SELECT * FROM testdata LIMIT true
9796
-- !query 10 schema
98-
struct<id:bigint>
97+
struct<>
9998
-- !query 10 output
100-
4
99+
org.apache.spark.sql.AnalysisException
100+
The limit expression must be integer type, but got boolean;
101101

102102

103103
-- !query 11
104-
SELECT * FROM testdata WHERE key < 3 LIMIT ALL
104+
SELECT * FROM testdata LIMIT 'a'
105105
-- !query 11 schema
106-
struct<key:int,value:string>
106+
struct<>
107107
-- !query 11 output
108+
org.apache.spark.sql.AnalysisException
109+
The limit expression must be integer type, but got string;
110+
111+
112+
-- !query 12
113+
SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3
114+
-- !query 12 schema
115+
struct<id:bigint>
116+
-- !query 12 output
117+
4
118+
119+
120+
-- !query 13
121+
SELECT * FROM testdata WHERE key < 3 LIMIT ALL
122+
-- !query 13 schema
123+
struct<key:int,value:string>
124+
-- !query 13 output
108125
1 1
109126
2 2

0 commit comments

Comments
 (0)