Skip to content

Commit 12be2c3

Browse files
committed
address comments.
1 parent 35005ee commit 12be2c3

File tree

2 files changed

+80
-34
lines changed

2 files changed

+80
-34
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/SQLBuilder.scala

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -83,12 +83,23 @@ class SQLBuilder(logicalPlan: LogicalPlan, sqlContext: SQLContext) extends Loggi
8383

8484
// TABLESAMPLE is part of tableSource clause in the parser,
8585
// and thus we must handle it with subquery.
86-
case Sample(lb, ub, withReplacement, _, child @ Subquery(alias, grandChild))
87-
if !withReplacement && lb <= (ub + RandomSampler.roundingEpsilon) =>
86+
case p @ Sample(lb, ub, withReplacement, _, _)
87+
if !withReplacement && lb <= (ub + RandomSampler.roundingEpsilon) =>
8888
val fraction = math.min(100, math.max(0, (ub - lb) * 100))
89-
val aliasName = if (grandChild.isInstanceOf[Subquery]) alias else ""
90-
val plan = if (grandChild.isInstanceOf[Subquery]) grandChild else child
91-
s"${toSQL(plan)} TABLESAMPLE($fraction PERCENT) $aliasName"
89+
p.child match {
90+
case m: MetastoreRelation =>
91+
val aliasName = m.alias.getOrElse("")
92+
build(
93+
s"`${m.databaseName}`.`${m.tableName}`",
94+
"TABLESAMPLE(" + fraction + " PERCENT)",
95+
aliasName)
96+
case s: Subquery =>
97+
val aliasName = if (s.child.isInstanceOf[Subquery]) s.alias else ""
98+
val plan = if (s.child.isInstanceOf[Subquery]) s.child else s
99+
build(toSQL(plan), "TABLESAMPLE(" + fraction + " PERCENT)", aliasName)
100+
case _ =>
101+
build(toSQL(p.child), "TABLESAMPLE(" + fraction + " PERCENT)")
102+
}
92103

93104
case p: Filter =>
94105
val whereOrHaving = p.child match {

sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala

Lines changed: 64 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -26,24 +26,32 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
2626
import testImplicits._
2727

2828
protected override def beforeAll(): Unit = {
29+
sql("DROP TABLE IF EXISTS parquet_t0")
30+
sql("DROP TABLE IF EXISTS parquet_t1")
31+
sql("DROP TABLE IF EXISTS parquet_t2")
2932
sql("DROP TABLE IF EXISTS t0")
30-
sql("DROP TABLE IF EXISTS t1")
31-
sql("DROP TABLE IF EXISTS t2")
32-
sqlContext.range(10).write.saveAsTable("t0")
33+
34+
sqlContext.range(10).write.saveAsTable("parquet_t0")
35+
sql("CREATE TABLE t0 AS SELECT * FROM parquet_t0")
3336

3437
sqlContext
3538
.range(10)
3639
.select('id as 'key, concat(lit("val_"), 'id) as 'value)
3740
.write
38-
.saveAsTable("t1")
41+
.saveAsTable("parquet_t1")
3942

40-
sqlContext.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write.saveAsTable("t2")
43+
sqlContext
44+
.range(10)
45+
.select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
46+
.write
47+
.saveAsTable("parquet_t2")
4148
}
4249

4350
override protected def afterAll(): Unit = {
51+
sql("DROP TABLE IF EXISTS parquet_t0")
52+
sql("DROP TABLE IF EXISTS parquet_t1")
53+
sql("DROP TABLE IF EXISTS parquet_t2")
4454
sql("DROP TABLE IF EXISTS t0")
45-
sql("DROP TABLE IF EXISTS t1")
46-
sql("DROP TABLE IF EXISTS t2")
4755
}
4856

4957
private def checkHiveQl(hiveQl: String): Unit = {
@@ -82,96 +90,123 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
8290
}
8391

8492
test("in") {
85-
checkHiveQl("SELECT id FROM t0 WHERE id IN (1, 2, 3)")
93+
checkHiveQl("SELECT id FROM parquet_t0 WHERE id IN (1, 2, 3)")
8694
}
8795

8896
test("aggregate function in having clause") {
89-
checkHiveQl("SELECT COUNT(value) FROM t1 GROUP BY key HAVING MAX(key) > 0")
97+
checkHiveQl("SELECT COUNT(value) FROM parquet_t1 GROUP BY key HAVING MAX(key) > 0")
9098
}
9199

92100
test("aggregate function in order by clause") {
93-
checkHiveQl("SELECT COUNT(value) FROM t1 GROUP BY key ORDER BY MAX(key)")
101+
checkHiveQl("SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY MAX(key)")
94102
}
95103

96104
// When there are multiple aggregate functions in ORDER BY clause, all of them are extracted into
97105
// Aggregate operator and aliased to the same name "aggOrder". This is OK for normal query
98106
// execution since these aliases have different expression ID. But this introduces name collision
99107
// when converting resolved plans back to SQL query strings as expression IDs are stripped.
100108
test("aggregate function in order by clause with multiple order keys") {
101-
checkHiveQl("SELECT COUNT(value) FROM t1 GROUP BY key ORDER BY key, MAX(key)")
109+
checkHiveQl("SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key, MAX(key)")
102110
}
103111

104112
test("type widening in union") {
105-
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0")
113+
checkHiveQl("SELECT id FROM parquet_t0 UNION ALL SELECT CAST(id AS INT) AS id FROM parquet_t0")
106114
}
107115

108116
test("self join") {
109-
checkHiveQl("SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key")
117+
checkHiveQl("SELECT x.key FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key")
110118
}
111119

112120
test("self join with group by") {
113-
checkHiveQl("SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key")
121+
checkHiveQl(
122+
"SELECT x.key, COUNT(*) FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key group by x.key")
114123
}
115124

116125
test("three-child union") {
117-
checkHiveQl("SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0")
126+
checkHiveQl(
127+
"""
128+
|SELECT id FROM parquet_t0
129+
|UNION ALL SELECT id FROM parquet_t0
130+
|UNION ALL SELECT id FROM parquet_t0
131+
""".stripMargin)
118132
}
119133

120134
test("case") {
121-
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0")
135+
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM parquet_t0")
122136
}
123137

124138
test("case with else") {
125-
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 ELSE 1 END FROM t0")
139+
checkHiveQl("SELECT CASE WHEN id % 2 > 0 THEN 0 ELSE 1 END FROM parquet_t0")
126140
}
127141

128142
test("case with key") {
129-
checkHiveQl("SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' END FROM t0")
143+
checkHiveQl("SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' END FROM parquet_t0")
130144
}
131145

132146
test("case with key and else") {
133-
checkHiveQl("SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' ELSE 'baz' END FROM t0")
147+
checkHiveQl("SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' ELSE 'baz' END FROM parquet_t0")
134148
}
135149

136150
test("select distinct without aggregate functions") {
137-
checkHiveQl("SELECT DISTINCT id FROM t0")
151+
checkHiveQl("SELECT DISTINCT id FROM parquet_t0")
138152
}
139153

140154
test("cluster by") {
141-
checkHiveQl("SELECT id FROM t0 CLUSTER BY id")
155+
checkHiveQl("SELECT id FROM parquet_t0 CLUSTER BY id")
142156
}
143157

144158
test("distribute by") {
145-
checkHiveQl("SELECT id FROM t0 DISTRIBUTE BY id")
159+
checkHiveQl("SELECT id FROM parquet_t0 DISTRIBUTE BY id")
146160
}
147161

148162
test("distribute by with sort by") {
149-
checkHiveQl("SELECT id FROM t0 DISTRIBUTE BY id SORT BY id")
163+
checkHiveQl("SELECT id FROM parquet_t0 DISTRIBUTE BY id SORT BY id")
150164
}
151165

152166
test("distinct aggregation") {
153-
checkHiveQl("SELECT COUNT(DISTINCT id) FROM t0")
167+
checkHiveQl("SELECT COUNT(DISTINCT id) FROM parquet_t0")
154168
}
155169

156170
test("TABLESAMPLE") {
157-
checkHiveQl("SELECT * FROM t0 TABLESAMPLE(100 PERCENT) s")
171+
// Project [id#2L]
172+
// +- Sample 0.0, 1.0, false, ...
173+
// +- Subquery s
174+
// +- Subquery parquet_t0
175+
// +- Relation[id#2L] ParquetRelation
176+
checkHiveQl("SELECT s.id FROM parquet_t0 TABLESAMPLE(100 PERCENT) s")
177+
178+
// Project [id#2L]
179+
// +- Sample 0.0, 1.0, false, ...
180+
// +- Subquery parquet_t0
181+
// +- Relation[id#2L] ParquetRelation
182+
checkHiveQl("SELECT * FROM parquet_t0 TABLESAMPLE(100 PERCENT)")
183+
184+
// Project [id#21L]
185+
// +- Sample 0.0, 1.0, false, ...
186+
// +- MetastoreRelation default, t0, Some(s)
187+
checkHiveQl("SELECT s.id FROM t0 TABLESAMPLE(100 PERCENT) s")
188+
189+
// Project [id#24L]
190+
// +- Sample 0.0, 1.0, false, ...
191+
// +- MetastoreRelation default, t0, None
158192
checkHiveQl("SELECT * FROM t0 TABLESAMPLE(100 PERCENT)")
193+
159194
// When a sampling fraction is not 100%, the returned results are random.
160195
// Thus, added an always-false filter here to check if the generated plan can be successfully
161196
// executed.
162-
checkHiveQl("SELECT s.id FROM t0 TABLESAMPLE(0.1 PERCENT) s WHERE 1=0")
163-
checkHiveQl("SELECT * FROM t0 TABLESAMPLE(0.1 PERCENT) WHERE 1=0")
197+
checkHiveQl("SELECT s.id FROM parquet_t0 TABLESAMPLE(0.1 PERCENT) s WHERE 1=0")
198+
checkHiveQl("SELECT * FROM parquet_t0 TABLESAMPLE(0.1 PERCENT) WHERE 1=0")
164199
}
165200

166201
// TODO Enable this
167202
// Query plans transformed by DistinctAggregationRewriter are not recognized yet
168203
ignore("multi-distinct columns") {
169-
checkHiveQl("SELECT a, COUNT(DISTINCT b), COUNT(DISTINCT c), SUM(d) FROM t2 GROUP BY a")
204+
checkHiveQl("SELECT a, COUNT(DISTINCT b), COUNT(DISTINCT c), SUM(d) FROM parquet_t2 GROUP BY a")
170205
}
171206

172207
test("persisted data source relations") {
173208
Seq("orc", "json", "parquet").foreach { format =>
174-
val tableName = s"${format}_t0"
209+
val tableName = s"${format}_parquet_t0"
175210
withTable(tableName) {
176211
sqlContext.range(10).write.format(format).saveAsTable(tableName)
177212
checkHiveQl(s"SELECT id FROM $tableName")

0 commit comments

Comments
 (0)