@@ -26,24 +26,32 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
2626 import testImplicits ._
2727
2828 protected override def beforeAll (): Unit = {
29+ sql(" DROP TABLE IF EXISTS parquet_t0" )
30+ sql(" DROP TABLE IF EXISTS parquet_t1" )
31+ sql(" DROP TABLE IF EXISTS parquet_t2" )
2932 sql(" DROP TABLE IF EXISTS t0" )
30- sql( " DROP TABLE IF EXISTS t1 " )
31- sql( " DROP TABLE IF EXISTS t2 " )
32- sqlContext.range( 10 ).write.saveAsTable( " t0 " )
33+
34+ sqlContext.range( 10 ).write.saveAsTable( " parquet_t0 " )
35+ sql( " CREATE TABLE t0 AS SELECT * FROM parquet_t0 " )
3336
3437 sqlContext
3538 .range(10 )
3639 .select(' id as ' key , concat(lit(" val_" ), ' id ) as ' value )
3740 .write
38- .saveAsTable(" t1 " )
41+ .saveAsTable(" parquet_t1 " )
3942
40- sqlContext.range(10 ).select(' id as ' a , ' id as ' b , ' id as ' c , ' id as ' d ).write.saveAsTable(" t2" )
43+ sqlContext
44+ .range(10 )
45+ .select(' id as ' a , ' id as ' b , ' id as ' c , ' id as ' d )
46+ .write
47+ .saveAsTable(" parquet_t2" )
4148 }
4249
4350 override protected def afterAll (): Unit = {
51+ sql(" DROP TABLE IF EXISTS parquet_t0" )
52+ sql(" DROP TABLE IF EXISTS parquet_t1" )
53+ sql(" DROP TABLE IF EXISTS parquet_t2" )
4454 sql(" DROP TABLE IF EXISTS t0" )
45- sql(" DROP TABLE IF EXISTS t1" )
46- sql(" DROP TABLE IF EXISTS t2" )
4755 }
4856
4957 private def checkHiveQl (hiveQl : String ): Unit = {
@@ -82,96 +90,123 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
8290 }
8391
8492 test(" in" ) {
85- checkHiveQl(" SELECT id FROM t0 WHERE id IN (1, 2, 3)" )
93+ checkHiveQl(" SELECT id FROM parquet_t0 WHERE id IN (1, 2, 3)" )
8694 }
8795
8896 test(" aggregate function in having clause" ) {
89- checkHiveQl(" SELECT COUNT(value) FROM t1 GROUP BY key HAVING MAX(key) > 0" )
97+ checkHiveQl(" SELECT COUNT(value) FROM parquet_t1 GROUP BY key HAVING MAX(key) > 0" )
9098 }
9199
92100 test(" aggregate function in order by clause" ) {
93- checkHiveQl(" SELECT COUNT(value) FROM t1 GROUP BY key ORDER BY MAX(key)" )
101+ checkHiveQl(" SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY MAX(key)" )
94102 }
95103
96104 // When there are multiple aggregate functions in the ORDER BY clause, all of them are extracted
97105 // into the Aggregate operator and aliased to the same name "aggOrder". This is OK for normal query
98106 // execution since these aliases have different expression IDs. But it introduces name collisions
99107 // when converting resolved plans back to SQL query strings, as expression IDs are stripped.
100108 test(" aggregate function in order by clause with multiple order keys" ) {
101- checkHiveQl(" SELECT COUNT(value) FROM t1 GROUP BY key ORDER BY key, MAX(key)" )
109+ checkHiveQl(" SELECT COUNT(value) FROM parquet_t1 GROUP BY key ORDER BY key, MAX(key)" )
102110 }
103111
104112 test(" type widening in union" ) {
105- checkHiveQl(" SELECT id FROM t0 UNION ALL SELECT CAST(id AS INT) AS id FROM t0 " )
113+ checkHiveQl(" SELECT id FROM parquet_t0 UNION ALL SELECT CAST(id AS INT) AS id FROM parquet_t0 " )
106114 }
107115
108116 test(" self join" ) {
109- checkHiveQl(" SELECT x.key FROM t1 x JOIN t1 y ON x.key = y.key" )
117+ checkHiveQl(" SELECT x.key FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key" )
110118 }
111119
112120 test(" self join with group by" ) {
113- checkHiveQl(" SELECT x.key, COUNT(*) FROM t1 x JOIN t1 y ON x.key = y.key group by x.key" )
121+ checkHiveQl(
122+ " SELECT x.key, COUNT(*) FROM parquet_t1 x JOIN parquet_t1 y ON x.key = y.key group by x.key" )
114123 }
115124
116125 test(" three-child union" ) {
117- checkHiveQl(" SELECT id FROM t0 UNION ALL SELECT id FROM t0 UNION ALL SELECT id FROM t0" )
126+ checkHiveQl(
127+ """
128+ |SELECT id FROM parquet_t0
129+ |UNION ALL SELECT id FROM parquet_t0
130+ |UNION ALL SELECT id FROM parquet_t0
131+ """ .stripMargin)
118132 }
119133
120134 test(" case" ) {
121- checkHiveQl(" SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM t0 " )
135+ checkHiveQl(" SELECT CASE WHEN id % 2 > 0 THEN 0 WHEN id % 2 = 0 THEN 1 END FROM parquet_t0 " )
122136 }
123137
124138 test(" case with else" ) {
125- checkHiveQl(" SELECT CASE WHEN id % 2 > 0 THEN 0 ELSE 1 END FROM t0 " )
139+ checkHiveQl(" SELECT CASE WHEN id % 2 > 0 THEN 0 ELSE 1 END FROM parquet_t0 " )
126140 }
127141
128142 test(" case with key" ) {
129- checkHiveQl(" SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' END FROM t0 " )
143+ checkHiveQl(" SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' END FROM parquet_t0 " )
130144 }
131145
132146 test(" case with key and else" ) {
133- checkHiveQl(" SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' ELSE 'baz' END FROM t0 " )
147+ checkHiveQl(" SELECT CASE id WHEN 0 THEN 'foo' WHEN 1 THEN 'bar' ELSE 'baz' END FROM parquet_t0 " )
134148 }
135149
136150 test(" select distinct without aggregate functions" ) {
137- checkHiveQl(" SELECT DISTINCT id FROM t0 " )
151+ checkHiveQl(" SELECT DISTINCT id FROM parquet_t0 " )
138152 }
139153
140154 test(" cluster by" ) {
141- checkHiveQl(" SELECT id FROM t0 CLUSTER BY id" )
155+ checkHiveQl(" SELECT id FROM parquet_t0 CLUSTER BY id" )
142156 }
143157
144158 test(" distribute by" ) {
145- checkHiveQl(" SELECT id FROM t0 DISTRIBUTE BY id" )
159+ checkHiveQl(" SELECT id FROM parquet_t0 DISTRIBUTE BY id" )
146160 }
147161
148162 test(" distribute by with sort by" ) {
149- checkHiveQl(" SELECT id FROM t0 DISTRIBUTE BY id SORT BY id" )
163+ checkHiveQl(" SELECT id FROM parquet_t0 DISTRIBUTE BY id SORT BY id" )
150164 }
151165
152166 test(" distinct aggregation" ) {
153- checkHiveQl(" SELECT COUNT(DISTINCT id) FROM t0 " )
167+ checkHiveQl(" SELECT COUNT(DISTINCT id) FROM parquet_t0 " )
154168 }
155169
156170 test(" TABLESAMPLE" ) {
157- checkHiveQl(" SELECT * FROM t0 TABLESAMPLE(100 PERCENT) s" )
171+ // Project [id#2L]
172+ // +- Sample 0.0, 1.0, false, ...
173+ // +- Subquery s
174+ // +- Subquery parquet_t0
175+ // +- Relation[id#2L] ParquetRelation
176+ checkHiveQl(" SELECT s.id FROM parquet_t0 TABLESAMPLE(100 PERCENT) s" )
177+
178+ // Project [id#2L]
179+ // +- Sample 0.0, 1.0, false, ...
180+ // +- Subquery parquet_t0
181+ // +- Relation[id#2L] ParquetRelation
182+ checkHiveQl(" SELECT * FROM parquet_t0 TABLESAMPLE(100 PERCENT)" )
183+
184+ // Project [id#21L]
185+ // +- Sample 0.0, 1.0, false, ...
186+ // +- MetastoreRelation default, t0, Some(s)
187+ checkHiveQl(" SELECT s.id FROM t0 TABLESAMPLE(100 PERCENT) s" )
188+
189+ // Project [id#24L]
190+ // +- Sample 0.0, 1.0, false, ...
191+ // +- MetastoreRelation default, t0, None
158192 checkHiveQl(" SELECT * FROM t0 TABLESAMPLE(100 PERCENT)" )
193+
159194 // When the sampling fraction is not 100%, the returned results are random.
160195 // Thus, an always-false filter is added here to check if the generated plan can be successfully
161196 // executed.
162- checkHiveQl(" SELECT s.id FROM t0 TABLESAMPLE(0.1 PERCENT) s WHERE 1=0" )
163- checkHiveQl(" SELECT * FROM t0 TABLESAMPLE(0.1 PERCENT) WHERE 1=0" )
197+ checkHiveQl(" SELECT s.id FROM parquet_t0 TABLESAMPLE(0.1 PERCENT) s WHERE 1=0" )
198+ checkHiveQl(" SELECT * FROM parquet_t0 TABLESAMPLE(0.1 PERCENT) WHERE 1=0" )
164199 }
165200
166201 // TODO Enable this
167202 // Query plans transformed by DistinctAggregationRewriter are not recognized yet
168203 ignore(" multi-distinct columns" ) {
169- checkHiveQl(" SELECT a, COUNT(DISTINCT b), COUNT(DISTINCT c), SUM(d) FROM t2 GROUP BY a" )
204+ checkHiveQl(" SELECT a, COUNT(DISTINCT b), COUNT(DISTINCT c), SUM(d) FROM parquet_t2 GROUP BY a" )
170205 }
171206
172207 test(" persisted data source relations" ) {
173208 Seq (" orc" , " json" , " parquet" ).foreach { format =>
174- val tableName = s " ${format}_t0 "
209+ val tableName = s " ${format}_parquet_t0 "
175210 withTable(tableName) {
176211 sqlContext.range(10 ).write.format(format).saveAsTable(tableName)
177212 checkHiveQl(s " SELECT id FROM $tableName" )
0 commit comments