Skip to content

Commit cae7079

Browse files
committed
Merge pull request #3 from yhuai/windowFunctionWIP
Initial commit to add test.
2 parents b4fa747 + 4e8d08b commit cae7079

9 files changed

+238
-3
lines changed

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
185185
// Hive does not support buckets.
186186
".*bucket.*",
187187

188-
// No window support yet
188+
// We have our own tests based on these query files.
189189
".*window.*",
190190

191191
// Fails in hive with authorization errors.

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
249249
catalog.CreateTables ::
250250
catalog.PreInsertionCasts ::
251251
ExtractPythonUdfs ::
252-
ResolveUdtfsAlias ::
253252
ResolveWindowUdaf ::
254253
sources.PreInsertCastAndRename ::
255254
Nil
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
2+
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
3+
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
4+
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001
5+
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001
6+
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001
7+
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
8+
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
9+
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001
10+
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
11+
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
12+
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
13+
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
14+
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
15+
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
16+
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
17+
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
18+
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
19+
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
20+
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
21+
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001
22+
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
23+
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004
24+
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
25+
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
26+
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.8333333333333
2+
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98
3+
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.316
4+
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.8575
5+
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89
6+
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.360000000001 1690.68 1690.68 1841.1200000000001
7+
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.505
8+
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.7240000000002
9+
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.9400000000005 1698.66 1698.66 1808.2350000000001
10+
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.24 1701.6 1701.6 1810.7466666666667
11+
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.1133333333335
12+
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83
13+
Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.522
14+
Manufacturer#3 almond antique misty red olive 1 1922.98 5860.929999999999 1922.98 1922.98 1465.2324999999998
15+
Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.8866666666665
16+
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.7833333333335
17+
Manufacturer#4 almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.8175
18+
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.620000000001 1206.26 1206.26 1467.5240000000001
19+
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.950000000001 1844.92 1844.92 1429.2375000000002
20+
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.530000000001 1290.35 1290.35 1447.176666666667
21+
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.0266666666666
22+
Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.045
23+
Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.532
24+
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.970000000001 1018.1 1018.1 1470.7425000000003
25+
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.3099999999995 1464.48 1464.48 1423.7699999999998
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
2+
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
3+
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
4+
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
5+
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
6+
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
7+
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
8+
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
9+
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
10+
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
11+
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
12+
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
13+
Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
14+
Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
15+
Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
16+
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
17+
Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
18+
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
19+
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
20+
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
21+
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
22+
Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
23+
Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
24+
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
25+
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
2+
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
3+
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
4+
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
5+
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
6+
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
7+
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
8+
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
9+
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
10+
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
11+
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
12+
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
13+
Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
14+
Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
15+
Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
16+
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
17+
Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
18+
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
19+
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
20+
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
21+
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
22+
Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
23+
Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
24+
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
25+
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution
1919

2020
import java.io._
2121

22+
import org.apache.spark.sql.AnalysisException
2223
import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
2324

2425
import org.apache.spark.Logging
@@ -300,7 +301,8 @@ abstract class HiveComparisonTest
300301

301302
val hiveQueries = queryList.map(new TestHive.HiveQLQueryExecution(_))
302303
// Make sure we can at least parse everything before attempting hive execution.
303-
hiveQueries.foreach(_.analyzed)
304+
hiveQueries.foreach(_.logical)
305+
304306
val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map {
305307
case ((queryString, i), hiveQuery, cachedAnswerFile)=>
306308
try {
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.hive.execution
19+
20+
import java.util.{Locale, TimeZone}
21+
22+
import org.apache.spark.sql.hive.test.TestHive
23+
import org.apache.spark.sql.hive.test.TestHive._
24+
import org.apache.spark.util.Utils
25+
import org.scalatest.BeforeAndAfter
26+
27+
/**
28+
* The test suite for window functions. To actually compare results with Hive,
29+
* every test should be created by `createQueryTest`. Because we are reusing tables
30+
* for different tests and there are a few properties needed to let Hive generate golden
31+
* files, every `createQueryTest` call should explicitly set `reset` to `false`.
32+
*/
33+
class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter {
34+
private val originalTimeZone = TimeZone.getDefault
35+
private val originalLocale = Locale.getDefault
36+
private val testTempDir = Utils.createTempDir()
37+
import org.apache.spark.sql.hive.test.TestHive.implicits._
38+
39+
override def beforeAll() {
40+
TestHive.cacheTables = true
41+
// Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
42+
TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
43+
// Pin the default locale to US; the original locale is restored in afterAll.
44+
Locale.setDefault(Locale.US)
45+
46+
// Create the table used in windowing.q
47+
sql("DROP TABLE IF EXISTS part")
48+
sql(
49+
"""
50+
|CREATE TABLE part(
51+
| p_partkey INT,
52+
| p_name STRING,
53+
| p_mfgr STRING,
54+
| p_brand STRING,
55+
| p_type STRING,
56+
| p_size INT,
57+
| p_container STRING,
58+
| p_retailprice DOUBLE,
59+
| p_comment STRING)
60+
""".stripMargin)
61+
val testData = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath
62+
sql(
63+
s"""
64+
|LOAD DATA LOCAL INPATH '$testData' overwrite into table part
65+
""".stripMargin)
66+
// The following settings are used for generating golden files with Hive.
67+
// We have to use kryo to let Hive correctly serialize plans with window functions.
68+
// This is used to generate golden files.
69+
sql("set hive.plan.serialization.format=kryo")
70+
// Explicitly set fs to local fs.
71+
sql(s"set fs.default.name=file://$testTempDir/")
72+
//sql(s"set mapred.working.dir=${testTempDir}")
73+
// Ask Hive to run jobs in-process as a single map and reduce task.
74+
sql("set mapred.job.tracker=local")
75+
}
76+
77+
override def afterAll() {
78+
TestHive.cacheTables = false
79+
TimeZone.setDefault(originalTimeZone)
80+
Locale.setDefault(originalLocale)
81+
TestHive.reset()
82+
}
83+
84+
/////////////////////////////////////////////////////////////////////////////
85+
// Tests from windowing.q
86+
/////////////////////////////////////////////////////////////////////////////
87+
createQueryTest("windowing.q -- 1. testWindowing",
88+
s"""
89+
|select p_mfgr, p_name, p_size,
90+
|rank() over(distribute by p_mfgr sort by p_name) as r,
91+
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
92+
|sum(p_retailprice) over
93+
|(distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1
94+
|from part
95+
""".stripMargin, reset = false)
96+
97+
createQueryTest("windowing.q -- 2. testGroupByWithPartitioning",
98+
s"""
99+
|select p_mfgr, p_name, p_size,
100+
|min(p_retailprice),
101+
|rank() over(distribute by p_mfgr sort by p_name)as r,
102+
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
103+
|p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
104+
|from part
105+
|group by p_mfgr, p_name, p_size
106+
""".stripMargin, reset = false)
107+
108+
createQueryTest("windowing.q -- 19. testUDAFsWithGBY",
109+
"""
110+
|
111+
|select p_mfgr,p_name, p_size, p_retailprice,
112+
|sum(p_retailprice) over w1 as s,
113+
|min(p_retailprice) as mi ,
114+
|max(p_retailprice) as ma ,
115+
|avg(p_retailprice) over w1 as ag
116+
|from part
117+
|group by p_mfgr,p_name, p_size, p_retailprice
118+
|window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
119+
|
120+
""".stripMargin, reset = false)
121+
122+
createQueryTest("windowing.q -- 26. testGroupByHavingWithSWQAndAlias",
123+
"""
124+
|select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
125+
|rank() over(distribute by p_mfgr sort by p_name) as r,
126+
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
127+
|p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
128+
|from part
129+
|group by p_mfgr, p_name, p_size
130+
|having p_size > 0
131+
""".stripMargin, reset = false)
132+
}

0 commit comments

Comments
 (0)