Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// Hive does not support buckets.
".*bucket.*",

// No window support yet
// We have our own tests based on these query files.
".*window.*",

// Fails in hive with authorization errors.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
catalog.CreateTables ::
catalog.PreInsertionCasts ::
ExtractPythonUdfs ::
ResolveUdtfsAlias ::
ResolveWindowUdaf ::
sources.PreInsertCastAndRename ::
Nil
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.8333333333333
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.316
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.8575
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.360000000001 1690.68 1690.68 1841.1200000000001
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.505
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.7240000000002
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.9400000000005 1698.66 1698.66 1808.2350000000001
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.24 1701.6 1701.6 1810.7466666666667
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.1133333333335
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83
Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.522
Manufacturer#3 almond antique misty red olive 1 1922.98 5860.929999999999 1922.98 1922.98 1465.2324999999998
Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.8866666666665
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.7833333333335
Manufacturer#4 almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.8175
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.620000000001 1206.26 1206.26 1467.5240000000001
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.950000000001 1844.92 1844.92 1429.2375000000002
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.530000000001 1290.35 1290.35 1447.176666666667
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.0266666666666
Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.045
Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.532
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.970000000001 1018.1 1018.1 1470.7425000000003
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.3099999999995 1464.48 1464.48 1423.7699999999998
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution

import java.io._

import org.apache.spark.sql.AnalysisException
import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}

import org.apache.spark.Logging
Expand Down Expand Up @@ -300,7 +301,8 @@ abstract class HiveComparisonTest

val hiveQueries = queryList.map(new TestHive.HiveQLQueryExecution(_))
// Make sure we can at least parse everything before attempting hive execution.
hiveQueries.foreach(_.analyzed)
hiveQueries.foreach(_.logical)

val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map {
case ((queryString, i), hiveQuery, cachedAnswerFile)=>
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.hive.execution

import java.util.{Locale, TimeZone}

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.util.Utils
import org.scalatest.BeforeAndAfter

/**
* The test suite for window functions. To actually compare results with Hive,
* every test should be created by `createQueryTest`. Because the tables are reused
* across tests, and a few properties must stay set for Hive to generate golden
* files, every `createQueryTest` call should explicitly set `reset` to `false`.
*/
class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter {
// Process-wide defaults captured when the suite is constructed; afterAll() restores them.
private val originalTimeZone = TimeZone.getDefault
private val originalLocale = Locale.getDefault
// Scratch directory; beforeAll() uses it as the root of the local file system given to Hive.
private val testTempDir = Utils.createTempDir()
import org.apache.spark.sql.hive.test.TestHive.implicits._

/**
* One-time setup for the whole suite: pins the default time zone and locale, (re)creates
* the `part` table and loads it from `data/files/part_tiny.txt`, then applies the Hive
* settings needed for golden-file generation.
*/
override def beforeAll() {
// NOTE(review): presumably tells TestHive to cache the test tables it loads -- confirm
// against TestHive. It is switched back off in afterAll().
TestHive.cacheTables = true
// Pin the default time zone to America/Los_Angeles so that time-zone-sensitive output
// does not vary with the host's setting.
// NOTE(review): no query in this suite reads a timestamp column; this looks inherited
// from suites that run timestamp_* tests -- confirm it is still needed here.
TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
// Likewise pin the default locale.
Locale.setDefault(Locale.US)

// Create the table used in windowing.q, dropping any leftover copy first.
sql("DROP TABLE IF EXISTS part")
sql(
"""
|CREATE TABLE part(
| p_partkey INT,
| p_name STRING,
| p_mfgr STRING,
| p_brand STRING,
| p_type STRING,
| p_size INT,
| p_container STRING,
| p_retailprice DOUBLE,
| p_comment STRING)
""".stripMargin)
// Resolve the data file to an absolute path and load it, replacing any existing rows.
val testData = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath
sql(
s"""
|LOAD DATA LOCAL INPATH '$testData' overwrite into table part
""".stripMargin)
// The following settings are used for generating golden files with Hive.
// We have to use kryo to let Hive correctly serialize plans with window functions.
sql("set hive.plan.serialization.format=kryo")
// Explicitly set fs to local fs, rooted at the suite's temporary directory.
sql(s"set fs.default.name=file://$testTempDir/")
// NOTE(review): the next line is a disabled setting kept as a comment -- confirm whether
// it can be deleted.
//sql(s"set mapred.working.dir=${testTempDir}")
// Ask Hive to run jobs in-process as a single map and reduce task.
sql("set mapred.job.tracker=local")
}

/**
* One-time teardown: turns table caching back off, restores the time zone and locale
* captured at construction, and resets TestHive.
*/
override def afterAll() {
TestHive.cacheTables = false
TimeZone.setDefault(originalTimeZone)
Locale.setDefault(originalLocale)
TestHive.reset()
}

/////////////////////////////////////////////////////////////////////////////
// Tests from windowing.q
/////////////////////////////////////////////////////////////////////////////
// Every test passes `reset = false` so the `part` table and the settings applied in
// beforeAll() stay in place from one test to the next.

// rank, dense_rank and a running sum of p_retailprice (unbounded preceding .. current row),
// each partitioned by p_mfgr and ordered by p_name. No GROUP BY.
// The `s` interpolator is a no-op here: the text contains no `$` references.
createQueryTest("windowing.q -- 1. testWindowing",
s"""
|select p_mfgr, p_name, p_size,
|rank() over(distribute by p_mfgr sort by p_name) as r,
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
|sum(p_retailprice) over
|(distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1
|from part
""".stripMargin, reset = false)

// A plain aggregate (min) combined with window functions over the grouped rows; deltaSz is
// p_size minus the previous row's p_size, with lag defaulting to p_size itself.
// The `s` interpolator is a no-op here as well.
createQueryTest("windowing.q -- 2. testGroupByWithPartitioning",
s"""
|select p_mfgr, p_name, p_size,
|min(p_retailprice),
|rank() over(distribute by p_mfgr sort by p_name)as r,
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
|p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
|from part
|group by p_mfgr, p_name, p_size
""".stripMargin, reset = false)

// Plain aggregates (min, max) next to sum/avg evaluated over the named window w1, whose
// frame spans 2 rows preceding through 2 rows following.
// NOTE(review): the query text ends with ';' and has empty margin lines before and after
// the statement; it is deliberately left untouched because the text is passed verbatim
// to createQueryTest.
createQueryTest("windowing.q -- 19. testUDAFsWithGBY",
"""
|
|select p_mfgr,p_name, p_size, p_retailprice,
|sum(p_retailprice) over w1 as s,
|min(p_retailprice) as mi ,
|max(p_retailprice) as ma ,
|avg(p_retailprice) over w1 as ag
|from part
|group by p_mfgr,p_name, p_size, p_retailprice
|window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
|
""".stripMargin, reset = false)

// Same shape as test 2, with the min aggregate aliased as `mi` and a HAVING filter on p_size.
createQueryTest("windowing.q -- 26. testGroupByHavingWithSWQAndAlias",
"""
|select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
|rank() over(distribute by p_mfgr sort by p_name) as r,
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
|p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
|from part
|group by p_mfgr, p_name, p_size
|having p_size > 0
""".stripMargin, reset = false)
}