| 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.hive.execution

import java.util.{Locale, TimeZone}

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.util.Utils
import org.scalatest.BeforeAndAfter
 | 27 | +/**  | 
 | 28 | + * The test suite for window functions. To actually compare results with Hive,  | 
 | 29 | + * every test should be created by `createQueryTest`. Because we are reusing tables  | 
 | 30 | + * for different tests and there are a few properties needed to let Hive generate golden  | 
 | 31 | + * files, every `createQueryTest` calls should explicitly set `reset` to `false`.  | 
 | 32 | + */  | 
 | 33 | +class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter {  | 
 | 34 | +  private val originalTimeZone = TimeZone.getDefault  | 
 | 35 | +  private val originalLocale = Locale.getDefault  | 
 | 36 | +  private val testTempDir = Utils.createTempDir()  | 
 | 37 | +  import org.apache.spark.sql.hive.test.TestHive.implicits._  | 
 | 38 | + | 
 | 39 | +  override def beforeAll() {  | 
 | 40 | +    TestHive.cacheTables = true  | 
 | 41 | +    // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)  | 
 | 42 | +    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))  | 
 | 43 | +    // Add Locale setting  | 
 | 44 | +    Locale.setDefault(Locale.US)  | 
 | 45 | + | 
 | 46 | +    // Create the table used in windowing.q  | 
 | 47 | +    sql("DROP TABLE IF EXISTS part")  | 
 | 48 | +    sql(  | 
 | 49 | +      """  | 
 | 50 | +        |CREATE TABLE part(  | 
 | 51 | +        |  p_partkey INT,  | 
 | 52 | +        |  p_name STRING,  | 
 | 53 | +        |  p_mfgr STRING,  | 
 | 54 | +        |  p_brand STRING,  | 
 | 55 | +        |  p_type STRING,  | 
 | 56 | +        |  p_size INT,  | 
 | 57 | +        |  p_container STRING,  | 
 | 58 | +        |  p_retailprice DOUBLE,  | 
 | 59 | +        |  p_comment STRING)  | 
 | 60 | +      """.stripMargin)  | 
 | 61 | +    val testData = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath  | 
 | 62 | +    sql(  | 
 | 63 | +      s"""  | 
 | 64 | +        |LOAD DATA LOCAL INPATH '$testData' overwrite into table part  | 
 | 65 | +      """.stripMargin)  | 
 | 66 | +    // The following settings are used for generating golden files with Hive.  | 
 | 67 | +    // We have to use kryo to correctly let Hive serialize plans with window functions.  | 
 | 68 | +    // This is used to generate golden files.  | 
 | 69 | +    sql("set hive.plan.serialization.format=kryo")  | 
 | 70 | +    // Explicitly set fs to local fs.  | 
 | 71 | +    sql(s"set fs.default.name=file://$testTempDir/")  | 
 | 72 | +    //sql(s"set mapred.working.dir=${testTempDir}")  | 
 | 73 | +    // Ask Hive to run jobs in-process as a single map and reduce task.  | 
 | 74 | +    sql("set mapred.job.tracker=local")  | 
 | 75 | +  }  | 
 | 76 | + | 
 | 77 | +  override def afterAll() {  | 
 | 78 | +    TestHive.cacheTables = false  | 
 | 79 | +    TimeZone.setDefault(originalTimeZone)  | 
 | 80 | +    Locale.setDefault(originalLocale)  | 
 | 81 | +    TestHive.reset()  | 
 | 82 | +  }  | 
 | 83 | + | 
 | 84 | +  /////////////////////////////////////////////////////////////////////////////  | 
 | 85 | +  // Tests from windowing.q  | 
 | 86 | +  /////////////////////////////////////////////////////////////////////////////  | 
 | 87 | +  createQueryTest("windowing.q -- 1. testWindowing",  | 
 | 88 | +    s"""  | 
 | 89 | +      |select p_mfgr, p_name, p_size,  | 
 | 90 | +      |rank() over(distribute by p_mfgr sort by p_name) as r,  | 
 | 91 | +      |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,  | 
 | 92 | +      |sum(p_retailprice) over  | 
 | 93 | +      |(distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1  | 
 | 94 | +      |from part  | 
 | 95 | +    """.stripMargin, reset = false)  | 
 | 96 | + | 
 | 97 | +  createQueryTest("windowing.q -- 2. testGroupByWithPartitioning",  | 
 | 98 | +    s"""  | 
 | 99 | +      |select p_mfgr, p_name, p_size,  | 
 | 100 | +      |min(p_retailprice),  | 
 | 101 | +      |rank() over(distribute by p_mfgr sort by p_name)as r,  | 
 | 102 | +      |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,  | 
 | 103 | +      |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz  | 
 | 104 | +      |from part  | 
 | 105 | +      |group by p_mfgr, p_name, p_size  | 
 | 106 | +    """.stripMargin, reset = false)  | 
 | 107 | + | 
 | 108 | +  createQueryTest("windowing.q -- 19. testUDAFsWithGBY",  | 
 | 109 | +    """  | 
 | 110 | +      |  | 
 | 111 | +      |select  p_mfgr,p_name, p_size, p_retailprice,  | 
 | 112 | +      |sum(p_retailprice) over w1 as s,  | 
 | 113 | +      |min(p_retailprice) as mi ,  | 
 | 114 | +      |max(p_retailprice) as ma ,  | 
 | 115 | +      |avg(p_retailprice) over w1 as ag  | 
 | 116 | +      |from part  | 
 | 117 | +      |group by p_mfgr,p_name, p_size, p_retailprice  | 
 | 118 | +      |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);  | 
 | 119 | +      |  | 
 | 120 | +    """.stripMargin, reset = false)  | 
 | 121 | + | 
 | 122 | +  createQueryTest("windowing.q -- 26. testGroupByHavingWithSWQAndAlias",  | 
 | 123 | +    """  | 
 | 124 | +      |select p_mfgr, p_name, p_size, min(p_retailprice) as mi,  | 
 | 125 | +      |rank() over(distribute by p_mfgr sort by p_name) as r,  | 
 | 126 | +      |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,  | 
 | 127 | +      |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz  | 
 | 128 | +      |from part  | 
 | 129 | +      |group by p_mfgr, p_name, p_size  | 
 | 130 | +      |having p_size > 0  | 
 | 131 | +    """.stripMargin, reset = false)  | 
 | 132 | +}  | 