Skip to content

Commit

Permalink
Merge pull request #3 from yhuai/windowFunctionWIP
Browse files Browse the repository at this point in the history
Initial commit to add test.
  • Loading branch information
guowei2 committed Apr 24, 2015
2 parents b4fa747 + 4e8d08b commit cae7079
Show file tree
Hide file tree
Showing 9 changed files with 238 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// Hive does not support buckets.
".*bucket.*",

// No window support yet
// We have our own tests based on these query files.
".*window.*",

// Fails in hive with authorization errors.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
catalog.CreateTables ::
catalog.PreInsertionCasts ::
ExtractPythonUdfs ::
ResolveUdtfsAlias ::
ResolveWindowUdaf ::
sources.PreInsertCastAndRename ::
Nil
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15
Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3
Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06
Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001
Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001
Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001
Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68
Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38
Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001
Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62
Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68
Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95
Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34
Manufacturer#3 almond antique misty red olive 1 4 4 6195.32
Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61
Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67
Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09
Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35
Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27
Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001
Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69
Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004
Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08
Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18
Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.8333333333333
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.316
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.8575
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.360000000001 1690.68 1690.68 1841.1200000000001
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.505
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.7240000000002
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.9400000000005 1698.66 1698.66 1808.2350000000001
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.24 1701.6 1701.6 1810.7466666666667
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.1133333333335
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83
Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.522
Manufacturer#3 almond antique misty red olive 1 1922.98 5860.929999999999 1922.98 1922.98 1465.2324999999998
Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.8866666666665
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.7833333333335
Manufacturer#4 almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.8175
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.620000000001 1206.26 1206.26 1467.5240000000001
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.950000000001 1844.92 1844.92 1429.2375000000002
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.530000000001 1290.35 1290.35 1447.176666666667
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.0266666666666
Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.045
Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.532
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.970000000001 1018.1 1018.1 1470.7425000000003
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.3099999999995 1464.48 1464.48 1423.7699999999998
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0
Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32
Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28
Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22
Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14
Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0
Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26
Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38
Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23
Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7
Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0
Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3
Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5
Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18
Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44
Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0
Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29
Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12
Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20
Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5
Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0
Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25
Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4
Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44
Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution

import java.io._

import org.apache.spark.sql.AnalysisException
import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}

import org.apache.spark.Logging
Expand Down Expand Up @@ -300,7 +301,8 @@ abstract class HiveComparisonTest

val hiveQueries = queryList.map(new TestHive.HiveQLQueryExecution(_))
// Make sure we can at least parse everything before attempting hive execution.
hiveQueries.foreach(_.analyzed)
hiveQueries.foreach(_.logical)

val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map {
case ((queryString, i), hiveQuery, cachedAnswerFile)=>
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.hive.execution

import java.util.{Locale, TimeZone}

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.util.Utils
import org.scalatest.BeforeAndAfter

/**
* The test suite for window functions. To actually compare results with Hive,
* every test should be created by `createQueryTest`. Because we are reusing tables
* for different tests and there are a few properties needed to let Hive generate golden
* files, every `createQueryTest` call should explicitly set `reset` to `false`.
*
* The shared `part` table and the Hive settings are created once in `beforeAll` and the
* JVM-wide defaults changed there (time zone, locale) are restored in `afterAll`.
*/
class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfter {
// JVM defaults captured at construction time so that afterAll can put them back.
private val originalTimeZone = TimeZone.getDefault
private val originalLocale = Locale.getDefault
// Scratch directory; used below as the root of the local file system handed to Hive.
private val testTempDir = Utils.createTempDir()
// NOTE(review): nothing in this suite visibly relies on these implicits -- confirm before removing.
import org.apache.spark.sql.hive.test.TestHive.implicits._

// One-time setup: pins time zone and locale, (re)creates and loads the `part` table,
// and applies the Hive settings needed for generating golden files.
override def beforeAll() {
TestHive.cacheTables = true
// Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
// NOTE(review): no timestamp_* tests are visible in this suite; the remark above looks
// carried over from another suite -- confirm whether the fixed time zone is still required.
TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
// Add Locale setting
Locale.setDefault(Locale.US)

// Create the table used in windowing.q
// Drop first so that a table left behind by an earlier run does not break the CREATE.
sql("DROP TABLE IF EXISTS part")
sql(
"""
|CREATE TABLE part(
| p_partkey INT,
| p_name STRING,
| p_mfgr STRING,
| p_brand STRING,
| p_type STRING,
| p_size INT,
| p_container STRING,
| p_retailprice DOUBLE,
| p_comment STRING)
""".stripMargin)
// Resolve the data file shipped with the Hive test resources and load it, replacing any
// existing contents of `part`.
val testData = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath
sql(
s"""
|LOAD DATA LOCAL INPATH '$testData' overwrite into table part
""".stripMargin)
// The following settings are used for generating golden files with Hive.
// We have to use kryo to correctly let Hive serialize plans with window functions.
// This is used to generate golden files.
sql("set hive.plan.serialization.format=kryo")
// Explicitly set fs to local fs.
sql(s"set fs.default.name=file://$testTempDir/")
// NOTE(review): the setting below is disabled; delete it if it is confirmed to be unnecessary.
//sql(s"set mapred.working.dir=${testTempDir}")
// Ask Hive to run jobs in-process as a single map and reduce task.
sql("set mapred.job.tracker=local")
}

// Undo the global state changed in beforeAll, then reset TestHive.
override def afterAll() {
TestHive.cacheTables = false
TimeZone.setDefault(originalTimeZone)
Locale.setDefault(originalLocale)
TestHive.reset()
}

/////////////////////////////////////////////////////////////////////////////
// Tests from windowing.q
/////////////////////////////////////////////////////////////////////////////
// rank, dense_rank and a running sum of p_retailprice within each p_mfgr, ordered by p_name.
createQueryTest("windowing.q -- 1. testWindowing",
s"""
|select p_mfgr, p_name, p_size,
|rank() over(distribute by p_mfgr sort by p_name) as r,
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
|sum(p_retailprice) over
|(distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1
|from part
""".stripMargin, reset = false)

// Window functions (rank, dense_rank, lag) combined with a GROUP BY and a plain min aggregate.
createQueryTest("windowing.q -- 2. testGroupByWithPartitioning",
s"""
|select p_mfgr, p_name, p_size,
|min(p_retailprice),
|rank() over(distribute by p_mfgr sort by p_name)as r,
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
|p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
|from part
|group by p_mfgr, p_name, p_size
""".stripMargin, reset = false)

// sum and avg over the named window w1 (2 preceding to 2 following), next to plain min/max
// aggregates under a GROUP BY.
createQueryTest("windowing.q -- 19. testUDAFsWithGBY",
"""
|
|select p_mfgr,p_name, p_size, p_retailprice,
|sum(p_retailprice) over w1 as s,
|min(p_retailprice) as mi ,
|max(p_retailprice) as ma ,
|avg(p_retailprice) over w1 as ag
|from part
|group by p_mfgr,p_name, p_size, p_retailprice
|window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following);
|
""".stripMargin, reset = false)

// Same shape as test 2, with an alias on the min aggregate and a HAVING filter on p_size.
createQueryTest("windowing.q -- 26. testGroupByHavingWithSWQAndAlias",
"""
|select p_mfgr, p_name, p_size, min(p_retailprice) as mi,
|rank() over(distribute by p_mfgr sort by p_name) as r,
|dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
|p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz
|from part
|group by p_mfgr, p_name, p_size
|having p_size > 0
""".stripMargin, reset = false)
}

0 comments on commit cae7079

Please sign in to comment.