From f6084a88f0fe69111df8a016bc81c9884d3d3402 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 24 Dec 2017 01:16:12 +0900 Subject: [PATCH 1/2] [HOTFIX] Fix Scala style checks ## What changes were proposed in this pull request? This PR fixes a style that broke the build. ## How was this patch tested? Manually tested. Author: hyukjinkwon Closes #20065 from HyukjinKwon/minor-style. --- .../org/apache/spark/examples/sql/hive/SparkHiveExample.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala index 51df5dd8e3600..b193bd595127c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala @@ -135,7 +135,7 @@ object SparkHiveExample { hiveTableDF.coalesce(10).write.mode(SaveMode.Overwrite) .partitionBy("key").parquet(hiveExternalTableLocation) // $example off:spark_hive$ - + spark.stop() } } From aeb45df668a97a2d48cfd4079ed62601390979ba Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 24 Dec 2017 01:18:11 +0900 Subject: [PATCH 2/2] [SPARK-22844][R] Adds date_trunc in R API ## What changes were proposed in this pull request? This PR adds `date_trunc` in R API as below: ```r > df <- createDataFrame(list(list(a = as.POSIXlt("2012-12-13 12:34:00")))) > head(select(df, date_trunc("hour", df$a))) date_trunc(hour, a) 1 2012-12-13 12:00:00 ``` ## How was this patch tested? Unit tests added in `R/pkg/tests/fulltests/test_sparkSQL.R`. Author: hyukjinkwon Closes #20031 from HyukjinKwon/r-datetrunc. 
--- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 34 +++++++++++++++++++++++---- R/pkg/R/generics.R | 5 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 3 +++ 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 57838f52eac3f..dce64e1e607c8 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -230,6 +230,7 @@ exportMethods("%<=>%", "date_add", "date_format", "date_sub", + "date_trunc", "datediff", "dayofmonth", "dayofweek", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 237ef061e8071..3a96f94e269f4 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -40,10 +40,17 @@ NULL #' #' @param x Column to compute on. In \code{window}, it must be a time Column of #' \code{TimestampType}. -#' @param format For \code{to_date} and \code{to_timestamp}, it is the string to use to parse -#' Column \code{x} to DateType or TimestampType. For \code{trunc}, it is the string -#' to use to specify the truncation method. For example, "year", "yyyy", "yy" for -#' truncate by year, or "month", "mon", "mm" for truncate by month. +#' @param format The format for the given dates or timestamps in Column \code{x}. See the +#' format used in the following methods: +#' \itemize{ +#' \item \code{to_date} and \code{to_timestamp}: it is the string to use to parse +#' Column \code{x} to DateType or TimestampType. +#' \item \code{trunc}: it is the string to use to specify the truncation method. +#' For example, "year", "yyyy", "yy" for truncate by year, or "month", "mon", +#' "mm" for truncate by month. +#' \item \code{date_trunc}: it is similar to \code{trunc}'s but additionally +#' supports "day", "dd", "second", "minute", "hour", "week" and "quarter". +#' } #' @param ... additional argument(s). 
#' @name column_datetime_functions #' @rdname column_datetime_functions @@ -3478,3 +3485,22 @@ setMethod("trunc", x@jc, as.character(format)) column(jc) }) + +#' @details +#' \code{date_trunc}: Returns timestamp truncated to the unit specified by the format. +#' +#' @rdname column_datetime_functions +#' @aliases date_trunc date_trunc,character,Column-method +#' @export +#' @examples +#' +#' \dontrun{ +#' head(select(df, df$time, date_trunc("hour", df$time), date_trunc("minute", df$time), +#' date_trunc("week", df$time), date_trunc("quarter", df$time)))} +#' @note date_trunc since 2.3.0 +setMethod("date_trunc", + signature(format = "character", x = "Column"), + function(format, x) { + jc <- callJStatic("org.apache.spark.sql.functions", "date_trunc", format, x@jc) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 8fcf269087c7d..5ddaa669f9205 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1043,6 +1043,11 @@ setGeneric("date_format", function(y, x) { standardGeneric("date_format") }) #' @name NULL setGeneric("date_sub", function(y, x) { standardGeneric("date_sub") }) +#' @rdname column_datetime_functions +#' @export +#' @name NULL +setGeneric("date_trunc", function(format, x) { standardGeneric("date_trunc") }) + #' @rdname column_datetime_functions #' @export #' @name NULL diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index d87f5d2705732..6cc0188dae95f 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1418,6 +1418,8 @@ test_that("column functions", { c22 <- not(c) c23 <- trunc(c, "year") + trunc(c, "yyyy") + trunc(c, "yy") + trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm") + c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) + + date_trunc("quarter", c) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) @@ -1729,6 +1731,7 @@ test_that("date functions on a DataFrame", { 
expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0) expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0) + expect_equal(collect(select(df2, month(date_trunc("yyyy", df2$b))))[, 1], c(1, 1)) l3 <- list(list(a = 1000), list(a = -1000)) df3 <- createDataFrame(l3)