From 05da931f746ee660e54dcf0e53c8e37b6f25da58 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 7 Sep 2018 12:12:12 +0200 Subject: [PATCH] initial stab at record_batch --- r/NAMESPACE | 32 -------------------------------- r/R/RcppExports.R | 12 ++++++++++++ r/R/array.R | 14 ++++++++++++++ r/R/status.R | 3 --- r/src/RcppExports.cpp | 36 ++++++++++++++++++++++++++++++++++++ r/src/buffer.cpp | 24 ++++++++++++++++++++++++ 6 files changed, 86 insertions(+), 35 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 5e51cc36043e4..287fac83b4eaa 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,10 +1,5 @@ # Generated by roxygen2: do not edit by hand -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> Initial work for type metadata, with tests. S3method("!=","arrow::Array") S3method("!=","arrow::DataType") S3method("!=","arrow::Field") @@ -14,20 +9,6 @@ S3method("==","arrow::DataType") S3method("==","arrow::Field") S3method(length,"arrow::Array") S3method(print,"arrow-enum") -<<<<<<< HEAD -<<<<<<< HEAD -======= -export(DateUnit) -export(Field_initialize) -export(MakeArray) -export(StatusCode) -export(TimeUnit) -export(Type) -export(array_data) ->>>>>>> Initial work for type metadata, with tests. -======= -export(array) ->>>>>>> initial stab at arrow::array export(boolean) export(date32) export(date64) @@ -51,10 +32,6 @@ export(uint32) export(uint64) export(uint8) export(utf8) -<<<<<<< HEAD -======= -exportPattern("^.*$") ->>>>>>> Initial work for type metadata, with tests. importFrom(R6,R6Class) importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) @@ -67,13 +44,4 @@ importFrom(rlang,dots_n) importFrom(rlang,quo_name) importFrom(rlang,seq2) importFrom(rlang,set_names) -<<<<<<< HEAD -======= -importFrom(Rcpp,sourceCpp) ->>>>>>> initial R :package: with travis setup and testthat suite, that links to arrow c++ library and calls arrow::int32() -======= ->>>>>>> Initial work for type metadata, with tests. 
-======= -importFrom(Rcpp,sourceCpp) ->>>>>>> initial R :package: with travis setup and testthat suite, that links to arrow c++ library and calls arrow::int32() useDynLib(arrow, .registration = TRUE) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 784654f4f5bcc..a9dc86c4020be 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -69,6 +69,18 @@ rvector_to_Array <- function(x) { .Call(`_arrow_rvector_to_Array`, x) } +dataframe_to_RecordBatch <- function(tbl) { + .Call(`_arrow_dataframe_to_RecordBatch`, tbl) +} + +RecordBatch_num_columns <- function(x) { + .Call(`_arrow_RecordBatch_num_columns`, x) +} + +RecordBatch_num_rows <- function(x) { + .Call(`_arrow_RecordBatch_num_rows`, x) +} + Field_initialize <- function(name, type, nullable = TRUE) { .Call(`_arrow_Field_initialize`, name, type, nullable) } diff --git a/r/R/array.R b/r/R/array.R index fd649c9dfcaa6..933b76247da4b 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -87,3 +87,17 @@ array <- function(...){ } +`arrow::RecordBatch` <- R6Class("arrow::RecordBatch", inherit = `arrow::Object`, + public = list( + initialize = function(.data){ + self$set_pointer(dataframe_to_RecordBatch(.data)) + }, + num_columns = function() RecordBatch_num_columns(self), + num_rows = function() RecordBatch_num_rows(self) + ) +) + +#' @export +record_batch <- function(.data){ + `arrow::RecordBatch`$new(.data) +} diff --git a/r/R/status.R b/r/R/status.R index 0a9eaaf5041af..a964164fb07ac 100644 --- a/r/R/status.R +++ b/r/R/status.R @@ -1,4 +1,3 @@ -<<<<<<< HEAD # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -======= ->>>>>>> Initial work for type metadata, with tests. 
#' @include R6.R Status <- R6Class("arrow::Status", diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index a8f273ef1c568..c26e7351357c3 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -200,6 +200,39 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// dataframe_to_RecordBatch +std::shared_ptr<arrow::RecordBatch> dataframe_to_RecordBatch(DataFrame tbl); +RcppExport SEXP _arrow_dataframe_to_RecordBatch(SEXP tblSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< DataFrame >::type tbl(tblSEXP); + rcpp_result_gen = Rcpp::wrap(dataframe_to_RecordBatch(tbl)); + return rcpp_result_gen; +END_RCPP +} +// RecordBatch_num_columns +int RecordBatch_num_columns(const std::shared_ptr<arrow::RecordBatch>& x); +RcppExport SEXP _arrow_RecordBatch_num_columns(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::RecordBatch>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(RecordBatch_num_columns(x)); + return rcpp_result_gen; +END_RCPP +} +// RecordBatch_num_rows +int RecordBatch_num_rows(const std::shared_ptr<arrow::RecordBatch>& x); +RcppExport SEXP _arrow_RecordBatch_num_rows(SEXP xSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr<arrow::RecordBatch>& >::type x(xSEXP); + rcpp_result_gen = Rcpp::wrap(RecordBatch_num_rows(x)); + return rcpp_result_gen; +END_RCPP +} // Field_initialize std::shared_ptr<arrow::Field> Field_initialize(const std::string& name, const std::shared_ptr<arrow::DataType>& type, bool nullable); RcppExport SEXP _arrow_Field_initialize(SEXP nameSEXP, SEXP typeSEXP, SEXP nullableSEXP) { @@ -1502,6 +1535,9 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_Array_ApproxEquals", (DL_FUNC) &_arrow_Array_ApproxEquals, 2}, {"_arrow_Array_data", (DL_FUNC) &_arrow_Array_data, 1}, {"_arrow_rvector_to_Array", (DL_FUNC) &_arrow_rvector_to_Array, 1}, + {"_arrow_dataframe_to_RecordBatch", (DL_FUNC) 
&_arrow_dataframe_to_RecordBatch, 1}, + {"_arrow_RecordBatch_num_columns", (DL_FUNC) &_arrow_RecordBatch_num_columns, 1}, + {"_arrow_RecordBatch_num_rows", (DL_FUNC) &_arrow_RecordBatch_num_rows, 1}, {"_arrow_Field_initialize", (DL_FUNC) &_arrow_Field_initialize, 3}, {"_arrow_Field_ToString", (DL_FUNC) &_arrow_Field_ToString, 1}, {"_arrow_Field_name", (DL_FUNC) &_arrow_Field_name, 1}, diff --git a/r/src/buffer.cpp b/r/src/buffer.cpp index 229d4dd552685..c82af8fc9f018 100644 --- a/r/src/buffer.cpp +++ b/r/src/buffer.cpp @@ -80,5 +80,29 @@ std::shared_ptr<arrow::Array> rvector_to_Array(SEXP x){ return nullptr; } +// [[Rcpp::export]] +std::shared_ptr<arrow::RecordBatch> dataframe_to_RecordBatch(DataFrame tbl){ + CharacterVector names = tbl.names(); + + std::vector<std::shared_ptr<arrow::Field>> fields; + std::vector<std::shared_ptr<arrow::Array>> arrays; + + int nc = tbl.size(); + for(int i=0; i<nc; i++){ + arrays.push_back(rvector_to_Array(tbl[i])); + fields.push_back(std::make_shared<arrow::Field>(std::string(names[i]), arrays[i]->type())); + } + auto schema = std::make_shared<arrow::Schema>(std::move(fields)); + + return arrow::RecordBatch::Make(schema, tbl.nrow(), std::move(arrays)); +} +// [[Rcpp::export]] +int RecordBatch_num_columns(const std::shared_ptr<arrow::RecordBatch>& x){ + return x->num_columns(); +} +// [[Rcpp::export]] +int RecordBatch_num_rows(const std::shared_ptr<arrow::RecordBatch>& x){ + return x->num_rows(); +}