diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 0cb529fb690f4..9f2e73813d3a1 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -24,3 +24,7 @@ members = [ "integration-testing", "benchmarks", ] +default-members= [ + "arrow", + "datafusion" +] diff --git a/rust/arrow-flight/Cargo.toml b/rust/arrow-flight/Cargo.toml index 1593a27ce3dd6..fc3208c07cfd4 100644 --- a/rust/arrow-flight/Cargo.toml +++ b/rust/arrow-flight/Cargo.toml @@ -31,7 +31,7 @@ bytes = "0.5" prost = "0.6" prost-derive = "0.6" tokio = {version = "0.2", features = ["macros"]} -futures = { version = "0.3", default-features = false, features = ["alloc"]} +futures = { version = "0.3.2", default-features = false, features = ["alloc"]} [build-dependencies] tonic-build = "0.2" diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index dac331b44dd99..f441a9bfcb97c 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -39,17 +39,19 @@ path = "src/lib.rs" serde = { version = "1.0", features = ["rc"] } serde_derive = "1.0" serde_json = { version = "1.0", features = ["preserve_order"] } -indexmap = "1.4" -rand = "0.7" -csv = "1.1" num = "0.3" regex = "1.3" -lazy_static = "1.4" packed_simd = { version = "0.3", optional = true } chrono = "0.4" flatbuffers = "0.6" hex = "0.4" -arrow-flight = { path = "../arrow-flight", optional = true, version = "1.0.0-SNAPSHOT" } + +[target.'cfg(not(target_arch="wasm32"))'.dependencies] +indexmap = "1.3" +rand = "0.7" +csv = "1.1" +lazy_static = "1.4" +arrow-flight = { path = "../arrow-flight", optional = true } prettytable-rs = { version = "0.8.0", optional = true } [features] @@ -58,7 +60,7 @@ flight = ["arrow-flight"] prettyprint = ["prettytable-rs"] default = ["flight", "prettyprint"] -[dev-dependencies] +[target.'cfg(not(target_wasm="wasm32"))'.dev-dependencies] criterion = "0.3" lazy_static = "1" flate2 = "1" diff --git a/rust/arrow/examples/read_csv.rs b/rust/arrow/examples/read_csv.rs index f826028ce6555..e67d3eca8091e 100644 --- a/rust/arrow/examples/read_csv.rs +++ b/rust/arrow/examples/read_csv.rs @@ -20,12 +20,17 @@ extern crate arrow; use std::fs::File; use std::sync::Arc; +#[cfg(not(target_arch="wasm32"))] use arrow::csv; use arrow::datatypes::{DataType, Field, Schema}; use arrow::error::Result; -#[cfg(feature = "prettyprint")] +#[cfg(all(feature="prettyprint"), not(target_arch="wasm32"))] use arrow::util::pretty::print_batches; +#[cfg(target_arch="wasm32")] +fn main() {} + +#[cfg(not(target_arch="wasm32"))] fn main() -> Result<()> { let schema = Schema::new(vec![ Field::new("city", DataType::Utf8, false), diff --git a/rust/arrow/examples/read_csv_infer_schema.rs b/rust/arrow/examples/read_csv_infer_schema.rs index 6ddfcc277fee2..a1ae6cbebf273 100644 --- a/rust/arrow/examples/read_csv_infer_schema.rs +++ b/rust/arrow/examples/read_csv_infer_schema.rs @@ -17,12 +17,19 @@ extern crate arrow; +#[cfg(not(target_arch="wasm32"))] use arrow::csv; use arrow::error::Result; -#[cfg(feature = "prettyprint")] +#[cfg(all(feature="prettyprint", not(target_arch="wasm32")))] use arrow::util::pretty::print_batches; use std::fs::File; +#[cfg(target_arch="wasm32")] +fn main() { +} + + +#[cfg(not(target_arch="wasm32"))] fn main() -> Result<()> { let file = File::open("test/data/uk_cities_with_headers.csv").unwrap(); let builder = csv::ReaderBuilder::new() diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index 79d3353af7e8e..3143ab74bedb6 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -23,6 +23,7 @@ use std::iter::{FromIterator, IntoIterator}; use std::mem; use std::sync::Arc; +#[cfg(not(target_arch="wasm32"))] use chrono::prelude::*; use super::*; @@ -502,6 +503,7 @@ impl PrimitiveArray { } } +#[cfg(not(target_arch="wasm32"))] impl PrimitiveArray where i64: std::convert::From, @@ -554,6 +556,7 @@ where /// Returns value as a chrono `NaiveDate` by using `Self::datetime()` /// /// If a data type cannot be converted to `NaiveDate`, a `None` is returned + #[cfg(not(target_arch="wasm32"))] pub fn value_as_date(&self, i: usize) -> Option { self.value_as_datetime(i).map(|datetime| datetime.date()) } @@ -561,6 +564,7 @@ where /// Returns a value as a chrono `NaiveTime` /// /// `Date32` and `Date64` return UTC midnight as they do not have time resolution + #[cfg(not(target_arch="wasm32"))] pub fn value_as_time(&self, i: usize) -> Option { match self.data_type() { DataType::Time32(unit) => { @@ -637,6 +641,7 @@ impl fmt::Debug for PrimitiveArray { } } +#[cfg(not(target_arch="wasm32"))] impl fmt::Debug for PrimitiveArray where i64: std::convert::From, @@ -2401,6 +2406,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_date64_array_from_vec_option() { // Test building a primitive array with null values // we use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions @@ -2428,6 +2434,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_time32_millisecond_array_from_vec() { // 1: 00:00:00.001 // 37800005: 10:30:00.005 @@ -2448,6 +2455,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_time64_nanosecond_array_from_vec() { // Test building a primitive array with null values // we use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index d95afd9c50bbd..f6bfa9a716d5d 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -269,7 +269,7 @@ impl> From for Buffer { } /// Helper function for SIMD `BitAnd` and `BitOr` implementations -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] fn bitwise_bin_op_simd_helper(left: &Buffer, right: &Buffer, op: F) -> Buffer where F: Fn(u8x64, u8x64) -> u8x64, @@ -300,7 +300,7 @@ impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer { } // SIMD implementation if available - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] { return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b)); } @@ -334,7 +334,7 @@ impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer { } // SIMD implementation if available - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] { return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b)); } @@ -362,7 +362,7 @@ impl Not for &Buffer { fn not(self) -> Buffer { // SIMD implementation if available - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] { let mut result = MutableBuffer::new(self.len()).with_bitset(self.len(), false); diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs b/rust/arrow/src/compute/kernels/arithmetic.rs index 94a66cd503ac3..c2ea543d00e24 100644 --- a/rust/arrow/src/compute/kernels/arithmetic.rs +++ b/rust/arrow/src/compute/kernels/arithmetic.rs @@ -38,7 +38,7 @@ use crate::buffer::Buffer; #[cfg(feature = "simd")] use crate::buffer::MutableBuffer; use crate::compute::util::apply_bin_op_to_option_bitmap; -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] use crate::compute::util::simd_load_set_invalid; use crate::datatypes; use crate::datatypes::ToByteSlice; @@ -99,8 +99,8 @@ where } /// SIMD vectorized version of `math_op` above. -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] -fn simd_math_op( +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] +pub fn simd_math_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, @@ -158,7 +158,7 @@ where /// SIMD vectorized version of `divide`, the divide kernel needs it's own implementation as there /// is a need to handle situations where a divide by `0` occurs. This is complicated by `NULL` /// slots and padding. -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] fn simd_divide( left: &PrimitiveArray, right: &PrimitiveArray, @@ -234,7 +234,7 @@ where + Div + Zero, { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] return simd_math_op(&left, &right, |a, b| a + b); #[allow(unreachable_code)] @@ -255,7 +255,7 @@ where + Div + Zero, { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] return simd_math_op(&left, &right, |a, b| a - b); #[allow(unreachable_code)] @@ -276,7 +276,7 @@ where + Div + Zero, { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] return simd_math_op(&left, &right, |a, b| a * b); #[allow(unreachable_code)] @@ -299,7 +299,7 @@ where + Zero + One, { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] return simd_divide(&left, &right); #[allow(unreachable_code)] diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index 60c8c86d5e978..4e04f39fb1d1b 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -257,7 +257,7 @@ pub fn gt_eq_utf8_scalar(left: &StringArray, right: &str) -> Result( left: &PrimitiveArray, right: &PrimitiveArray, @@ -379,11 +379,11 @@ pub fn eq(left: &PrimitiveArray, right: &PrimitiveArray) -> Result(left: &PrimitiveArray, right: &PrimitiveArray) -> Result(left: &PrimitiveArray, right: &PrimitiveArray) -> Result( where T: ArrowNumericType, { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] return simd_compare_op(left, right, T::le); #[cfg(any( - not(any(target_arch = "x86", target_arch = "x86_64")), + not(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32")), not(feature = "simd") ))] compare_op!(left, right, |a, b| a <= b) @@ -507,11 +507,11 @@ pub fn gt(left: &PrimitiveArray, right: &PrimitiveArray) -> Result b) @@ -542,11 +542,11 @@ pub fn gt_eq( where T: ArrowNumericType, { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] return simd_compare_op(left, right, T::ge); #[cfg(any( - not(any(target_arch = "x86", target_arch = "x86_64")), + not(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32")), not(feature = "simd") ))] compare_op!(left, right, |a, b| a >= b) diff --git a/rust/arrow/src/compute/kernels/temporal.rs b/rust/arrow/src/compute/kernels/temporal.rs index 4319294e2de9a..a135d8c41ecb7 100644 --- a/rust/arrow/src/compute/kernels/temporal.rs +++ b/rust/arrow/src/compute/kernels/temporal.rs @@ -17,6 +17,7 @@ //! Defines temporal kernels for time and date related functions. +#[cfg(not(target_arch="wasm32"))] use chrono::Timelike; use crate::array::*; @@ -24,6 +25,7 @@ use crate::datatypes::*; use crate::error::Result; /// Extracts the hours of a given temporal array as an array of integers +#[cfg(not(target_arch="wasm32"))] pub fn hour(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, @@ -57,6 +59,7 @@ mod tests { use super::*; #[test] + #[cfg(not(target_arch="wasm32"))] fn test_temporal_array_date64_hour() { let a: PrimitiveArray = vec![Some(1514764800000), None, Some(1550636625000)].into(); @@ -69,6 +72,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_temporal_array_time32_second_hour() { let a: PrimitiveArray = vec![37800, 86339].into(); diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 05e7490f206e1..c57802e858aff 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -99,7 +99,7 @@ pub(super) fn take_value_indices_from_list( /// Lanes of the SIMD mask can be set to 'valid' (`true`) if the corresponding array slot is not /// `NULL`, as indicated by it's `Bitmap`, and is within the length of the array. Lanes outside the /// length represent padding and are set to 'invalid' (`false`). -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] unsafe fn is_valid( bitmap: &Option, i: usize, @@ -137,7 +137,7 @@ where /// Note that `array` below has it's own `Bitmap` separate from the `bitmap` argument. This /// function is used to prepare `array`'s for binary operations. The `bitmap` argument is the /// `Bitmap` after the binary operation. -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] pub(super) unsafe fn simd_load_set_invalid( array: &PrimitiveArray, bitmap: &Option, @@ -229,7 +229,7 @@ mod tests { } #[test] - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] fn test_is_valid() { let a = Int32Array::from(vec![ Some(15), @@ -257,7 +257,7 @@ mod tests { } #[test] - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] fn test_simd_load_set_invalid() { let a = Int64Array::from(vec![None, Some(15), Some(5), Some(0)]); let new_bitmap = &Some(Bitmap::from(Buffer::from([0b00001010]))); diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index df8e8e2a1d592..6905db92dc9f3 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -510,7 +510,7 @@ impl ArrowDictionaryKeyType for UInt64Type {} /// A subtype of primitive type that represents numeric values. /// /// SIMD operations are defined in this trait if available on the target system. -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] pub trait ArrowNumericType: ArrowPrimitiveType where Self::Simd: Add @@ -584,14 +584,14 @@ where } #[cfg(any( - not(any(target_arch = "x86", target_arch = "x86_64")), + not(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32")), not(feature = "simd") ))] pub trait ArrowNumericType: ArrowPrimitiveType {} macro_rules! make_numeric_type { ($impl_ty:ty, $native_ty:ty, $simd_ty:ident, $simd_mask_ty:ident) => { - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] impl ArrowNumericType for $impl_ty { type Simd = $simd_ty; @@ -678,7 +678,7 @@ macro_rules! make_numeric_type { } } #[cfg(any( - not(any(target_arch = "x86", target_arch = "x86_64")), + not(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32")), not(feature = "simd") ))] impl ArrowNumericType for $impl_ty {} diff --git a/rust/arrow/src/error.rs b/rust/arrow/src/error.rs index f428b4f0f99e4..4269554b18f12 100644 --- a/rust/arrow/src/error.rs +++ b/rust/arrow/src/error.rs @@ -18,6 +18,7 @@ //! Defines `ArrowError` for representing failures in various Arrow operations. use std::fmt::{Debug, Display, Formatter}; +#[cfg(not(target_arch="wasm32"))] use csv as csv_crate; use std::error::Error; @@ -53,6 +54,7 @@ impl From<::std::io::Error> for ArrowError { } } +#[cfg(not(target_arch="wasm32"))] impl From for ArrowError { fn from(error: csv_crate::Error) -> Self { match error.kind() { diff --git a/rust/arrow/src/ipc/writer.rs b/rust/arrow/src/ipc/writer.rs index f47a565ebdd0e..0266fa6f03583 100644 --- a/rust/arrow/src/ipc/writer.rs +++ b/rust/arrow/src/ipc/writer.rs @@ -144,6 +144,7 @@ impl FileWriter { Ok(()) } + } /// Finish the file if it is not 'finished' when it goes out of scope @@ -165,6 +166,7 @@ pub struct StreamWriter { } impl StreamWriter { + /// Try create a new writer, with the schema written as part of the header pub fn try_new(writer: W, schema: &Schema) -> Result { let mut writer = BufWriter::new(writer); diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index 10ac4dee78430..6ec5f3ebca116 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -31,10 +31,11 @@ pub mod array; pub mod bitmap; pub mod buffer; pub mod compute; +#[cfg(not(target_arch="wasm32"))] pub mod csv; pub mod datatypes; pub mod error; -#[cfg(feature = "flight")] +#[cfg(all(feature = "flight",not(target_arch="wasm32")))] pub mod flight; #[allow(clippy::redundant_closure)] #[allow(clippy::needless_lifetimes)] @@ -42,6 +43,7 @@ pub mod flight; #[allow(clippy::redundant_static_lifetimes)] #[allow(clippy::redundant_field_names)] pub mod ipc; +#[cfg(not(target_arch="wasm32"))] pub mod json; pub mod memory; pub mod record_batch; diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs index a2ada2c03237d..2584e19207184 100644 --- a/rust/arrow/src/util/bit_util.rs +++ b/rust/arrow/src/util/bit_util.rs @@ -169,7 +169,7 @@ pub fn ceil(value: usize, divisor: usize) -> usize { /// Note that each slice should be 64 bytes and it is the callers responsibility to ensure /// that this is the case. If passed slices larger than 64 bytes the operation will only /// be performed on the first 64 bytes. Slices less than 64 bytes will panic. -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] pub unsafe fn bitwise_bin_op_simd(left: &[u8], right: &[u8], result: &mut [u8], op: F) where F: Fn(u8x64, u8x64) -> u8x64, @@ -182,6 +182,7 @@ where #[cfg(test)] mod tests { + #[cfg(not(target_arch="wasm32"))] use rand::{thread_rng, Rng}; use std::collections::HashSet; @@ -225,6 +226,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_get_bit_raw() { const NUM_BYTE: usize = 10; let mut buf = vec![0; NUM_BYTE]; @@ -258,6 +260,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_set_bit_raw() { const NUM_BYTE: usize = 10; let mut buf = vec![0; NUM_BYTE]; @@ -282,6 +285,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_set_bits_raw() { const NUM_BYTE: usize = 64; const NUM_BLOCKS: usize = 12; @@ -313,6 +317,7 @@ mod tests { } #[test] + #[cfg(not(target_arch="wasm32"))] fn test_get_set_bit_roundtrip() { const NUM_BYTES: usize = 10; const NUM_SETS: usize = 10; @@ -366,13 +371,13 @@ mod tests { assert_eq!(ceil(8, 8), 1); assert_eq!(ceil(9, 8), 2); assert_eq!(ceil(9, 9), 1); - assert_eq!(ceil(10000000000, 10), 1000000000); - assert_eq!(ceil(10, 10000000000), 1); - assert_eq!(ceil(10000000000, 1000000000), 10); + assert_eq!(ceil(10000000, 10), 1000000); + assert_eq!(ceil(10, 10000000), 1); + assert_eq!(ceil(10000000, 1000000), 10); } #[test] - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] fn test_bitwise_and_simd() { let buf1 = [0b00110011u8; 64]; let buf2 = [0b11110000u8; 64]; @@ -384,7 +389,7 @@ mod tests { } #[test] - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64", target_arch="wasm32"), feature = "simd"))] fn test_bitwise_or_simd() { let buf1 = [0b00110011u8; 64]; let buf2 = [0b11110000u8; 64]; diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs index f3bcc23a1d4a6..6d6dd8f352a92 100644 --- a/rust/arrow/src/util/mod.rs +++ b/rust/arrow/src/util/mod.rs @@ -16,7 +16,8 @@ // under the License. pub mod bit_util; -pub mod integration_util; +#[cfg(not(target_arch="wasm32"))] +pub(crate) mod integration_util; #[cfg(feature = "prettyprint")] pub mod pretty; pub mod string_writer; diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs index 4d3c64408a70b..d7a4b137dad6a 100644 --- a/rust/arrow/src/util/pretty.rs +++ b/rust/arrow/src/util/pretty.rs @@ -21,23 +21,28 @@ use crate::array; use crate::datatypes::{DataType, TimeUnit}; use crate::record_batch::RecordBatch; +#[cfg(not(target_arch="wasm32"))] use prettytable::format; +#[cfg(not(target_arch="wasm32"))] use prettytable::{Cell, Row, Table}; use crate::error::{ArrowError, Result}; ///! Create a visual representation of record batches +#[cfg(not(target_arch="wasm32"))] pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { Ok(create_table(results)?.to_string()) } ///! Prints a visual representation of record batches to stdout +#[cfg(not(target_arch="wasm32"))] pub fn print_batches(results: &[RecordBatch]) -> Result<()> { create_table(results)?.printstd(); Ok(()) } ///! Convert a series of record batches into a table +#[cfg(not(target_arch="wasm32"))] fn create_table(results: &[RecordBatch]) -> Result { let mut table = Table::new(); table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE); diff --git a/rust/arrow/src/util/test_util.rs b/rust/arrow/src/util/test_util.rs index 44f7074ee3ab9..02686ae5425fb 100644 --- a/rust/arrow/src/util/test_util.rs +++ b/rust/arrow/src/util/test_util.rs @@ -17,10 +17,13 @@ //! Utils to make testing easier + +#[cfg(not(target_arch="wasm32"))] use rand::{thread_rng, Rng}; use std::{env, fs, io::Write}; /// Returns a vector of size `n`, filled with randomly generated bytes. +#[cfg(not(target_arch="wasm32"))] pub fn random_bytes(n: usize) -> Vec { let mut result = vec![]; let mut rng = thread_rng(); diff --git a/rust/datafusion/Cargo.toml b/rust/datafusion/Cargo.toml index d31e0032b364a..5580736486b1a 100644 --- a/rust/datafusion/Cargo.toml +++ b/rust/datafusion/Cargo.toml @@ -46,18 +46,20 @@ cli = ["rustyline"] [dependencies] fnv = "1.0" arrow = { path = "../arrow", version = "1.0.0-SNAPSHOT" } -parquet = { path = "../parquet", version = "1.0.0-SNAPSHOT" } sqlparser = "0.2.6" -clap = "2.33" -prettytable-rs = "0.8.0" +paste = "0.1" + +[target.'cfg(not(target_arch="wasm32"))'.dependencies] +parquet = { path = "../parquet", version = "1.0.0-SNAPSHOT" } rustyline = {version = "6.0", optional = true} +prettytable-rs = "0.8.0" +clap = "2.33" crossbeam = "0.7" -paste = "0.1" + [dev-dependencies] criterion = "0.3" tempdir = "0.3" -futures = "0.3" prost = "0.6" tokio = { version = "0.2", features = ["macros"] } tonic = "0.2" diff --git a/rust/datafusion/src/bin/main.rs b/rust/datafusion/src/bin/main.rs index deb5b796b2d69..6364af306ae56 100644 --- a/rust/datafusion/src/bin/main.rs +++ b/rust/datafusion/src/bin/main.rs @@ -16,10 +16,10 @@ // under the License. // Only bring in dependencies for the repl when the cli feature is enabled. -#[cfg(feature = "cli")] +#[cfg(all(feature = "cli", not(target_arch="wasm32")))] mod repl; pub fn main() { - #[cfg(feature = "cli")] + #[cfg(all(feature = "cli", not(target_arch="wasm32")))] repl::main() } diff --git a/rust/datafusion/src/bin/repl.rs b/rust/datafusion/src/bin/repl.rs index 4a8464b600e0a..0234094f5b244 100644 --- a/rust/datafusion/src/bin/repl.rs +++ b/rust/datafusion/src/bin/repl.rs @@ -17,15 +17,29 @@ #![allow(bare_trait_objects)] +#[cfg(not(target_arch="wasm32"))] use arrow::util::pretty; +#[cfg(not(target_arch="wasm32"))] use clap::{crate_version, App, Arg}; +#[cfg(not(target_arch="wasm32"))] use datafusion::error::Result; +#[cfg(not(target_arch="wasm32"))] use datafusion::execution::context::ExecutionContext; +#[cfg(not(target_arch="wasm32"))] use rustyline::Editor; +#[cfg(not(target_arch="wasm32"))] use std::env; +#[cfg(not(target_arch="wasm32"))] use std::path::Path; +#[cfg(not(target_arch="wasm32"))] use std::time::Instant; + +#[cfg(target_arch="wasm32")] +pub fn main() { +} + +#[cfg(not(target_arch="wasm32"))] pub fn main() { let matches = App::new("DataFusion") .version(crate_version!()) @@ -94,11 +108,13 @@ pub fn main() { rl.save_history(".history").ok(); } +#[cfg(not(target_arch="wasm32"))] fn is_exit_command(line: &str) -> bool { let line = line.trim_end().to_lowercase(); line == "quit" || line == "exit" } +#[cfg(not(target_arch="wasm32"))] fn exec_and_print( ctx: &mut ExecutionContext, sql: String, diff --git a/rust/datafusion/src/datasource/mod.rs b/rust/datafusion/src/datasource/mod.rs index 7ecc4a18d6dfb..7167a36e7fb64 100644 --- a/rust/datafusion/src/datasource/mod.rs +++ b/rust/datafusion/src/datasource/mod.rs @@ -17,11 +17,16 @@ //! DataFusion data sources +#[cfg(not(target_arch="wasm32"))] pub mod csv; + pub mod datasource; pub mod memory; + +#[cfg(not(target_arch="wasm32"))] pub mod parquet; +#[cfg(not(target_arch="wasm32"))] pub use self::csv::{CsvBatchIterator, CsvFile, CsvReadOptions}; pub use self::datasource::{ScanResult, TableProvider}; pub use self::memory::MemTable; diff --git a/rust/datafusion/src/error.rs b/rust/datafusion/src/error.rs index 4ecb30c92ca42..71ede2dca4d5a 100644 --- a/rust/datafusion/src/error.rs +++ b/rust/datafusion/src/error.rs @@ -23,6 +23,8 @@ use std::io::Error; use std::result; use arrow::error::ArrowError; + +#[cfg(not(target_arch="wasm32"))] use parquet::errors::ParquetError; use sqlparser::sqlparser::ParserError; @@ -37,6 +39,7 @@ pub enum ExecutionError { /// Wraps an error from the Arrow crate ArrowError(ArrowError), /// Wraps an error from the Parquet crate + #[cfg(not(target_arch="wasm32"))] ParquetError(ParquetError), /// I/O error IoError(Error), @@ -85,6 +88,7 @@ impl From for ExecutionError { } } +#[cfg(not(target_arch="wasm32"))] impl From for ExecutionError { fn from(e: ParquetError) -> Self { ExecutionError::ParquetError(e) @@ -101,6 +105,7 @@ impl Display for ExecutionError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match *self { ExecutionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc), + #[cfg(not(target_arch="wasm32"))] ExecutionError::ParquetError(ref desc) => { write!(f, "Parquet error: {}", desc) } diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs index 29568b7de330f..3962b9c54449c 100644 --- a/rust/datafusion/src/execution/context.rs +++ b/rust/datafusion/src/execution/context.rs @@ -24,15 +24,19 @@ use std::string::String; use std::sync::Arc; use std::thread::{self, JoinHandle}; +#[cfg(not(target_arch="wasm32"))] use arrow::csv; use arrow::datatypes::*; use arrow::record_batch::RecordBatch; +#[cfg(not(target_arch="wasm32"))] use crate::datasource::csv::CsvFile; +#[cfg(not(target_arch="wasm32"))] use crate::datasource::parquet::ParquetTable; use crate::datasource::TableProvider; use crate::error::{ExecutionError, Result}; use crate::execution::physical_plan::common; +#[cfg(not(target_arch="wasm32"))] use crate::execution::physical_plan::csv::{CsvExec, CsvReadOptions}; use crate::execution::physical_plan::datasource::DatasourceExec; use crate::execution::physical_plan::expressions::{ @@ -44,6 +48,7 @@ use crate::execution::physical_plan::limit::LimitExec; use crate::execution::physical_plan::math_expressions::register_math_functions; use crate::execution::physical_plan::memory::MemoryExec; use crate::execution::physical_plan::merge::MergeExec; +#[cfg(not(target_arch="wasm32"))] use crate::execution::physical_plan::parquet::ParquetExec; use crate::execution::physical_plan::projection::ProjectionExec; use crate::execution::physical_plan::selection::SelectionExec; @@ -61,6 +66,7 @@ use crate::sql::planner::{SchemaProvider, SqlToRel}; use crate::table::Table; use sqlparser::sqlast::{SQLColumnDef, SQLType}; + /// Execution context for registering data sources and executing queries pub struct ExecutionContext { datasources: HashMap>, @@ -101,6 +107,7 @@ impl ExecutionContext { ref file_type, ref has_header, } => match file_type { + #[cfg(not(target_arch="wasm32"))] FileType::CSV => { self.register_csv( name, @@ -111,6 +118,7 @@ impl ExecutionContext { )?; Ok(vec![]) } + #[cfg(not(target_arch="wasm32"))] FileType::Parquet => { self.register_parquet(name, location)?; Ok(vec![]) @@ -216,8 +224,10 @@ impl ExecutionContext { ))), } } - + + /// Register a CSV file as a table so that it can be queried from SQL + #[cfg(not(target_arch="wasm32"))] pub fn register_csv( &mut self, name: &str, @@ -229,6 +239,7 @@ impl ExecutionContext { } /// Register a Parquet file as a table so that it can be queried from SQL + #[cfg(not(target_arch="wasm32"))] pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()> { let table = ParquetTable::try_new(&filename)?; self.register_table(name, Box::new(table)); @@ -328,6 +339,7 @@ impl ExecutionContext { Arc::new(projected_schema.as_ref().to_owned()), projection.to_owned(), )?)), + #[cfg(not(target_arch="wasm32"))] LogicalPlan::CsvScan { path, schema, @@ -344,6 +356,7 @@ impl ExecutionContext { projection.to_owned(), batch_size, )?)), + #[cfg(not(target_arch="wasm32"))] LogicalPlan::ParquetScan { path, projection, .. } => Ok(Arc::new(ParquetExec::try_new( @@ -579,6 +592,7 @@ impl ExecutionContext { } /// Execute a query and write the results to a partitioned CSV file + #[cfg(not(target_arch="wasm32"))] pub fn write_csv(&self, plan: &dyn ExecutionPlan, path: &str) -> Result<()> { // create directory to contain the CSV files (one per partition) let path = path.to_string(); @@ -1174,6 +1188,7 @@ mod tests { } /// Execute SQL and write results to partitioned csv files + #[cfg(not(target_arch="wasm32"))] fn write_csv(ctx: &mut ExecutionContext, sql: &str, out_dir: &str) -> Result<()> { let logical_plan = ctx.create_logical_plan(sql)?; let logical_plan = ctx.optimize(&logical_plan)?; diff --git a/rust/datafusion/src/execution/physical_plan/mod.rs b/rust/datafusion/src/execution/physical_plan/mod.rs index a3b32eb80cc2e..293fc5d10828a 100644 --- a/rust/datafusion/src/execution/physical_plan/mod.rs +++ b/rust/datafusion/src/execution/physical_plan/mod.rs @@ -88,6 +88,7 @@ pub trait Accumulator { } pub mod common; +#[cfg(not(target_arch="wasm32"))] pub mod csv; pub mod datasource; pub mod expressions; @@ -96,6 +97,7 @@ pub mod limit; pub mod math_expressions; pub mod memory; pub mod merge; +#[cfg(not(target_arch="wasm32"))] pub mod parquet; pub mod projection; pub mod selection; diff --git a/rust/datafusion/src/logicalplan.rs b/rust/datafusion/src/logicalplan.rs index fe711eeadcab3..b1818f3a448ed 100644 --- a/rust/datafusion/src/logicalplan.rs +++ b/rust/datafusion/src/logicalplan.rs @@ -25,7 +25,9 @@ use std::fmt; use arrow::datatypes::{DataType, Field, Schema}; +#[cfg(not(target_arch="wasm32"))] use crate::datasource::csv::{CsvFile, CsvReadOptions}; +#[cfg(not(target_arch="wasm32"))] use crate::datasource::parquet::ParquetTable; use crate::datasource::TableProvider; use crate::error::{ExecutionError, Result}; @@ -803,6 +805,7 @@ impl LogicalPlanBuilder { } /// Scan a CSV data source + #[cfg(not(target_arch="wasm32"))] pub fn scan_csv( path: &str, options: CsvReadOptions, @@ -839,6 +842,7 @@ impl LogicalPlanBuilder { } /// Scan a Parquet data source + #[cfg(not(target_arch="wasm32"))] pub fn scan_parquet(path: &str, projection: Option>) -> Result { let p = ParquetTable::try_new(path)?; let schema = p.schema().as_ref().to_owned();