Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update for JSON schema inference #1

Merged
merged 24 commits into from
Nov 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1f07735
bump .github/workflows/miri.yaml (#875)
jimexist Oct 27, 2021
c7cf8f7
feat(ipc): Support writing dictionaries nested in structs and unions …
helgikrs Oct 29, 2021
898924f
Remove unpassable cargo publish check from verify-release-candidate.s…
alamb Oct 29, 2021
2310936
Use different caching for MIRI runs (#892)
alamb Oct 31, 2021
f4fdc9c
2018 -> 2021 (#591)
jimexist Nov 1, 2021
81ffa24
allow null array to be cased to all other types (#884)
jimexist Nov 1, 2021
2a0a4c3
portable check for shasums (#887)
bkmgit Nov 1, 2021
f0451bb
fix ffi warning on failed to drop (#893)
jimexist Nov 1, 2021
43d1c85
fix some warning about unused variables in panic tests (#894)
jimexist Nov 1, 2021
06f730e
casting kernel can combine multi-match patterns (#883)
jimexist Nov 1, 2021
a7547d5
test moving out (#895)
jimexist Nov 1, 2021
4666b5f
Fix instances of UB that cause tests to not pass under miri (#878)
saethlin Nov 1, 2021
0892595
Disable cargo / build caching for MIRI runs (#899)
alamb Nov 1, 2021
2cf3178
Fix clippy (#900)
alamb Nov 1, 2021
b79c600
fix some clippy warnings (#896)
jimexist Nov 2, 2021
1d3d5e3
Bump deps (#864)
Nov 2, 2021
67af0d0
Update mod.rs (#909)
kingeasternsun Nov 3, 2021
d5a4bd7
doc example mistype (#904)
kingeasternsun Nov 3, 2021
bb05b00
Mark boolean kernels public (#913)
alamb Nov 4, 2021
62934e9
Automatically retry failed MIRI runs to work around intermittent fail…
alamb Nov 6, 2021
74b520c
Validate arguments to ArrayData::new and null bit buffer and buffers …
alamb Nov 8, 2021
e20d3fa
feat(ipc): add support for deserializing messages with nested diction…
helgikrs Nov 8, 2021
5d817af
Merge remote-tracking branch 'apache/master' into brianrackle_master
alamb Nov 9, 2021
815ea72
Add test case for 2. and issue link
alamb Nov 9, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/miri.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Runs the arrow crate's tests under MIRI, retrying when a run fails.
#
# Must be run with nightly Rust, for example:
#   rustup default nightly


# Per the Miri README, this flag turns off Miri's host isolation so the
# interpreted tests can reach host resources.
export MIRIFLAGS="-Zmiri-disable-isolation"
# Prepare the Miri toolchain before any tests are run.
cargo miri setup
# Start from an empty target directory (presumably so no stale, non-Miri
# build artifacts are reused -- confirm).
cargo clean

run_miri() {
    # Only the arrow crate is exercised under MIRI for now. Tests matching
    # these filters (IO related, or otherwise unsupported) are skipped.
    local -a skip_filters=(--skip csv --skip ipc --skip json)
    cargo miri test -p arrow -- "${skip_filters[@]}"
}

# If MIRI fails, automatically retry
# Seems like miri is occasionally killed by the github runner
# https://github.com/apache/arrow-rs/issues/879
#
# The script must exit non-zero when every attempt fails; otherwise the
# status of the last command in the loop (a successful echo) would make
# CI report success for a run that never passed.
max_attempts=5
miri_passed=false
for attempt in $(seq 1 "${max_attempts}"); do
    echo "Starting Arrow MIRI run (attempt ${attempt} of ${max_attempts})..."
    if run_miri; then
        miri_passed=true
        break
    fi
    echo "Arrow MIRI run failed (attempt ${attempt} of ${max_attempts})" >&2
done

if [ "${miri_passed}" != true ]; then
    echo "Arrow MIRI run failed after ${max_attempts} attempts, giving up" >&2
    exit 1
fi
19 changes: 3 additions & 16 deletions .github/workflows/miri.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,17 @@ on:
pull_request:

jobs:

miri-checks:
name: MIRI
runs-on: ubuntu-latest
strategy:
matrix:
arch: [amd64]
rust: [nightly-2021-07-04]
rust: [nightly-2021-10-23]
steps:
- uses: actions/checkout@v2
with:
submodules: true
- uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-miri-${{ hashFiles('**/Cargo.lock') }}
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
Expand All @@ -50,11 +42,6 @@ jobs:
- name: Run Miri Checks
env:
RUST_BACKTRACE: full
RUST_LOG: 'trace'
RUST_LOG: "trace"
run: |
export MIRIFLAGS="-Zmiri-disable-isolation"
cargo miri setup
cargo clean
# Currently only the arrow crate is tested with miri
# IO related tests and some unsupported tests are skipped
cargo miri test -p arrow -- --skip csv --skip ipc --skip json
bash .github/workflows/miri.sh
7 changes: 7 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,11 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
rustup default ${{ matrix.rust }}
rustup component add rustfmt clippy
- name: Cache Cargo
uses: actions/cache@v2
with:
Expand All @@ -287,6 +292,8 @@ jobs:
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data

rustup toolchain install stable
rustup default stable
cargo install --version 0.18.2 cargo-tarpaulin
cargo tarpaulin --all --out Xml
- name: Report coverage
Expand Down
13 changes: 7 additions & 6 deletions arrow-flight/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
name = "arrow-flight"
description = "Apache Arrow Flight"
version = "7.0.0-SNAPSHOT"
edition = "2018"
edition = "2021"
rust-version = "1.56"
authors = ["Apache Arrow <[email protected]>"]
homepage = "https://github.com/apache/arrow-rs"
repository = "https://github.com/apache/arrow-rs"
Expand All @@ -28,20 +29,20 @@ license = "Apache-2.0"
[dependencies]
arrow = { path = "../arrow", version = "7.0.0-SNAPSHOT" }
base64 = "0.13"
tonic = "0.5"
tonic = "0.6"
bytes = "1"
prost = "0.8"
prost-derive = "0.8"
prost = "0.9"
prost-derive = "0.9"
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] }

[dev-dependencies]
futures = { version = "0.3", default-features = false, features = ["alloc"]}

[build-dependencies]
tonic-build = "0.5"
tonic-build = "0.6"
# Pin specific version of the tonic-build dependencies to avoid auto-generated
# (and checked in) arrow.flight.protocol.rs from changing
proc-macro2 = "=1.0.27"
proc-macro2 = "=1.0.30"

#[lib]
#name = "flight"
Expand Down
11 changes: 2 additions & 9 deletions arrow-flight/src/arrow.flight.protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ pub mod flight_service_client {
impl<T> FlightServiceClient<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody>,
T::ResponseBody: Body + Send + Sync + 'static,
T::ResponseBody: Body + Send + 'static,
T::Error: Into<StdError>,
<T::ResponseBody as Body>::Error: Into<StdError> + Send,
{
Expand Down Expand Up @@ -513,7 +513,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the Handshake method."]
type HandshakeStream: futures_core::Stream<Item = Result<super::HandshakeResponse, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " Handshake between client and server. Depending on the server, the"]
Expand All @@ -527,7 +526,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the ListFlights method."]
type ListFlightsStream: futures_core::Stream<Item = Result<super::FlightInfo, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " Get a list of available streams given a particular criteria. Most flight"]
Expand Down Expand Up @@ -567,7 +565,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the DoGet method."]
type DoGetStream: futures_core::Stream<Item = Result<super::FlightData, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " Retrieve a single stream associated with a particular descriptor"]
Expand All @@ -581,7 +578,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the DoPut method."]
type DoPutStream: futures_core::Stream<Item = Result<super::PutResult, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " Push a stream to the flight service associated with a particular"]
Expand All @@ -597,7 +593,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the DoExchange method."]
type DoExchangeStream: futures_core::Stream<Item = Result<super::FlightData, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " Open a bidirectional data channel for a given descriptor. This"]
Expand All @@ -612,7 +607,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the DoAction method."]
type DoActionStream: futures_core::Stream<Item = Result<super::Result, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " Flight services can support an arbitrary number of simple actions in"]
Expand All @@ -628,7 +622,6 @@ pub mod flight_service_server {
#[doc = "Server streaming response type for the ListActions method."]
type ListActionsStream: futures_core::Stream<Item = Result<super::ActionType, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
#[doc = " A flight service exposes all of the available action types that it has"]
Expand Down Expand Up @@ -674,7 +667,7 @@ pub mod flight_service_server {
impl<T, B> tonic::codegen::Service<http::Request<B>> for FlightServiceServer<T>
where
T: FlightService,
B: Body + Send + Sync + 'static,
B: Body + Send + 'static,
B::Error: Into<StdError> + Send + 'static,
{
type Response = http::Response<tonic::body::BoxBody>;
Expand Down
3 changes: 2 additions & 1 deletion arrow-pyarrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ repository = "https://github.com/apache/arrow-rs"
authors = ["Apache Arrow <[email protected]>"]
license = "Apache-2.0"
keywords = [ "arrow" ]
edition = "2018"
edition = "2021"
rust-version = "1.56"

[lib]
name = "arrow_pyarrow_integration_testing"
Expand Down
3 changes: 2 additions & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ include = [
"src/**/*.rs",
"Cargo.toml",
]
edition = "2018"
edition = "2021"
rust-version = "1.56"

[lib]
name = "arrow"
Expand Down
45 changes: 30 additions & 15 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -891,10 +891,18 @@ mod tests {
assert!(binary_array.is_valid(i));
assert!(!binary_array.is_null(i));
}
}

#[test]
fn test_binary_array_with_offsets() {
let values: [u8; 12] = [
b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
];
let offsets: [i32; 4] = [0, 5, 5, 12];

// Test binary array with offset
let array_data = ArrayData::builder(DataType::Binary)
.len(4)
.len(2)
.offset(1)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down Expand Up @@ -947,10 +955,18 @@ mod tests {
assert!(binary_array.is_valid(i));
assert!(!binary_array.is_null(i));
}
}

#[test]
fn test_large_binary_array_with_offsets() {
let values: [u8; 12] = [
b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
];
let offsets: [i64; 4] = [0, 5, 5, 12];

// Test binary array with offset
let array_data = ArrayData::builder(DataType::LargeBinary)
.len(4)
.len(2)
.offset(1)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down Expand Up @@ -1138,7 +1154,7 @@ mod tests {
.build()
.unwrap();
let list_array = ListArray::from(array_data);
BinaryArray::from(list_array);
drop(BinaryArray::from(list_array));
}

#[test]
Expand Down Expand Up @@ -1196,28 +1212,27 @@ mod tests {

#[test]
#[should_panic(
expected = "FixedSizeBinaryArray can only be created from list array of u8 values \
(i.e. FixedSizeList<PrimitiveArray<u8>>)."
expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
)]
fn test_fixed_size_binary_array_from_incorrect_list_array() {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from_slice_ref(&values))
.add_child_data(ArrayData::builder(DataType::Boolean).build().unwrap())
.build()
.unwrap();

let array_data = ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", DataType::Binary, false)),
4,
))
.len(3)
.add_child_data(values_data)
.build()
.unwrap();
let array_data = unsafe {
ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", DataType::Binary, false)),
4,
))
.len(3)
.add_child_data(values_data)
.build_unchecked()
};
let list_array = FixedSizeListArray::from(array_data);
FixedSizeBinaryArray::from(list_array);
drop(FixedSizeBinaryArray::from(list_array));
}

#[test]
Expand Down
11 changes: 6 additions & 5 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,11 @@ mod tests {
#[should_panic(expected = "BooleanArray data should contain a single buffer only \
(values buffer)")]
fn test_boolean_array_invalid_buffer_len() {
let data = ArrayData::builder(DataType::Boolean)
.len(5)
.build()
.unwrap();
BooleanArray::from(data);
let data = unsafe {
ArrayData::builder(DataType::Boolean)
.len(5)
.build_unchecked()
};
drop(BooleanArray::from(data));
}
}
Loading