Skip to content

Commit

Permalink
Merge branch 'apache:main' into dev0
Browse files Browse the repository at this point in the history
  • Loading branch information
comphead authored Nov 13, 2023
2 parents d0606c1 + 4fb4b21 commit 138e977
Show file tree
Hide file tree
Showing 392 changed files with 28,693 additions and 14,999 deletions.
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ If there are user-facing changes then we may require documentation to be updated

<!--
If there are any breaking changes to public APIs, please add the `api change` label.
-->
-->
4 changes: 2 additions & 2 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: "14"
node-version: "20"
- name: Prettier check
run: |
# if you encounter error, rerun the command below and commit the changes
Expand Down
23 changes: 5 additions & 18 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,19 +139,7 @@ jobs:
# test datafusion-sql examples
cargo run --example sql
# test datafusion-examples
cargo run --example avro_sql --features=datafusion/avro
cargo run --example csv_sql
cargo run --example custom_datasource
cargo run --example dataframe
cargo run --example dataframe_in_memory
cargo run --example deserialize_to_struct
cargo run --example expr_api
cargo run --example parquet_sql
cargo run --example parquet_sql_multiple_files
cargo run --example memtable
cargo run --example rewrite_expr
cargo run --example simple_udf
cargo run --example simple_udaf
ci/scripts/rust_example.sh
- name: Verify Working Directory Clean
run: git diff --exit-code

Expand Down Expand Up @@ -505,12 +493,11 @@ jobs:

- name: Check Cargo.toml formatting
run: |
# if you encounter error, try rerun the command below, finally run 'git diff' to
# check which Cargo.toml introduces formatting violation
# if you encounter an error, try running 'cargo tomlfmt -p path/to/Cargo.toml' to fix the formatting automatically.
# If the error persists, you need to manually edit the Cargo.toml file that introduces the formatting violation.
#
# ignore ./Cargo.toml because putting workspaces in multi-line lists makes it easy to read
ci/scripts/rust_toml_fmt.sh
git diff --exit-code
config-docs-check:
name: check configs.md is up-to-date
Expand All @@ -526,9 +513,9 @@ jobs:
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: "14"
node-version: "20"
- name: Check if configs.md has been modified
run: |
# If you encounter an error, run './dev/update_config_docs.sh' and commit
Expand Down
51 changes: 43 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ members = [
"datafusion/substrait",
"datafusion/wasmtest",
"datafusion-examples",
"docs",
"test-utils",
"benchmarks",
]
Expand All @@ -45,17 +46,50 @@ license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.70"
version = "32.0.0"
version = "33.0.0"

[workspace.dependencies]
arrow = { version = "47.0.0", features = ["prettyprint"] }
arrow-array = { version = "47.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "47.0.0", default-features = false }
arrow-flight = { version = "47.0.0", features = ["flight-sql-experimental"] }
arrow-schema = { version = "47.0.0", default-features = false }
parquet = { version = "47.0.0", features = ["arrow", "async", "object_store"] }
sqlparser = { version = "0.38.0", features = ["visitor"] }
arrow = { version = "~48.0.1", features = ["prettyprint"] }
arrow-array = { version = "~48.0.1", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "~48.0.1", default-features = false }
arrow-flight = { version = "~48.0.1", features = ["flight-sql-experimental"] }
arrow-ord = { version = "~48.0.1", default-features = false }
arrow-schema = { version = "~48.0.1", default-features = false }
async-trait = "0.1.73"
bigdecimal = "0.4.1"
bytes = "1.4"
ctor = "0.2.0"
datafusion = { path = "datafusion/core" }
datafusion-common = { path = "datafusion/common" }
datafusion-expr = { path = "datafusion/expr" }
datafusion-sql = { path = "datafusion/sql" }
datafusion-optimizer = { path = "datafusion/optimizer" }
datafusion-physical-expr = { path = "datafusion/physical-expr" }
datafusion-physical-plan = { path = "datafusion/physical-plan" }
datafusion-execution = { path = "datafusion/execution" }
datafusion-proto = { path = "datafusion/proto" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest" }
datafusion-substrait = { path = "datafusion/substrait" }
dashmap = "5.4.0"
doc-comment = "0.3"
env_logger = "0.10"
futures = "0.3"
half = "2.2.1"
indexmap = "2.0.0"
itertools = "0.11"
log = "^0.4"
num_cpus = "1.13.0"
object_store = { version = "0.7.0", default-features = false }
parking_lot = "0.12"
parquet = { version = "~48.0.1", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
sqlparser = { version = "0.39.0", features = ["visitor"] }
tempfile = "3"
thiserror = "1.0.44"
chrono = { version = "0.4.31", default-features = false }
url = "2.2"

[profile.release]
codegen-units = 1
Expand All @@ -74,3 +108,4 @@ opt-level = 3
overflow-checks = false
panic = 'unwind'
rpath = false

20 changes: 14 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,25 @@ Here are links to some important information
DataFusion is great for building projects such as domain specific query engines, new database platforms and data pipelines, query languages and more.
It lets you start quickly from a fully working engine, and then customize those features specific to your use. [Click Here](https://arrow.apache.org/datafusion/user-guide/introduction.html#known-users) to see a list of known users.

## Contributing to DataFusion

Please see the [developer’s guide] for contributing and [communication] for getting in touch with us.

[developer’s guide]: https://arrow.apache.org/datafusion/contributor-guide/index.html#developer-s-guide
[communication]: https://arrow.apache.org/datafusion/contributor-guide/communication.html

## Crate features

This crate has several [features] which can be specified in your `Cargo.toml`.

[features]: https://doc.rust-lang.org/cargo/reference/features.html

Default features:

- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
- `encoding_expressions`: `encode` and `decode` functions
- `parquet`: support for reading the [Apache Parquet] format
- `regex_expressions`: regular expression functions, such as `regexp_match`
- `unicode_expressions`: Include unicode aware functions such as `character_length`

Expand All @@ -55,16 +67,12 @@ Optional features:
- `avro`: support for reading the [Apache Avro] format
- `backtrace`: include backtrace information in error messages
- `pyarrow`: conversions between PyArrow and DataFusion types
- `serde`: enable arrow-schema's `serde` feature
- `simd`: enable arrow-rs's manual `SIMD` kernels (requires Rust `nightly`)

[apache avro]: https://avro.apache.org/
[apache parquet]: https://parquet.apache.org/

## Rust Version Compatibility

This crate is tested with the latest stable version of Rust. We do not currently test against other, older versions of the Rust compiler.

## Contributing to DataFusion

The [developer’s guide] contains information on how to contribute.

[developer’s guide]: https://arrow.apache.org/datafusion/contributor-guide/index.html#developer-s-guide
20 changes: 10 additions & 10 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-benchmarks"
description = "DataFusion Benchmarks"
version = "32.0.0"
version = "33.0.0"
edition = { workspace = true }
authors = ["Apache Arrow <[email protected]>"]
homepage = "https://github.com/apache/arrow-datafusion"
Expand All @@ -34,20 +34,20 @@ snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = { workspace = true }
datafusion = { path = "../datafusion/core", version = "32.0.0" }
datafusion-common = { path = "../datafusion/common", version = "32.0.0" }
env_logger = "0.10"
futures = "0.3"
log = "^0.4"
datafusion = { path = "../datafusion/core", version = "33.0.0" }
datafusion-common = { path = "../datafusion/common", version = "33.0.0" }
env_logger = { workspace = true }
futures = { workspace = true }
log = { workspace = true }
mimalloc = { version = "0.1", optional = true, default-features = false }
num_cpus = "1.13.0"
parquet = { workspace = true }
num_cpus = { workspace = true }
parquet = { workspace = true, default-features = true }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.78"
serde_json = { workspace = true }
snmalloc-rs = { version = "0.3", optional = true }
structopt = { version = "0.3", default-features = false }
test-utils = { path = "../test-utils/", version = "0.1.0" }
tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { path = "../datafusion/proto", version = "32.0.0" }
datafusion-proto = { path = "../datafusion/proto", version = "33.0.0" }
35 changes: 35 additions & 0 deletions ci/scripts/rust_example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Check formatting of the examples, then run every example that lives directly
# in datafusion-examples/examples/. Any failing command aborts the script (-e)
# and every command is traced (-x).
set -ex
cd datafusion-examples/examples/
cargo fmt --all -- --check

# Iterate with a glob instead of parsing `ls` output so that entry names are
# never word-split or glob-expanded a second time.
for filename in *; do
  # An unmatched glob stays literal; skip it so an empty directory is a no-op.
  [[ -e "$filename" ]] || continue

  # Skip tests that rely on external storage and flight (they live in
  # subdirectories, so every directory entry is skipped).
  # TODO: Currently, catalog.rs is placed in the external-dependence directory because there is a problem parsing
  # the parquet file of the external parquet-test that it currently relies on.
  # We will wait for this issue[https://github.com/apache/arrow-datafusion/issues/8041] to be resolved.
  if [[ -d "$filename" ]]; then
    continue
  fi

  # The example name is the file name without its ".rs" suffix.
  example_name=$(basename -- "$filename" ".rs")
  cargo run --example "$example_name"
done
8 changes: 7 additions & 1 deletion ci/scripts/rust_toml_fmt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,11 @@
# specific language governing permissions and limitations
# under the License.

# Run cargo-tomlfmt with flag `-d` in dry run to check formatting
# without overwriting the file. If any errors occur, you may want to
# rerun 'cargo tomlfmt -p path/to/Cargo.toml' without '-d' to fix
# the formatting automatically.
set -ex
find . -mindepth 2 -name 'Cargo.toml' -exec cargo tomlfmt -p {} \;
# Run cargo-tomlfmt in dry-run mode (`-d`) on every nested Cargo.toml
# (`-mindepth 2` leaves out ./Cargo.toml). Paths are read NUL-delimited on
# file descriptor 3 so that whitespace or glob characters in a path cannot
# split it, and so the command's stdin is left untouched. The command stays in
# the loop body, so a failure still stops the script when `set -e` is active.
while IFS= read -r -d '' toml <&3; do
  cargo tomlfmt -d -p "$toml"
done 3< <(find . -mindepth 2 -name 'Cargo.toml' -print0)
Loading

0 comments on commit 138e977

Please sign in to comment.