Skip to content

Commit

Permalink
Merge branch 'main' into find_files_pub
Browse files Browse the repository at this point in the history
  • Loading branch information
yjshen authored Jul 25, 2023
2 parents d585838 + f4a4341 commit 93d5a63
Show file tree
Hide file tree
Showing 19 changed files with 2,094 additions and 2,164 deletions.
14 changes: 9 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@ jobs:
runs-on: ${{ matrix.os }}
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: -C debuginfo=1
RUSTFLAGS: -C debuginfo=line-tables-only
# Disable incremental builds by cargo for CI which should save disk space
# and hopefully avoid final link "No space left on device"
CARGO_INCREMENTAL: 0

steps:
- uses: actions/checkout@v3
Expand All @@ -89,9 +91,10 @@ jobs:
name: Integration Tests
runs-on: ubuntu-latest
env:
CARGO_INCREMENTAL: 0
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1"
# <https://doc.rust-lang.org/cargo/reference/profiles.html>
RUSTFLAGS: "-C debuginfo=line-tables-only"
# https://github.com/rust-lang/cargo/issues/10280
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
RUST_BACKTRACE: "1"
Expand Down Expand Up @@ -145,7 +148,8 @@ jobs:
parquet2_test:
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=0"
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

steps:
- uses: actions/checkout@v3
Expand Down
15 changes: 10 additions & 5 deletions .github/workflows/python_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ jobs:
name: Python Build (Python 3.7 PyArrow 8.0.0)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=0"
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

# use the same environment we have for python release
container: quay.io/pypa/manylinux2014_x86_64:2022-09-24-4f086d0
Expand Down Expand Up @@ -72,7 +73,7 @@ jobs:
# Install minimum PyArrow version
pip install -e .[pandas,devel] pyarrow==8.0.0
env:
RUSTFLAGS: "-C debuginfo=0"
RUSTFLAGS: "-C debuginfo=line-tables-only"

- name: Run tests
run: |
Expand All @@ -88,6 +89,7 @@ jobs:
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=1"
CARGO_INCREMENTAL: 0

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -139,7 +141,8 @@ jobs:
name: Python Benchmark
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=0"
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -188,7 +191,8 @@ jobs:
name: PySpark Integration Tests
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=0"
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -227,7 +231,8 @@ jobs:
name: Running with Python ${{ matrix.python-version }}
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=0"
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

strategy:
matrix:
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [rust-v0.13.1](https://github.com/delta-io/delta-rs/tree/rust-v0.13.1) (2023-07-18)

**Fixed bugs:**

* Revert premature merge of an attempted fix for binary column statistics [\#1544](https://github.com/delta-io/delta-rs/pull/1544)

## [rust-v0.13.0](https://github.com/delta-io/delta-rs/tree/rust-v0.13.0) (2023-07-15)

[Full Changelog](https://github.com/delta-io/delta-rs/compare/rust-v0.12.0...rust-v0.13.0)
Expand Down
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ exclude = ["proofs", "delta-inspect"]
[profile.release-with-debug]
inherits = "release"
debug = true

# Reducing the debuginfo for the test profile in order to trim the disk and RAM
# usage during development
# <https://github.com/delta-io/delta-rs/issues/1550>
[profile.test]
debug = "line-tables-only"
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "deltalake-python"
version = "0.10.0"
version = "0.10.1"
authors = ["Qingping Hou <[email protected]>"]
homepage = "https://github.com/delta-io/delta-rs"
license = "Apache-2.0"
Expand Down
4 changes: 1 addition & 3 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "deltalake"
version = "0.13.0"
version = "0.13.1"
rust-version = "1.64"
authors = ["Qingping Hou <[email protected]>"]
homepage = "https://github.com/delta-io/delta.rs"
Expand Down Expand Up @@ -91,8 +91,6 @@ tempdir = { version = "0", optional = true }
dynamodb_lock = { version = "0", default-features = false, optional = true }

[dev-dependencies]
anyhow = "1"
criterion = "0"
dotenvy = "0"
maplit = "1"
pretty_assertions = "1.2.1"
Expand Down
6 changes: 4 additions & 2 deletions rust/examples/recordbatch-writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ use std::sync::Arc;
* example code for writing to Delta tables
*/
#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
async fn main() -> Result<(), DeltaTableError> {
info!("Logger initialized");

let table_uri = std::env::var("TABLE_URI")?;
let table_uri = std::env::var("TABLE_URI").map_err(|e| DeltaTableError::GenericError {
source: Box::new(e),
})?;
info!("Using the location of: {:?}", table_uri);

let table_path = Path::from(table_uri.as_ref());
Expand Down
29 changes: 1 addition & 28 deletions rust/src/action/checkpoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,33 +283,6 @@ pub async fn cleanup_expired_logs_for(
}
}

/// Filter binary from the schema so that it isn't serialized into JSON,
/// as arrow currently does not support this.
///
/// Returns a new `Schema` built from the fields of `schema`:
/// primitive fields whose type name is `"binary"` are dropped, struct fields
/// are rebuilt with their nested fields filtered recursively, and every other
/// field is cloned unchanged.
fn filter_binary(schema: &Schema) -> Schema {
Schema::new(
schema
.get_fields()
.iter()
// Each field maps to `Some(field)` to keep it or `None` to drop it;
// `flat_map` over the `Option` discards the `None`s.
.flat_map(|f| match f.get_type() {
SchemaDataType::primitive(p) => {
// Only the primitive named "binary" is removed.
if p != "binary" {
Some(f.clone())
} else {
None
}
}
// Structs keep their name, nullability and metadata, but their
// nested fields are filtered by a recursive call.
SchemaDataType::r#struct(s) => Some(SchemaField::new(
f.get_name().to_string(),
SchemaDataType::r#struct(filter_binary(&Schema::new(s.get_fields().clone()))),
f.is_nullable(),
f.get_metadata().clone(),
)),
// Any other type is cloned as-is; its contents are not inspected here.
_ => Some(f.clone()),
})
.collect::<Vec<_>>(),
)
}

fn parquet_bytes_from_state(
state: &DeltaTableState,
) -> Result<(CheckPoint, bytes::Bytes), ProtocolError> {
Expand Down Expand Up @@ -384,7 +357,7 @@ fn parquet_bytes_from_state(

// Create the arrow schema that represents the Checkpoint parquet file.
let arrow_schema = delta_log_schema_for_table(
<ArrowSchema as TryFrom<&Schema>>::try_from(&filter_binary(&current_metadata.schema))?,
<ArrowSchema as TryFrom<&Schema>>::try_from(&current_metadata.schema)?,
current_metadata.partition_columns.as_slice(),
use_extended_remove_schema,
);
Expand Down
Loading

0 comments on commit 93d5a63

Please sign in to comment.