Skip to content

Commit

Permalink
test: run some more dat tests
Browse files Browse the repository at this point in the history
Signed-off-by: Robert Pack <[email protected]>
  • Loading branch information
roeap committed Jan 16, 2025
1 parent cef0acd commit 4b78477
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ runs:
- name: load DAT
shell: bash
run: |
rm -rf {{ inputs.target-directory }}
rm -rf ${{ inputs.target-directory }}
curl -OL https://github.com/delta-incubator/dat/releases/download/v${{ inputs.version }}/deltalake-dat-v${{ inputs.version }}.tar.gz
mkdir -p {{ inputs.target-directory }}
tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory {{ inputs.target-directory }}
mkdir -p ${{ inputs.target-directory }}
tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory ${{ inputs.target-directory }}
rm deltalake-dat-v${{ inputs.version }}.tar.gz
17 changes: 11 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Format
Expand All @@ -42,7 +42,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: build and lint with clippy
Expand Down Expand Up @@ -79,9 +79,12 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Load DAT data
uses: ./.github/actions/load-dat

- name: Run tests
run: cargo test --verbose --features ${{ env.DEFAULT_FEATURES }}

Expand Down Expand Up @@ -114,7 +117,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

# Install Java and Hadoop for HDFS integration tests
Expand All @@ -129,6 +132,9 @@ jobs:
tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE
echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH
- name: Load DAT data
uses: ./.github/actions/load-dat

- name: Start emulated services
run: docker compose up -d

Expand Down Expand Up @@ -160,7 +166,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Download Lakectl
Expand All @@ -175,4 +181,3 @@ jobs:
- name: Run tests with rustls (default)
run: |
cargo test --features integration_test_lakefs,lakefs,datafusion
14 changes: 7 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ debug = true
debug = "line-tables-only"

[workspace.dependencies]
#delta_kernel = { version = "=0.6.0", features = ["default-engine"] }
delta_kernel = { path = "../delta-kernel-rs/kernel", features = [
"default-engine",
"developer-visibility",
] }
# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [
# delta_kernel = { version = "=0.6.0", features = ["default-engine"] }
# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [
# "default-engine",
# "developer-visibility",
# ] }
delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "caeb70ab78e4d5f3b56b5105fd3587c1046d1e1b", features = [
"default-engine",
"developer-visibility",
] }

# arrow
arrow = { version = "53" }
Expand All @@ -48,7 +48,7 @@ arrow-ord = { version = "53" }
arrow-row = { version = "53" }
arrow-schema = { version = "53" }
arrow-select = { version = "53" }
object_store = { version = "0.11.2" , features = ["cloud"]}
object_store = { version = "0.11.2", features = ["cloud"] }
parquet = { version = "53" }

# datafusion
Expand Down
6 changes: 5 additions & 1 deletion crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ datafusion-functions-aggregate = { workspace = true, optional = true }
# serde
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
strum = { workspace = true}
strum = { workspace = true }

# "stdlib"
bytes = { workspace = true }
Expand Down Expand Up @@ -132,3 +132,7 @@ datafusion = [
datafusion-ext = ["datafusion"]
json = ["parquet/json"]
python = ["arrow/pyarrow"]

[[test]]
name = "dat"
harness = false
99 changes: 99 additions & 0 deletions crates/core/tests/dat.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use std::path::Path;
use std::sync::Arc;

use delta_kernel::Table;
use deltalake_core::kernel::snapshot_next::{LazySnapshot, Snapshot};
use deltalake_test::acceptance::read_dat_case;

/// DAT reader cases that are not exercised by this test binary.
///
/// A case is skipped when its resolved root directory path ends with one of
/// these entries (see the suffix check at the top of each test function).
static SKIPPED_TESTS: &[&str; 1] = &["iceberg_compat_v1"];

/// Checks a single DAT reader case against a [`LazySnapshot`].
///
/// `path` is the matched case file; its parent directory, appended to
/// `CARGO_MANIFEST_DIR`, is used as the case root. Cases whose root ends with
/// an entry of `SKIPPED_TESTS` are reported on stdout and return `Ok(())`.
///
/// # Errors
///
/// Returns an error if the single-threaded tokio runtime cannot be built.
///
/// # Panics
///
/// Panics if `path` has no parent or is not valid UTF-8, if the case, table or
/// snapshot cannot be loaded, or if the snapshot's version or protocol
/// versions differ from the case's table summary.
fn reader_test_lazy(path: &Path) -> datatest_stable::Result<()> {
    let case_dir = path.parent().unwrap().to_str().unwrap();
    let root_dir = format!("{}/{}", env!["CARGO_MANIFEST_DIR"], case_dir);

    // Bail out early for cases that are explicitly excluded.
    if let Some(skipped) = SKIPPED_TESTS
        .iter()
        .find(|name| root_dir.ends_with(**name))
    {
        println!("Skipping test: {}", skipped);
        return Ok(());
    }

    // The test function is synchronous, so drive the async snapshot
    // construction on a dedicated current-thread runtime.
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()?;

    runtime.block_on(async {
        let dat_case = read_dat_case(root_dir).unwrap();

        let table_root = dat_case.table_root().unwrap();
        let table = Table::try_from_uri(table_root).expect("table");
        let store = Arc::new(object_store::local::LocalFileSystem::default());
        let snapshot = LazySnapshot::try_new(table, store, None).await.unwrap();

        // Compare what the snapshot reports with the case's recorded summary.
        let expected = dat_case.table_summary().expect("load summary");
        assert_eq!(snapshot.version(), expected.version);

        let protocol_versions = (
            snapshot.protocol().min_reader_version(),
            snapshot.protocol().min_writer_version(),
        );
        assert_eq!(
            protocol_versions,
            (expected.min_reader_version, expected.min_writer_version)
        );
    });

    Ok(())
}

/// Checks a single DAT reader case; registered as the "eager" variant.
///
/// `path` is the matched case file; its parent directory, appended to
/// `CARGO_MANIFEST_DIR`, is used as the case root. Cases whose root ends with
/// an entry of `SKIPPED_TESTS` are reported on stdout and return `Ok(())`.
///
/// NOTE(review): despite its name this function builds a [`LazySnapshot`],
/// exactly like `reader_test_lazy` — presumably an eager snapshot type was
/// intended here; confirm and switch once its constructor is settled.
///
/// # Errors
///
/// Returns an error if the single-threaded tokio runtime cannot be built.
///
/// # Panics
///
/// Panics if `path` has no parent or is not valid UTF-8, if the case, table or
/// snapshot cannot be loaded, or if the snapshot's version or protocol
/// versions differ from the case's table summary.
fn reader_test_eager(path: &Path) -> datatest_stable::Result<()> {
    let parent = path.parent().unwrap().to_str().unwrap();
    let root_dir = format!("{}/{}", env!["CARGO_MANIFEST_DIR"], parent);

    // Excluded cases succeed without touching the table.
    let excluded = SKIPPED_TESTS
        .iter()
        .copied()
        .find(|name| root_dir.ends_with(*name));
    if let Some(skipped) = excluded {
        println!("Skipping test: {}", skipped);
        return Ok(());
    }

    let rt = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()?;

    rt.block_on(async {
        let test_case = read_dat_case(root_dir).unwrap();

        let uri = test_case.table_root().unwrap();
        let table = Table::try_from_uri(uri).expect("table");
        let local_store = Arc::new(object_store::local::LocalFileSystem::default());
        let snapshot = LazySnapshot::try_new(table, local_store, None)
            .await
            .unwrap();

        // The case's summary holds the expected version and protocol versions.
        let summary = test_case.table_summary().expect("load summary");
        assert_eq!(snapshot.version(), summary.version);

        let actual_protocol = (
            snapshot.protocol().min_reader_version(),
            snapshot.protocol().min_writer_version(),
        );
        let expected_protocol = (summary.min_reader_version, summary.min_writer_version);
        assert_eq!(actual_protocol, expected_protocol);
    });

    Ok(())
}

// Entry point of this test binary. Each (function, root directory, file-name
// regex) triple registers a data-driven test: both reader tests are pointed
// at the same generated DAT reader cases and match each case's
// `test_case_info.json` file.
// NOTE(review): the root is presumably resolved relative to this crate's
// manifest directory (the test functions prepend `CARGO_MANIFEST_DIR` to the
// path they receive) — confirm against the datatest-stable version in use.
datatest_stable::harness!(
reader_test_lazy,
"../../dat/out/reader_tests/generated/",
r"test_case_info\.json",
reader_test_eager,
"../../dat/out/reader_tests/generated/",
r"test_case_info\.json"
);

0 comments on commit 4b78477

Please sign in to comment.