Skip to content

Commit

Permalink
Use item as list root name, enable a bunch of tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nicklan committed Aug 6, 2024
1 parent e0a2e5e commit 0b3029a
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
2 changes: 1 addition & 1 deletion kernel/src/engine/arrow_conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use itertools::Itertools;
use crate::error::Error;
use crate::schema::{ArrayType, DataType, MapType, PrimitiveType, StructField, StructType};

pub(crate) const LIST_ARRAY_ROOT: &str = "item";
pub(crate) const LIST_ARRAY_ROOT: &str = "element";
pub(crate) const MAP_ROOT_DEFAULT: &str = "key_value";
pub(crate) const MAP_KEY_DEFAULT: &str = "key";
pub(crate) const MAP_VALUE_DEFAULT: &str = "value";
Expand Down
43 changes: 28 additions & 15 deletions kernel/tests/golden_tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,26 @@ async fn read_expected(path: &Path) -> DeltaResult<RecordBatch> {

// copied from DAT
fn sort_record_batch(batch: RecordBatch) -> DeltaResult<RecordBatch> {
// Sort by all columns
if batch.num_rows() < 2 {
// 0 or 1 rows doesn't need sorting
return Ok(batch);
}
// Sort by as many columns as possible
let mut sort_columns = vec![];
for col in batch.columns() {
match col.data_type() {
DataType::Struct(_) | DataType::List(_) | DataType::Map(_, _) => {
// can't sort structs, lists, or maps
DataType::Struct(_) | DataType::Map(_, _) => {
// can't sort by structs or maps
}
DataType::List(list_field) => {
let list_dt = list_field.data_type();
if list_dt.is_primitive() {
// we can sort lists of primitives
sort_columns.push(SortColumn {
values: col.clone(),
options: None,
})
}
}
_ => sort_columns.push(SortColumn {
values: col.clone(),
Expand Down Expand Up @@ -129,10 +143,7 @@ fn assert_columns_match(actual: &[Arc<dyn Array>], expected: &[Arc<dyn Array>])
let expected = normalize_col(expected.clone());
// note that array equality includes data_type equality
// See: https://arrow.apache.org/rust/arrow_data/equal/fn.equal.html
assert_eq!(
&actual, &expected,
"Column data didn't match. Got {actual:?}, expected {expected:?}"
);
assert_eq!(&actual, &expected, "Column data didn't match.");
}
}

Expand Down Expand Up @@ -173,7 +184,7 @@ async fn latest_snapshot_test(
let result = concat_batches(&schema, &batches).unwrap();
let result = sort_record_batch(result).unwrap();
let expected = sort_record_batch(expected).unwrap();
assert_columns_match(expected.columns(), result.columns());
assert_columns_match(result.columns(), expected.columns());
assert_schema_fields_match(expected.schema().as_ref(), result.schema().as_ref());
assert!(
expected.num_rows() == result.num_rows(),
Expand Down Expand Up @@ -304,8 +315,8 @@ golden_test!(
);
golden_test!("checkpoint", checkpoint_test);
skip_test!("corrupted-last-checkpoint-kernel": "BUG: should fallback to old commits/checkpoint");
skip_test!("data-reader-array-complex-objects": "list field expected name item but got name element");
skip_test!("data-reader-array-primitives": "list field expected name item but got name element");
golden_test!("data-reader-array-complex-objects", latest_snapshot_test);
golden_test!("data-reader-array-primitives", latest_snapshot_test);
golden_test!("data-reader-date-types-America", latest_snapshot_test);
golden_test!("data-reader-date-types-Asia", latest_snapshot_test);
golden_test!("data-reader-date-types-Etc", latest_snapshot_test);
Expand All @@ -316,9 +327,11 @@ golden_test!("data-reader-date-types-utc", latest_snapshot_test);
golden_test!("data-reader-escaped-chars", latest_snapshot_test);
skip_test!("data-reader-map": "map field named 'entries' vs 'key_value'");
golden_test!("data-reader-nested-struct", latest_snapshot_test);
skip_test!("data-reader-nullable-field-invalid-schema-key":
"list field expected name item but got name element");
skip_test!("data-reader-partition-values": "list field expected name item but got name element");
golden_test!(
"data-reader-nullable-field-invalid-schema-key",
latest_snapshot_test
);
skip_test!("data-reader-partition-values": "Golden data needs to have 2021-09-08T11:11:11+00:00 as expected value for as_timestamp col");
golden_test!("data-reader-primitives", latest_snapshot_test);
golden_test!("data-reader-timestamp_ntz", latest_snapshot_test);
skip_test!("data-reader-timestamp_ntz-id-mode": "id column mapping mode not supported");
Expand Down Expand Up @@ -366,8 +379,8 @@ golden_test!("multi-part-checkpoint", latest_snapshot_test);
golden_test!("only-checkpoint-files", latest_snapshot_test);

// TODO some of the parquet tests use projections
skip_test!("parquet-all-types": "list field expected name item but got name element");
skip_test!("parquet-all-types-legacy-format": "list field expected name item but got name element");
skip_test!("parquet-all-types": "schemas disagree about nullability, need to figure out which is correct and adjust");
skip_test!("parquet-all-types-legacy-format": "legacy parquet has name `array`, we should have adjusted this to `element`");
golden_test!("parquet-decimal-dictionaries", latest_snapshot_test);
golden_test!("parquet-decimal-dictionaries-v1", latest_snapshot_test);
golden_test!("parquet-decimal-dictionaries-v2", latest_snapshot_test);
Expand Down

0 comments on commit 0b3029a

Please sign in to comment.