Skip to content

Commit

Permalink
[Parquet] Read list field correctly (#234)
Browse files Browse the repository at this point in the history
  • Loading branch information
nevi-me authored Apr 29, 2021
1 parent ed00e4d commit 2121150
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
13 changes: 11 additions & 2 deletions parquet/src/arrow/array_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1303,16 +1303,25 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
item_type
))),
_ => {
// A Parquet list is a group type with a single child. The name of the Arrow
// list's item field is taken from that child's field name.
let mut list_child = list_type.get_fields().first().ok_or(ArrowError(
"List GroupType should have a field".to_string(),
))?;
// If the child is named "list" and has fields of its own, use its first field instead.
if list_child.name() == "list" && !list_child.get_fields().is_empty() {
list_child = list_child.get_fields().first().unwrap();
}
let arrow_type = self
.arrow_schema
.field_with_name(list_type.name())
.ok()
.map(|f| f.data_type().to_owned())
.unwrap_or_else(|| {
ArrowType::List(Box::new(Field::new(
list_type.name(),
list_child.name(),
item_reader_type.clone(),
list_type.is_optional(),
list_child.is_optional(),
)))
});

Expand Down
4 changes: 2 additions & 2 deletions parquet/src/arrow/arrow_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ impl<W: 'static + ParquetWriter> ArrowWriter<W> {
let mut row_group_writer = self.writer.next_row_group()?;
for (array, field) in batch.columns().iter().zip(batch.schema().fields()) {
let mut levels = batch_level.calculate_array_levels(array, field);
// Reverse levels as we pop() them when writing arrays
levels.reverse();
write_leaves(&mut row_group_writer, array, &mut levels)?;
}

Expand Down Expand Up @@ -741,7 +743,6 @@ mod tests {
}

#[test]
#[ignore = "See ARROW-11294, data is correct but list field name is incorrect"]
fn arrow_writer_complex() {
// define schema
let struct_field_d = Field::new("d", DataType::Float64, true);
Expand Down Expand Up @@ -934,7 +935,6 @@ mod tests {
let actual_data = actual_batch.column(i).data();

assert_eq!(expected_data, actual_data);
// assert_eq!(expected_data, actual_data, "L: {:#?}\nR: {:#?}", expected_data, actual_data);
}
}

Expand Down

0 comments on commit 2121150

Please sign in to comment.