Skip to content

Commit

Permalink
Add external variant to ParquetError (#3285) (#3574)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Jan 22, 2023
1 parent 24e5dae commit 892a803
Show file tree
Hide file tree
Showing 9 changed files with 236 additions and 325 deletions.
4 changes: 2 additions & 2 deletions parquet/src/arrow/arrow_writer/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ mod tests {
.unwrap();

let struct_null_level =
calculate_array_levels(batch.column(0), batch.schema().field(0));
calculate_array_levels(batch.column(0), batch.schema().field(0)).unwrap();

// create second batch
// define schema
Expand All @@ -1108,7 +1108,7 @@ mod tests {
.unwrap();

let struct_non_null_level =
calculate_array_levels(batch.column(0), batch.schema().field(0));
calculate_array_levels(batch.column(0), batch.schema().field(0)).unwrap();

// The 2 levels should not be the same
if struct_non_null_level == struct_null_level {
Expand Down
26 changes: 13 additions & 13 deletions parquet/src/column/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@
//! repetition levels and read them to verify write/read correctness.
//!
//! ```rust,no_run
//! use std::{fs, path::Path, sync::Arc};
//!
//! use parquet::{
//! column::{reader::ColumnReader, writer::ColumnWriter},
//! data_type::Int32Type,
//! file::{
//! properties::WriterProperties,
//! reader::{FileReader, SerializedFileReader},
//! writer::SerializedFileWriter,
//! },
//! schema::parser::parse_message_type,
//! };
//! # use std::{fs, path::Path, sync::Arc};
//! #
//! # use parquet::{
//! # column::{reader::ColumnReader, writer::ColumnWriter},
//! # data_type::Int32Type,
//! # file::{
//! # properties::WriterProperties,
//! # reader::{FileReader, SerializedFileReader},
//! # writer::SerializedFileWriter,
//! # },
//! # schema::parser::parse_message_type,
//! # };
//!
//! let path = Path::new("/path/to/column_sample.parquet");
//!
Expand Down Expand Up @@ -111,7 +111,7 @@
//! }
//! }
//!
//! assert_eq!(res, Ok((3, 5)));
//! assert_eq!(res.unwrap(), (3, 5));
//! assert_eq!(values, vec![1, 2, 3, 0, 0, 0, 0, 0]);
//! assert_eq!(def_levels, vec![3, 3, 3, 2, 2, 0, 0, 0]);
//! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1, 0, 0, 0]);
Expand Down
7 changes: 4 additions & 3 deletions parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1946,11 +1946,12 @@ mod tests {
let decoder = get_decoder::<T>(descr, encoding);
match err {
Some(parquet_error) => {
assert!(decoder.is_err());
assert_eq!(decoder.err().unwrap(), parquet_error);
assert_eq!(
decoder.err().unwrap().to_string(),
parquet_error.to_string()
);
}
None => {
assert!(decoder.is_ok());
assert_eq!(decoder.unwrap().encoding(), encoding);
}
}
Expand Down
11 changes: 5 additions & 6 deletions parquet/src/encodings/encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1081,13 +1081,12 @@ mod tests {
let encoder = get_encoder::<T>(encoding);
match err {
Some(parquet_error) => {
assert!(encoder.is_err());
assert_eq!(encoder.err().unwrap(), parquet_error);
}
None => {
assert!(encoder.is_ok());
assert_eq!(encoder.unwrap().encoding(), encoding);
assert_eq!(
encoder.err().unwrap().to_string(),
parquet_error.to_string()
)
}
None => assert_eq!(encoder.unwrap().encoding(), encoding),
}
}

Expand Down
39 changes: 23 additions & 16 deletions parquet/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@

//! Common Parquet errors and macros.
use std::error::Error;
use std::{cell, io, result, str};

#[cfg(feature = "arrow")]
use arrow_schema::ArrowError;

#[derive(Debug, PartialEq, Clone, Eq)]
#[derive(Debug)]
pub enum ParquetError {
/// General Parquet error.
/// Returned when code violates normal workflow of working with Parquet files.
Expand All @@ -39,66 +40,72 @@ pub enum ParquetError {
/// Returned when reading into arrow or writing from arrow.
ArrowError(String),
IndexOutOfBound(usize, usize),
/// An external error variant
External(Box<dyn Error + Send + Sync>),
}

impl std::fmt::Display for ParquetError {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
match *self {
ParquetError::General(ref message) => {
match &self {
ParquetError::General(message) => {
write!(fmt, "Parquet error: {}", message)
}
ParquetError::NYI(ref message) => write!(fmt, "NYI: {}", message),
ParquetError::EOF(ref message) => write!(fmt, "EOF: {}", message),
ParquetError::NYI(message) => write!(fmt, "NYI: {}", message),
ParquetError::EOF(message) => write!(fmt, "EOF: {}", message),
#[cfg(feature = "arrow")]
ParquetError::ArrowError(ref message) => write!(fmt, "Arrow: {}", message),
ParquetError::IndexOutOfBound(ref index, ref bound) => {
ParquetError::ArrowError(message) => write!(fmt, "Arrow: {}", message),
ParquetError::IndexOutOfBound(index, ref bound) => {
write!(fmt, "Index {} out of bound: {}", index, bound)
}
ParquetError::External(e) => write!(fmt, "External: {}", e),
}
}
}

impl std::error::Error for ParquetError {
fn cause(&self) -> Option<&dyn ::std::error::Error> {
None
impl Error for ParquetError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
ParquetError::External(e) => Some(e.as_ref()),
_ => None,
}
}
}

impl From<io::Error> for ParquetError {
fn from(e: io::Error) -> ParquetError {
ParquetError::General(format!("underlying IO error: {}", e))
ParquetError::External(Box::new(e))
}
}

#[cfg(any(feature = "snap", test))]
impl From<snap::Error> for ParquetError {
fn from(e: snap::Error) -> ParquetError {
ParquetError::General(format!("underlying snap error: {}", e))
ParquetError::External(Box::new(e))
}
}

impl From<thrift::Error> for ParquetError {
fn from(e: thrift::Error) -> ParquetError {
ParquetError::General(format!("underlying Thrift error: {}", e))
ParquetError::External(Box::new(e))
}
}

impl From<cell::BorrowMutError> for ParquetError {
fn from(e: cell::BorrowMutError) -> ParquetError {
ParquetError::General(format!("underlying borrow error: {}", e))
ParquetError::External(Box::new(e))
}
}

impl From<str::Utf8Error> for ParquetError {
fn from(e: str::Utf8Error) -> ParquetError {
ParquetError::General(format!("underlying utf8 error: {}", e))
ParquetError::External(Box::new(e))
}
}

#[cfg(feature = "arrow")]
impl From<ArrowError> for ParquetError {
fn from(e: ArrowError) -> ParquetError {
ParquetError::ArrowError(format!("underlying Arrow error: {}", e))
ParquetError::External(Box::new(e))
}
}

Expand Down
24 changes: 8 additions & 16 deletions parquet/src/file/footer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,47 +156,39 @@ mod tests {
fn test_parse_metadata_size_smaller_than_footer() {
let test_file = tempfile::tempfile().unwrap();
let reader_result = parse_metadata(&test_file);
assert!(reader_result.is_err());
assert_eq!(
reader_result.err().unwrap(),
general_err!("Invalid Parquet file. Size is smaller than footer")
reader_result.unwrap_err().to_string(),
"Parquet error: Invalid Parquet file. Size is smaller than footer"
);
}

#[test]
fn test_parse_metadata_corrupt_footer() {
let data = Bytes::from(vec![1, 2, 3, 4, 5, 6, 7, 8]);
let reader_result = parse_metadata(&data);
assert!(reader_result.is_err());
assert_eq!(
reader_result.err().unwrap(),
general_err!("Invalid Parquet file. Corrupt footer")
reader_result.unwrap_err().to_string(),
"Parquet error: Invalid Parquet file. Corrupt footer"
);
}

#[test]
fn test_parse_metadata_invalid_length() {
let test_file = Bytes::from(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1']);
let reader_result = parse_metadata(&test_file);
assert!(reader_result.is_err());
assert_eq!(
reader_result.err().unwrap(),
general_err!(
"Invalid Parquet file. Metadata length is less than zero (-16777216)"
)
reader_result.unwrap_err().to_string(),
"Parquet error: Invalid Parquet file. Metadata length is less than zero (-16777216)"
);
}

#[test]
fn test_parse_metadata_invalid_start() {
let test_file = Bytes::from(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1']);
let reader_result = parse_metadata(&test_file);
assert!(reader_result.is_err());
assert_eq!(
reader_result.err().unwrap(),
general_err!(
"Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes"
)
reader_result.unwrap_err().to_string(),
"Parquet error: Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes"
);
}

Expand Down
24 changes: 12 additions & 12 deletions parquet/src/record/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1536,16 +1536,16 @@ mod tests {
]);

assert_eq!(
ParquetError::General("Cannot access Group as Float".to_string()),
row.get_float(0).unwrap_err()
row.get_float(0).unwrap_err().to_string(),
"Parquet error: Cannot access Group as Float"
);
assert_eq!(
ParquetError::General("Cannot access ListInternal as Float".to_string()),
row.get_float(1).unwrap_err()
row.get_float(1).unwrap_err().to_string(),
"Parquet error: Cannot access ListInternal as Float"
);
assert_eq!(
ParquetError::General("Cannot access MapInternal as Float".to_string()),
row.get_float(2).unwrap_err()
row.get_float(2).unwrap_err().to_string(),
"Parquet error: Cannot access MapInternal as Float",
);
}

Expand Down Expand Up @@ -1680,8 +1680,8 @@ mod tests {
("Y".to_string(), Field::Int(2)),
]))]);
assert_eq!(
general_err!("Cannot access Group as Float".to_string()),
list.get_float(0).unwrap_err()
list.get_float(0).unwrap_err().to_string(),
"Parquet error: Cannot access Group as Float"
);

let list = make_list(vec![Field::ListInternal(make_list(vec![
Expand All @@ -1691,8 +1691,8 @@ mod tests {
Field::Int(12),
]))]);
assert_eq!(
general_err!("Cannot access ListInternal as Float".to_string()),
list.get_float(0).unwrap_err()
list.get_float(0).unwrap_err().to_string(),
"Parquet error: Cannot access ListInternal as Float"
);

let list = make_list(vec![Field::MapInternal(make_map(vec![
Expand All @@ -1701,8 +1701,8 @@ mod tests {
(Field::Int(3), Field::Float(2.3)),
]))]);
assert_eq!(
general_err!("Cannot access MapInternal as Float".to_string()),
list.get_float(0).unwrap_err()
list.get_float(0).unwrap_err().to_string(),
"Parquet error: Cannot access MapInternal as Float",
);
}

Expand Down
17 changes: 7 additions & 10 deletions parquet/src/record/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ impl Iterator for ReaderIter {
mod tests {
use super::*;

use crate::errors::{ParquetError, Result};
use crate::errors::Result;
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::record::api::{Field, Row, RowAccessor, RowFormatter};
use crate::schema::parser::parse_message_type;
Expand Down Expand Up @@ -1452,10 +1452,9 @@ mod tests {
";
let schema = parse_message_type(schema).unwrap();
let res = test_file_reader_rows("nested_maps.snappy.parquet", Some(schema));
assert!(res.is_err());
assert_eq!(
res.unwrap_err(),
general_err!("Root schema does not contain projection")
res.unwrap_err().to_string(),
"Parquet error: Root schema does not contain projection"
);
}

Expand All @@ -1469,10 +1468,9 @@ mod tests {
";
let schema = parse_message_type(schema).unwrap();
let res = test_row_group_rows("nested_maps.snappy.parquet", Some(schema));
assert!(res.is_err());
assert_eq!(
res.unwrap_err(),
general_err!("Root schema does not contain projection")
res.unwrap_err().to_string(),
"Parquet error: Root schema does not contain projection"
);
}

Expand Down Expand Up @@ -1542,10 +1540,9 @@ mod tests {
let reader = SerializedFileReader::try_from(path.as_path()).unwrap();
let res = RowIter::from_file_into(Box::new(reader)).project(proj);

assert!(res.is_err());
assert_eq!(
res.err().unwrap(),
general_err!("Root schema does not contain projection")
res.err().unwrap().to_string(),
"Parquet error: Root schema does not contain projection"
);
}

Expand Down
Loading

0 comments on commit 892a803

Please sign in to comment.