Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve JSON decoder errors (#4076) #4079

Merged
merged 3 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions arrow-json/src/reader/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use arrow_data::ArrayData;
use arrow_schema::ArrowError;

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{tape_error, ArrayDecoder};
use crate::reader::ArrayDecoder;

#[derive(Default)]
pub struct BooleanArrayDecoder {}
Expand All @@ -34,7 +34,7 @@ impl ArrayDecoder for BooleanArrayDecoder {
TapeElement::Null => builder.append_null(),
TapeElement::True => builder.append_value(true),
TapeElement::False => builder.append_value(false),
d => return Err(tape_error(d, "boolean")),
_ => return Err(tape.error(*p, "boolean")),
}
}

Expand Down
4 changes: 2 additions & 2 deletions arrow-json/src/reader/decimal_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use arrow_data::ArrayData;
use arrow_schema::ArrowError;

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{tape_error, ArrayDecoder};
use crate::reader::ArrayDecoder;

pub struct DecimalArrayDecoder<D: DecimalType> {
precision: u8,
Expand Down Expand Up @@ -64,7 +64,7 @@ where
let value = parse_decimal::<D>(s, self.precision, self.scale)?;
builder.append_value(value)
}
d => return Err(tape_error(d, "decimal")),
_ => return Err(tape.error(*p, "decimal")),
}
}

Expand Down
8 changes: 3 additions & 5 deletions arrow-json/src/reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, tape_error, ArrayDecoder};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
use arrow_array::OffsetSizeTrait;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
Expand Down Expand Up @@ -78,17 +78,15 @@ impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
nulls.append(false);
*p + 1
}
(d, _) => return Err(tape_error(d, "[")),
_ => return Err(tape.error(*p, "[")),
};

let mut cur_idx = *p + 1;
while cur_idx < end_idx {
child_pos.push(cur_idx);

// Advance to next field
cur_idx = tape
.next(cur_idx)
.map_err(|d| tape_error(d, "list value"))?;
cur_idx = tape.next(cur_idx, "list value")?;
}

let offset = O::from_usize(child_pos.len()).ok_or_else(|| {
Expand Down
8 changes: 4 additions & 4 deletions arrow-json/src/reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, tape_error, ArrayDecoder};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_buffer::ArrowNativeType;
Expand Down Expand Up @@ -104,14 +104,14 @@ impl ArrayDecoder for MapArrayDecoder {
nulls.append(false);
p + 1
}
(d, _) => return Err(tape_error(d, "{")),
_ => return Err(tape.error(p, "{")),
};

let mut cur_idx = p + 1;
while cur_idx < end_idx {
let key = cur_idx;
let value = tape.next(key).map_err(|d| tape_error(d, "map key"))?;
cur_idx = tape.next(value).map_err(|d| tape_error(d, "map value"))?;
let value = tape.next(key, "map key")?;
cur_idx = tape.next(value, "map value")?;

key_pos.push(key);
value_pos.push(value);
Expand Down
115 changes: 101 additions & 14 deletions arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -632,10 +632,6 @@ fn make_decoder(
}
}

fn tape_error(d: TapeElement, expected: &str) -> ArrowError {
ArrowError::JsonError(format!("expected {expected} got {d}"))
}

#[cfg(test)]
mod tests {
use std::fs::File;
Expand Down Expand Up @@ -962,29 +958,29 @@ mod tests {
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));

let buf = r#"{"a": 1}"#;
let result = ReaderBuilder::new(schema.clone())
let err = ReaderBuilder::new(schema.clone())
.with_batch_size(1024)
.build(Cursor::new(buf.as_bytes()))
.unwrap()
.read();
.read()
.unwrap_err();

assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"Json error: expected string got number".to_string()
err.to_string(),
"Json error: whilst decoding field 'a': expected string got 1"
);

let buf = r#"{"a": true}"#;
let result = ReaderBuilder::new(schema)
let err = ReaderBuilder::new(schema)
.with_batch_size(1024)
.build(Cursor::new(buf.as_bytes()))
.unwrap()
.read();
.read()
.unwrap_err();

assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"Json error: expected string got true".to_string()
err.to_string(),
"Json error: whilst decoding field 'a': expected string got true"
);
}

Expand Down Expand Up @@ -1866,4 +1862,95 @@ mod tests {
assert_eq!(3, num_batches);
assert_eq!(100000000000011, sum_a);
}

#[test]
fn test_decoder_error() {
let schema = Arc::new(Schema::new(vec![Field::new_struct(
"a",
vec![Field::new("child", DataType::Int32, false)],
true,
)]));

let parse_err = |s: &str| {
ReaderBuilder::new(schema.clone())
.build(Cursor::new(s.as_bytes()))
.unwrap()
.next()
.unwrap()
.unwrap_err()
.to_string()
};

let err = parse_err(r#"{"a": 123}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': expected { got 123"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

);

let err = parse_err(r#"{"a": ["bar"]}"#);
assert_eq!(
err,
r#"Json error: whilst decoding field 'a': expected { got ["bar"]"#
);

let err = parse_err(r#"{"a": []}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': expected { got []"
);

let err = parse_err(r#"{"a": [{"child": 234}]}"#);
assert_eq!(
err,
r#"Json error: whilst decoding field 'a': expected { got [{"child": 234}]"#
);

let err = parse_err(r#"{"a": [{"child": {"foo": [{"foo": ["bar"]}]}}]}"#);
assert_eq!(
err,
r#"Json error: whilst decoding field 'a': expected { got [{"child": {"foo": [{"foo": ["bar"]}]}}]"#
);

let err = parse_err(r#"{"a": true}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': expected { got true"
);

let err = parse_err(r#"{"a": false}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': expected { got false"
);

let err = parse_err(r#"{"a": "foo"}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': expected { got \"foo\""
);

let err = parse_err(r#"{"a": {"child": false}}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': whilst decoding field 'child': expected primitive got false"
);

let err = parse_err(r#"{"a": {"child": []}}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': whilst decoding field 'child': expected primitive got []"
);

let err = parse_err(r#"{"a": {"child": [123]}}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': whilst decoding field 'child': expected primitive got [123]"
);

let err = parse_err(r#"{"a": {"child": [123, 3465346]}}"#);
assert_eq!(
err,
"Json error: whilst decoding field 'a': whilst decoding field 'child': expected primitive got [123, 3465346]"
);
}
}
4 changes: 2 additions & 2 deletions arrow-json/src/reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{tape_error, ArrayDecoder};
use crate::reader::ArrayDecoder;

/// A trait for JSON-specific primitive parsing logic
///
Expand Down Expand Up @@ -116,7 +116,7 @@ where

builder.append_value(value)
}
d => return Err(tape_error(d, "primitive")),
_ => return Err(tape.error(*p, "primitive")),
}
}

Expand Down
4 changes: 2 additions & 2 deletions arrow-json/src/reader/string_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use arrow_schema::ArrowError;
use std::marker::PhantomData;

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{tape_error, ArrayDecoder};
use crate::reader::ArrayDecoder;

const TRUE: &str = "true";
const FALSE: &str = "false";
Expand Down Expand Up @@ -61,7 +61,7 @@ impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
TapeElement::Number(idx) if coerce_primitive => {
data_capacity += tape.get_string(idx).len();
}
d => return Err(tape_error(d, "string")),
_ => return Err(tape.error(*p, "string")),
}
}

Expand Down
21 changes: 14 additions & 7 deletions arrow-json/src/reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::reader::tape::{Tape, TapeElement};
use crate::reader::{make_decoder, tape_error, ArrayDecoder};
use crate::reader::{make_decoder, ArrayDecoder};
use arrow_array::builder::BooleanBufferBuilder;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -74,15 +74,15 @@ impl ArrayDecoder for StructArrayDecoder {
nulls.append(false);
continue;
}
(d, _) => return Err(tape_error(d, "{")),
_ => return Err(tape.error(*p, "{")),
};

let mut cur_idx = *p + 1;
while cur_idx < end_idx {
// Read field name
let field_name = match tape.get(cur_idx) {
TapeElement::String(s) => tape.get_string(s),
d => return Err(tape_error(d, "field name")),
_ => return Err(tape.error(cur_idx, "field name")),
};

// Update child pos if match found
Expand All @@ -93,17 +93,24 @@ impl ArrayDecoder for StructArrayDecoder {
}

// Advance to next field
cur_idx = tape
.next(cur_idx + 1)
.map_err(|d| tape_error(d, "field value"))?;
cur_idx = tape.next(cur_idx + 1, "field value")?;
}
}

let child_data = self
.decoders
.iter_mut()
.zip(child_pos)
.map(|(d, pos)| d.decode(tape, &pos))
.zip(fields)
.map(|((d, pos), f)| {
d.decode(tape, &pos).map_err(|e| match e {
ArrowError::JsonError(s) => ArrowError::JsonError(format!(
"whilst decoding field '{}': {s}",
f.name()
)),
e => e,
})
})
.collect::<Result<Vec<_>, ArrowError>>()?;

let nulls = nulls
Expand Down
Loading