From 7d6832f90cb095f07bbc44b2f86f5482187e3243 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 18 Jul 2022 08:26:54 -0400 Subject: [PATCH 1/2] Remove preserve_order feature from serde_json (#2095) --- arrow/Cargo.toml | 2 +- integration-testing/Cargo.toml | 2 +- parquet/Cargo.toml | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 7b3d4c64ad71..151cd2987938 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -41,7 +41,7 @@ bench = false ahash = { version = "0.7", default-features = false } serde = { version = "1.0", default-features = false } serde_derive = { version = "1.0", default-features = false } -serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } indexmap = { version = "1.9", default-features = false, features = ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } num = { version = "0.4", default-features = false, features = ["std"] } diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml index 4cff73aa7011..897c7cfa5a51 100644 --- a/integration-testing/Cargo.toml +++ b/integration-testing/Cargo.toml @@ -40,7 +40,7 @@ hex = { version = "0.4", default-features = false } prost = { version = "0.10", default-features = false } serde = { version = "1.0", default-features = false, features = ["rc"] } serde_derive = { version = "1.0", default-features = false } -serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } tokio = { version = "1.0", default-features = false } tonic = { version = "0.7", default-features = false } tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true } diff --git a/parquet/Cargo.toml 
b/parquet/Cargo.toml index 64819077a744..498c85441120 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -45,9 +45,9 @@ num-bigint = { version = "0.4", default-features = false } arrow = { path = "../arrow", version = "18.0.0", optional = true, default-features = false, features = ["ipc"] } base64 = { version = "0.13", default-features = false, features = ["std"], optional = true } clap = { version = "3", default-features = false, features = ["std", "derive", "env"], optional = true } -serde_json = { version = "1.0", default-features = false, optional = true } +serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } -futures = { version = "0.3", default-features = false, features = ["std" ], optional = true } +futures = { version = "0.3", default-features = false, features = ["std"], optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "fs", "rt", "io-util"] } [dev-dependencies] @@ -55,11 +55,11 @@ base64 = { version = "0.13", default-features = false, features = ["std"] } criterion = { version = "0.3", default-features = false } snap = { version = "1.0", default-features = false } tempfile = { version = "3.0", default-features = false } -brotli = { version = "3.3", default-features = false, features = [ "std" ] } -flate2 = { version = "1.0", default-features = false, features = [ "rust_backend" ] } +brotli = { version = "3.3", default-features = false, features = ["std"] } +flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] } lz4 = { version = "1.23", default-features = false } zstd = { version = "0.11", default-features = false } -serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } +serde_json = { version = "1.0", features = ["std"], default-features = false } arrow = { path = "../arrow", version = "18.0.0", 
default-features = false, features = ["ipc", "test_utils", "prettyprint"] } [package.metadata.docs.rs] @@ -70,7 +70,7 @@ default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"] # Enable arrow reader/writer APIs arrow = ["dep:arrow", "base64"] # Enable CLI tools -cli = ["serde_json", "base64", "clap","arrow/csv"] +cli = ["serde_json", "base64", "clap", "arrow/csv"] # Enable internal testing APIs test_common = ["arrow/test_utils"] # Experimental, unstable functionality primarily used for testing From a2ec7cb0dd6f08fe5f87691894ce5c03aa423c83 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 18 Jul 2022 14:09:29 -0400 Subject: [PATCH 2/2] Fix tests --- arrow/src/json/writer.rs | 93 +++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs index 0755a5758e4e..f21dad04313b 100644 --- a/arrow/src/json/writer.rs +++ b/arrow/src/json/writer.rs @@ -745,6 +745,21 @@ mod tests { use super::*; + /// Asserts that the NDJSON `input` is semantically identical to `expected` + fn assert_json_eq(input: &[u8], expected: &str) { + let expected: Vec<Option<Value>> = expected + .split('\n') + .map(|s| (!s.is_empty()).then(|| serde_json::from_str(s).unwrap())) + .collect(); + + let actual: Vec<Option<Value>> = input + .split(|b| *b == b'\n') + .map(|s| (!s.is_empty()).then(|| serde_json::from_slice(s).unwrap())) + .collect(); + + assert_eq!(expected, actual); + } + #[test] fn write_simple_rows() { let schema = Schema::new(vec![ @@ -765,14 +780,14 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":1,"c2":"a"} {"c1":2,"c2":"b"} {"c1":3,"c2":"c"} {"c2":"d"} {"c1":5} -"# +"#, ); } @@ -796,14 +811,14 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":"a","c2":"a"} {"c2":"b"} {"c1":"c"} {"c1":"d","c2":"d"} {} -"#
+"#, ); } @@ -846,14 +861,14 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":"cupcakes","c2":"sdsd"} {"c1":"foo","c2":"sdsd"} {"c1":"foo"} {"c2":"sd"} {"c1":"cupcakes","c2":"sdsd"} -"# +"#, ); } @@ -905,11 +920,11 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"nanos":"2018-11-13 17:11:10.011375885","micros":"2018-11-13 17:11:10.011375","millis":"2018-11-13 17:11:10.011","secs":"2018-11-13 17:11:10","name":"a"} {"name":"b"} -"# +"#, ); } @@ -951,11 +966,11 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"date32":"2018-11-13","date64":"2018-11-13","name":"a"} {"name":"b"} -"# +"#, ); } @@ -994,11 +1009,11 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"} {"name":"b"} -"# +"#, ); } @@ -1037,11 +1052,11 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"duration_sec":"PT120S","duration_msec":"PT0.120S","duration_usec":"PT0.000120S","duration_nsec":"PT0.000000120S","name":"a"} {"name":"b"} -"# +"#, ); } @@ -1093,12 +1108,12 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"} {"c1":{"c12":{"c121":"f"}},"c2":"b"} {"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"} -"# +"#, ); } @@ -1136,14 +1151,14 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":["a","a1"],"c2":1} {"c1":["b"],"c2":2} 
{"c1":["c"],"c2":3} {"c1":["d"],"c2":4} {"c1":["e"],"c2":5} -"# +"#, ); } @@ -1196,12 +1211,12 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":[[1,2],[3]],"c2":"foo"} {"c1":[],"c2":"bar"} {"c1":[[4,5,6]]} -"# +"#, ); } @@ -1271,12 +1286,12 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"c1":[{"c11":1,"c12":{"c121":"e"}},{"c12":{"c121":"f"}}],"c2":1} {"c2":2} {"c1":[{"c11":5,"c12":{"c121":"g"}}],"c2":3} -"# +"#, ); } @@ -1396,15 +1411,15 @@ mod tests { // that implementations differ on the treatment of a null struct. // It would be more accurate to return a null struct, so this can be done // as a follow up. - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"list":[{"ints":1}]} {"list":[{}]} {"list":[]} {} {"list":[{}]} {"list":[{}]} -"# +"#, ); } @@ -1455,15 +1470,15 @@ mod tests { writer.write_batches(&[batch]).unwrap(); } - assert_eq!( - String::from_utf8(buf).unwrap(), + assert_json_eq( + &buf, r#"{"map":{"foo":10}} {"map":null} {"map":{}} {"map":{"bar":20,"baz":30,"qux":40}} {"map":{"quux":50}} {"map":{}} -"# +"#, ); }