From 7d6832f90cb095f07bbc44b2f86f5482187e3243 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Date: Mon, 18 Jul 2022 08:26:54 -0400
Subject: [PATCH 1/2] Remove preserve_order feature from serde_json (#2095)

---
 arrow/Cargo.toml               |  2 +-
 integration-testing/Cargo.toml |  2 +-
 parquet/Cargo.toml             | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 7b3d4c64ad71..151cd2987938 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -41,7 +41,7 @@ bench = false
 ahash = { version = "0.7", default-features = false }
 serde = { version = "1.0", default-features = false }
 serde_derive = { version = "1.0", default-features = false }
-serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0", default-features = false, features = ["std"] }
 indexmap = { version = "1.9", default-features = false, features = ["std"] }
 rand = { version = "0.8", default-features = false, features =  ["std", "std_rng"], optional = true }
 num = { version = "0.4", default-features = false, features = ["std"] }
diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml
index 4cff73aa7011..897c7cfa5a51 100644
--- a/integration-testing/Cargo.toml
+++ b/integration-testing/Cargo.toml
@@ -40,7 +40,7 @@ hex = { version = "0.4", default-features = false }
 prost = { version = "0.10", default-features = false }
 serde = { version = "1.0", default-features = false, features = ["rc"] }
 serde_derive = { version = "1.0", default-features = false }
-serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0", default-features = false, features = ["std"] }
 tokio = { version = "1.0", default-features = false }
 tonic = { version = "0.7", default-features = false }
 tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true }
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index 64819077a744..498c85441120 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -45,9 +45,9 @@ num-bigint = { version = "0.4", default-features = false }
 arrow = { path = "../arrow", version = "18.0.0", optional = true, default-features = false, features = ["ipc"] }
 base64 = { version = "0.13", default-features = false, features = ["std"], optional = true }
 clap = { version = "3", default-features = false, features = ["std", "derive", "env"], optional = true }
-serde_json = { version = "1.0", default-features = false, optional = true }
+serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
 rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
-futures = { version = "0.3", default-features = false, features = ["std" ], optional = true }
+futures = { version = "0.3", default-features = false, features = ["std"], optional = true }
 tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "fs", "rt", "io-util"] }
 
 [dev-dependencies]
@@ -55,11 +55,11 @@ base64 = { version = "0.13", default-features = false, features = ["std"] }
 criterion = { version = "0.3", default-features = false }
 snap = { version = "1.0", default-features = false }
 tempfile = { version = "3.0", default-features = false }
-brotli = { version = "3.3", default-features = false, features = [ "std" ] }
-flate2 = { version = "1.0", default-features = false, features = [ "rust_backend" ] }
+brotli = { version = "3.3", default-features = false, features = ["std"] }
+flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }
 lz4 = { version = "1.23", default-features = false }
 zstd = { version = "0.11", default-features = false }
-serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] }
+serde_json = { version = "1.0", features = ["std"], default-features = false }
 arrow = { path = "../arrow", version = "18.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint"] }
 
 [package.metadata.docs.rs]
@@ -70,7 +70,7 @@ default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"]
 # Enable arrow reader/writer APIs
 arrow = ["dep:arrow", "base64"]
 # Enable CLI tools
-cli = ["serde_json", "base64", "clap","arrow/csv"]
+cli = ["serde_json", "base64", "clap", "arrow/csv"]
 # Enable internal testing APIs
 test_common = ["arrow/test_utils"]
 # Experimental, unstable functionality primarily used for testing

From a2ec7cb0dd6f08fe5f87691894ce5c03aa423c83 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Date: Mon, 18 Jul 2022 14:09:29 -0400
Subject: [PATCH 2/2] Fix tests

---
 arrow/src/json/writer.rs | 93 +++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 39 deletions(-)

diff --git a/arrow/src/json/writer.rs b/arrow/src/json/writer.rs
index 0755a5758e4e..f21dad04313b 100644
--- a/arrow/src/json/writer.rs
+++ b/arrow/src/json/writer.rs
@@ -745,6 +745,21 @@ mod tests {
 
     use super::*;
 
+    /// Checks that the NDJSON bytes in `input` match the NDJSON text in `expected`,
+    /// comparing each line as a parsed JSON value rather than as raw text.
+    /// Empty lines (such as the one after a trailing newline) compare as `None`.
+    fn assert_json_eq(input: &[u8], expected: &str) {
+        // Parses every newline-delimited line, keeping empty lines as `None`
+        fn parse_lines(bytes: &[u8]) -> Vec<Option<Value>> {
+            bytes
+                .split(|b| *b == b'\n')
+                .map(|l| (!l.is_empty()).then(|| serde_json::from_slice(l).unwrap()))
+                .collect()
+        }
+
+        assert_eq!(parse_lines(expected.as_bytes()), parse_lines(input));
+    }
+
     #[test]
     fn write_simple_rows() {
         let schema = Schema::new(vec![
@@ -765,14 +780,14 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":1,"c2":"a"}
 {"c1":2,"c2":"b"}
 {"c1":3,"c2":"c"}
 {"c2":"d"}
 {"c1":5}
-"#
+"#,
         );
     }
 
@@ -796,14 +811,14 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":"a","c2":"a"}
 {"c2":"b"}
 {"c1":"c"}
 {"c1":"d","c2":"d"}
 {}
-"#
+"#,
         );
     }
 
@@ -846,14 +861,14 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":"cupcakes","c2":"sdsd"}
 {"c1":"foo","c2":"sdsd"}
 {"c1":"foo"}
 {"c2":"sd"}
 {"c1":"cupcakes","c2":"sdsd"}
-"#
+"#,
         );
     }
 
@@ -905,11 +920,11 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"nanos":"2018-11-13 17:11:10.011375885","micros":"2018-11-13 17:11:10.011375","millis":"2018-11-13 17:11:10.011","secs":"2018-11-13 17:11:10","name":"a"}
 {"name":"b"}
-"#
+"#,
         );
     }
 
@@ -951,11 +966,11 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"date32":"2018-11-13","date64":"2018-11-13","name":"a"}
 {"name":"b"}
-"#
+"#,
         );
     }
 
@@ -994,11 +1009,11 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
 {"name":"b"}
-"#
+"#,
         );
     }
 
@@ -1037,11 +1052,11 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"duration_sec":"PT120S","duration_msec":"PT0.120S","duration_usec":"PT0.000120S","duration_nsec":"PT0.000000120S","name":"a"}
 {"name":"b"}
-"#
+"#,
         );
     }
 
@@ -1093,12 +1108,12 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"}
 {"c1":{"c12":{"c121":"f"}},"c2":"b"}
 {"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}
-"#
+"#,
         );
     }
 
@@ -1136,14 +1151,14 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":["a","a1"],"c2":1}
 {"c1":["b"],"c2":2}
 {"c1":["c"],"c2":3}
 {"c1":["d"],"c2":4}
 {"c1":["e"],"c2":5}
-"#
+"#,
         );
     }
 
@@ -1196,12 +1211,12 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":[[1,2],[3]],"c2":"foo"}
 {"c1":[],"c2":"bar"}
 {"c1":[[4,5,6]]}
-"#
+"#,
         );
     }
 
@@ -1271,12 +1286,12 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"c1":[{"c11":1,"c12":{"c121":"e"}},{"c12":{"c121":"f"}}],"c2":1}
 {"c2":2}
 {"c1":[{"c11":5,"c12":{"c121":"g"}}],"c2":3}
-"#
+"#,
         );
     }
 
@@ -1396,15 +1411,15 @@ mod tests {
         // that implementations differ on the treatment of a null struct.
         // It would be more accurate to return a null struct, so this can be done
         // as a follow up.
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"list":[{"ints":1}]}
 {"list":[{}]}
 {"list":[]}
 {}
 {"list":[{}]}
 {"list":[{}]}
-"#
+"#,
         );
     }
 
@@ -1455,15 +1470,15 @@ mod tests {
             writer.write_batches(&[batch]).unwrap();
         }
 
-        assert_eq!(
-            String::from_utf8(buf).unwrap(),
+        assert_json_eq(
+            &buf,
             r#"{"map":{"foo":10}}
 {"map":null}
 {"map":{}}
 {"map":{"bar":20,"baz":30,"qux":40}}
 {"map":{"quux":50}}
 {"map":{}}
-"#
+"#,
         );
     }