Summary of this patch:
  * simdjson.cmake: compile simdjson with the PUBLIC definition
    SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON.
  * SIMDJsonUtil.{h,cpp}: add simdjsonParseIncomplete(), which calls
    iterate_allow_incomplete_json() on a thread-local on-demand parser.
  * sparksql/GetJsonObject.h: GetJsonObjectFunction now parses its input with
    simdjsonParseIncomplete() instead of simdjsonParse().
  * GetJsonObjectTest.cpp: add the incompleteJson test, which covers inputs
    containing stray or trailing commas.

diff --git a/CMake/resolve_dependency_modules/simdjson.cmake b/CMake/resolve_dependency_modules/simdjson.cmake
index a0c55c3758a3..a59ee8401550 100644
--- a/CMake/resolve_dependency_modules/simdjson.cmake
+++ b/CMake/resolve_dependency_modules/simdjson.cmake
@@ -34,3 +34,5 @@ if(${VELOX_SIMDJSON_SKIPUTF8VALIDATION})
 endif()
 
 FetchContent_MakeAvailable(simdjson)
+target_compile_definitions(simdjson
+                           PUBLIC SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON)
diff --git a/velox/functions/prestosql/json/SIMDJsonUtil.cpp b/velox/functions/prestosql/json/SIMDJsonUtil.cpp
index c63e5263502d..3d379ca4b4ab 100644
--- a/velox/functions/prestosql/json/SIMDJsonUtil.cpp
+++ b/velox/functions/prestosql/json/SIMDJsonUtil.cpp
@@ -34,4 +34,10 @@ simdjson::simdjson_result<simdjson::ondemand::document> simdjsonParse(
   return parser.iterate(json);
 }
 
+simdjson::simdjson_result<simdjson::ondemand::document> simdjsonParseIncomplete(
+    const simdjson::padded_string_view& json) {
+  thread_local simdjson::ondemand::parser parser;
+  return parser.iterate_allow_incomplete_json(json);
+}
+
 } // namespace facebook::velox
diff --git a/velox/functions/prestosql/json/SIMDJsonUtil.h b/velox/functions/prestosql/json/SIMDJsonUtil.h
index d510c319c10e..84f8b204c520 100644
--- a/velox/functions/prestosql/json/SIMDJsonUtil.h
+++ b/velox/functions/prestosql/json/SIMDJsonUtil.h
@@ -47,4 +47,9 @@ void simdjsonErrorsToExceptions(
 simdjson::simdjson_result<simdjson::ondemand::document> simdjsonParse(
     const simdjson::padded_string_view& json);
 
+/// Parse the input json string using a thread local on demand parser. Allow
+/// incomplete json input.
+simdjson::simdjson_result<simdjson::ondemand::document> simdjsonParseIncomplete(
+    const simdjson::padded_string_view& json);
+
 } // namespace facebook::velox
diff --git a/velox/functions/sparksql/GetJsonObject.h b/velox/functions/sparksql/GetJsonObject.h
index 5bb249974338..2a3906197306 100644
--- a/velox/functions/sparksql/GetJsonObject.h
+++ b/velox/functions/sparksql/GetJsonObject.h
@@ -54,7 +54,7 @@ struct GetJsonObjectFunction {
     }
     simdjson::ondemand::document jsonDoc;
     simdjson::padded_string paddedJson(json.data(), json.size());
-    if (simdjsonParse(paddedJson).get(jsonDoc)) {
+    if (simdjsonParseIncomplete(paddedJson).get(jsonDoc)) {
       return false;
     }
     const auto formattedJsonPath = jsonPath_.has_value()
diff --git a/velox/functions/sparksql/tests/GetJsonObjectTest.cpp b/velox/functions/sparksql/tests/GetJsonObjectTest.cpp
index 3c370f531864..ec437df7b823 100644
--- a/velox/functions/sparksql/tests/GetJsonObjectTest.cpp
+++ b/velox/functions/sparksql/tests/GetJsonObjectTest.cpp
@@ -119,5 +119,34 @@ TEST_F(GetJsonObjectTest, nullResult) {
       std::nullopt);
 }
 
+TEST_F(GetJsonObjectTest, incompleteJson) {
+  EXPECT_EQ(getJsonObject(R"({"hello": "3.5"},)", "$.hello"), "3.5");
+  EXPECT_EQ(getJsonObject(R"({"hello": "3.5",,,,})", "$.hello"), "3.5");
+  EXPECT_EQ(
+      getJsonObject(R"({"hello": "3.5",,,,"taskSort":"2"})", "$.hello"), "3.5");
+  EXPECT_EQ(
+      getJsonObject(
+          R"({"hello": "3.5","taskSort":"2",,,,,"taskSort",})", "$.hello"),
+      "3.5");
+  EXPECT_EQ(
+      getJsonObject(R"({"hello": "3.5","taskSort":"2",,,,,,})", "$.hello"),
+      "3.5");
+  EXPECT_EQ(
+      getJsonObject(R"({"hello": "boy","taskSort":"2"},,,,,)", "$.hello"),
+      "boy");
+  EXPECT_EQ(getJsonObject(R"({"hello": "boy\n"},)", "$.hello"), "boy\n");
+  EXPECT_EQ(getJsonObject(R"({"hello": "boy\n\t"},)", "$.hello"), "boy\n\t");
+  EXPECT_EQ(
+      getJsonObject(
+          R"([{"my": {"info": {"name": "Alice"}}}, {"other": ["v1", "v2"]}],)",
+          "$[1].other[1]"),
+      "v2");
+  EXPECT_EQ(
+      getJsonObject(
+          R"({"my": {"info": {"name": "Alice", "age": "5", "id": "001"}}},)",
+          "$['my']['info']"),
+      R"({"name": "Alice", "age": "5", "id": "001"})");
+}
+
 } // namespace
 } // namespace facebook::velox::functions::sparksql::test