diff --git a/velox/docs/functions/spark/json.rst b/velox/docs/functions/spark/json.rst index a933297212f7..82d8a3a53683 100644 --- a/velox/docs/functions/spark/json.rst +++ b/velox/docs/functions/spark/json.rst @@ -84,3 +84,18 @@ JSON Functions SELECT json_object_keys(''); -- NULL SELECT json_object_keys(1); -- NULL SELECT json_object_keys('"hello"'); -- NULL + +.. spark:function:: to_json(jsonObject) -> jsonString + + Converts a JSON object (ROW, ARRAY or MAP) into a JSON string. + The current implementation has the following limitations. + + * Does not support user-provided options. :: + + to_json(ROW(1, "a"), map('option', 'value')) + + Examples of valid inputs are listed below. :: + + SELECT to_json(ROW(1, "a")); -- {"a":1} + SELECT to_json(ARRAY[1, 2, 3]); -- [1,2,3] + SELECT to_json(MAP(ARRAY['x', 'y'], ARRAY[1, 2])); -- {"x":1,"y":2} diff --git a/velox/functions/sparksql/CMakeLists.txt b/velox/functions/sparksql/CMakeLists.txt index f94dade0e618..f81e855e9657 100644 --- a/velox/functions/sparksql/CMakeLists.txt +++ b/velox/functions/sparksql/CMakeLists.txt @@ -30,6 +30,7 @@ velox_add_library( RegexFunctions.cpp Size.cpp String.cpp + ToJson.cpp UnscaledValueFunction.cpp) velox_link_libraries( diff --git a/velox/functions/sparksql/ToJson.cpp b/velox/functions/sparksql/ToJson.cpp new file mode 100644 index 000000000000..3139c4170a45 --- /dev/null +++ b/velox/functions/sparksql/ToJson.cpp @@ -0,0 +1,678 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/functions/prestosql/types/JsonType.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace facebook::velox::functions::sparksql { +namespace { + +template +std::enable_if_t, size_t> append( + T value, + char* const buffer) { + const auto oute = buffer + folly::to_ascii_size_max_decimal + 1; + auto uvalue = value < 0 ? ~static_cast(value) + 1 + : static_cast(value); + size_t p = 0; + char* writtenPosition = buffer; + if (value < 0) { + *writtenPosition++ = '-'; + p += 1; + }; + p += folly::to_ascii_decimal(writtenPosition, oute, uvalue); + return p; +} + +template +std::enable_if_t, size_t> append( + T value, + char* const buffer) { + std::string result; + if (FOLLY_UNLIKELY(std::isinf(value) || std::isnan(value))) { + result = fmt::format( + "\"{}\"", util::Converter::tryCast(value).value()); + } else { + result = util::Converter::tryCast(value).value(); + } + std::memcpy(buffer, result.c_str(), result.size()); + return result.size(); +} + +template +size_t convertToString( + T value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + VELOX_FAIL("{} is not supported in to_json.", type->toString()); +} + +template <> +size_t convertToString( + bool value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + static const char TRUE[] = "true"; + static const char FALSE[] = "false"; + char* pos = buffer; + const char* res = value ? TRUE : FALSE; + const size_t size = value ? 
4 : 5; + std::memcpy(pos, res, size); + return size; +} + +template <> +size_t convertToString( + int8_t value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + return append(value, buffer); +} + +template <> +size_t convertToString( + int16_t value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + return append(value, buffer); +} + +template <> +size_t convertToString( + int32_t value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + if (type->isDate()) { + /* Dates are emitted as a double-quoted string. NOTE(review): snprintf also writes a terminating NUL, so it touches stringValue.size() + 3 bytes of buffer while only stringValue.size() + 2 are returned as used; confirm the destination has room for that extra byte. */ std::string stringValue = DATE()->toString(value); + return snprintf( + buffer, stringValue.size() + 3, "\"%s\"", stringValue.c_str()); + } else { + return append(value, buffer); + } +} + +template <> +size_t convertToString( + int64_t value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + if (type->isDecimal()) { + /* Decimal types are rendered through DecimalUtil using the precision and scale read from the type; any other 64-bit value is printed as a plain integer. */ auto [precision, scale] = getDecimalPrecisionScale(*type); + auto size = DecimalUtil::maxStringViewSize(precision, scale); + return DecimalUtil::castToString(value, scale, size, buffer); + } else { + return append(value, buffer); + } +} + +template <> +size_t convertToString( + int128_t value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + /* NOTE(review): unlike the int64_t overload, this path never consults type->isDecimal(), so the value is printed as a plain integer with no scale applied; confirm that is intended for 128-bit decimals. */ const auto oute = buffer + folly::detail::digitsEnough() + 1; + size_t p; + if (value < 0) { + *buffer = '-'; + p = 1 + folly::detail::unsafeTelescope128(buffer + 1, oute, -value); + } else { + p = folly::detail::unsafeTelescope128(buffer, oute, value); + } + return p; +} + +template <> +size_t convertToString( + float value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + return append(value, buffer); +} + +template <> +size_t convertToString( + double value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + return append(value, buffer); +} + +template <> +size_t convertToString( + StringView value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + size_t 
size = normalizedSizeForJsonCast(value.data(), value.size()); + *buffer = '"'; + normalizeForJsonCast(value.data(), size, buffer + 1); + *(buffer + size + 1) = '"'; + return size + 2; +} + +template <> +size_t convertToString( + Timestamp value, + char* const buffer, + exec::EvalCtx& context, + const TypePtr& type) { + // Spark converts Timestamp in ISO8601 format by default. + static const auto formatter = + functions::buildJodaDateTimeFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSZZ") + .value(); + const auto* timeZone = + getTimeZoneFromConfig(context.execCtx()->queryCtx()->queryConfig()); + const auto maxResultSize = formatter->maxResultSize(timeZone); + *buffer = '"'; + const auto resultSize = + formatter->format(value, timeZone, maxResultSize, buffer + 1, false, "Z"); + *(buffer + resultSize + 1) = '"'; + return resultSize + 2; +} + +/* Returns the per-row byte count that the primitive toJson() multiplies by the number of selected rows to size the output buffer before any value is written. */ template +size_t estimateRowSize(const TypePtr& type) { + if constexpr (std::is_same_v) { + return 5; + } else if constexpr (std::is_integral_v) { + return folly::detail::digitsEnough() + 1; + } else if constexpr (std::is_same_v) { + // yyyy-MM-dd'T'HH:mm:ss.SSSZZ + return 40; + } else if (type->isDate()) { + // yyyy-MM-dd. + /* NOTE(review): dates are serialized by the int32_t overload of convertToString, and an integral native type appears to be caught by the std::is_integral_v branch above, so this branch looks unreachable; confirm the integral estimate also covers a quoted date. */ return 12; + } else { + // For variable-length types, the initial size is set to 10. + /* NOTE(review): this is only a starting guess, yet toJson() writes each value at rawBuffer and advances it with no bounds check; strings and floating-point values can need more than 10 bytes, so confirm getBufferWithSpace() leaves enough headroom or grow the buffer per row. */ return 10; + } +} + +// Convert primitive-type input vectors to Json string. +template < + TypeKind kind, + typename std::enable_if_t::isPrimitiveType, int> = 0> +void toJson( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult) { + using T = typename TypeTraits::NativeType; + + // input is guaranteed to be in flat or constant encodings when passed in. 
+ auto inputVector = input.as>(); + + size_t rowSize = estimateRowSize(inputVector->type()); + Buffer* buffer = + flatResult.getBufferWithSpace(rows.countSelected() * rowSize); + char* rawBuffer = buffer->asMutable() + buffer->size(); + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputVector->isNullAt(row)) { + flatResult.set(row, "null"); + } else { + auto size = VELOX_DYNAMIC_TYPE_DISPATCH( + convertToString, + kind, + inputVector->valueAt(row), + rawBuffer, + context, + inputVector->type()); + + flatResult.setNoCopy(row, StringView(rawBuffer, size)); + rawBuffer += size; + } + }); + // Update the exact buffer size. + buffer->setSize(rawBuffer - buffer->asMutable()); +} + +// Forward declaration. +void toJsonFromRow( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult); + +void toJsonFromArray( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult); + +void toJsonFromMap( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult); + +// Convert complex-type input vectors to Json string. +template < + TypeKind kind, + typename std::enable_if_t::isPrimitiveType, int> = 0> +void toJson( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult) { + if constexpr (kind == TypeKind::ROW) { + toJsonFromRow(input, context, rows, flatResult); + } else if constexpr (kind == TypeKind::ARRAY) { + toJsonFromArray(input, context, rows, flatResult); + } else if constexpr (kind == TypeKind::MAP) { + toJsonFromMap(input, context, rows, flatResult); + } else { + VELOX_FAIL("{} is not supported in to_json.", input.type()->toString()); + } +} + +// Helper struct representing the Json vector of input. 
+struct AsJson { + AsJson( + exec::EvalCtx& context, + const VectorPtr& input, + const SelectivityVector& rows, + const BufferPtr& elementToTopLevelRows) + : decoded_(context) { + VELOX_CHECK(rows.hasSelections()); + + exec::EvalErrorsPtr oldErrors; + context.swapErrors(oldErrors); + if (isJsonType(input->type())) { + json_ = input; + } else { + if (!exec::PeeledEncoding::isPeelable(input->encoding())) { + serialize(context, input, rows, json_); + } else { + exec::withContextSaver([&](exec::ContextSaver& saver) { + exec::LocalSelectivityVector newRowsHodler(*context.execCtx()); + + exec::LocalDecodedVector localDecoded(context); + std::vector peeledVectors; + auto peeledEncoding = exec::PeeledEncoding::peel( + {input}, rows, localDecoded, true, peeledVectors); + VELOX_CHECK_EQ(peeledVectors.size(), 1); + auto newRows = + peeledEncoding->translateToInnerRows(rows, newRowsHodler); + // Save context and set the peel + context.saveAndReset(saver, rows); + context.setPeeledEncoding(peeledEncoding); + + serialize(context, peeledVectors[0], *newRows, json_); + json_ = context.getPeeledEncoding()->wrap( + json_->type(), context.pool(), json_, rows); + }); + } + } + decoded_.get()->decode(*json_, rows); + jsonStrings_ = decoded_->base()->as>(); + + combineErrors(context, rows, elementToTopLevelRows, oldErrors); + } + + StringView at(vector_size_t i) const { + return jsonStrings_->valueAt(decoded_->index(i)); + } + + // Returns the length of the json string of the value at i, when this + // value will be inlined as an element in the json string of an array, map, or + // row. + vector_size_t lengthAt(vector_size_t i) const { + if (decoded_->isNullAt(i)) { + // Null values are inlined as "null". + return 4; + } else { + return this->at(i).size(); + } + } + + // Appends the json string of the value at i to a string writer. 
+ void append(vector_size_t i, exec::StringWriter& proxy) const { + if (decoded_->isNullAt(i)) { + proxy.append("null"); + } else { + proxy.append(this->at(i)); + } + } + + private: + void serialize( + exec::EvalCtx& context, + const VectorPtr& input, + const SelectivityVector& baseRows, + VectorPtr& result) { + context.ensureWritable(baseRows, JSON(), result); + auto flatJsonStrings = result->as>(); + + VELOX_DYNAMIC_TYPE_DISPATCH_ALL( + toJson, input->typeKind(), *input, context, baseRows, *flatJsonStrings); + } + + // Combine exceptions in oldErrors into context.errors_ with a transformation + // of rows mapping provided by elementToTopLevelRows. If there are exceptions + // at the same row in both context.errors_ and oldErrors, the one in oldErrors + // remains. elementToTopLevelRows can be a nullptr, meaning that the rows in + // context.errors_ correspond to rows in oldErrors exactly. + void combineErrors( + exec::EvalCtx& context, + const SelectivityVector& rows, + const BufferPtr& elementToTopLevelRows, + exec::EvalErrorsPtr& oldErrors) { + if (context.errors()) { + if (elementToTopLevelRows) { + context.addElementErrorsToTopLevel( + rows, elementToTopLevelRows, oldErrors); + } else { + context.addErrors(rows, *context.errorsPtr(), oldErrors); + } + } + context.swapErrors(oldErrors); + } + + exec::LocalDecodedVector decoded_; + VectorPtr json_; + const SimpleVector* jsonStrings_; +}; + +void toJsonFromRow( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult) { + // input is guaranteed to be in flat encoding when passed in. 
+ VELOX_CHECK_EQ(input.encoding(), VectorEncoding::Simple::ROW); + auto inputRow = input.as(); + auto childrenSize = inputRow->childrenSize(); + + auto& rowType = inputRow->type()->asRow(); + VELOX_CHECK_EQ(rowType.size(), childrenSize, "Mismatch in row type size"); + + // Estimates the total length of all Json strings for the + // input according to the length of all children Json strings and the + // delimiters to be added. + size_t childrenStringSize = 0; + std::vector childrenAsJson; + for (int i = 0; i < childrenSize; ++i) { + childrenAsJson.emplace_back(context, inputRow->childAt(i), rows, nullptr); + + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputRow->isNullAt(row)) { + // "null" will be inlined in the StringView. + return; + } + childrenStringSize += childrenAsJson[i].lengthAt(row); + }); + } + + // Extra length for commas and curly braces. + /* NOTE(review): the quoted field names and ':' appended per field below are not counted here, so this total is not a true upper bound; presumably StringWriter grows on demand - confirm. */ childrenStringSize += + rows.countSelected() * (childrenSize > 0 ? childrenSize + 1 : 2); + flatResult.getBufferWithSpace(childrenStringSize); + + // Constructs Json string of each row from Json strings of its children. + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputRow->isNullAt(row)) { + flatResult.set(row, "null"); + return; + } + + auto proxy = exec::StringWriter(&flatResult, row); + + proxy.append("{"_sv); + for (int i = 0; i < childrenSize; ++i) { + if (i > 0) { + proxy.append(","_sv); + } + + proxy.append("\""_sv); + /* NOTE(review): the field name is appended verbatim, with no JSON escaping; confirm names cannot contain double quotes or backslashes. */ proxy.append(rowType.nameOf(i)); + proxy.append("\":"_sv); + + childrenAsJson[i].append(row, proxy); + } + proxy.append("}"_sv); + + proxy.finalize(); + }); +} + +void toJsonFromArray( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult) { + // input is guaranteed to be in flat encoding when passed in. 
+ auto inputArray = input.as(); + + auto elements = inputArray->elements(); + auto elementsRows = + functions::toElementRows(elements->size(), rows, inputArray); + if (!elementsRows.hasSelections()) { + // All arrays are null or empty. + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputArray->isNullAt(row)) { + flatResult.set(row, "null"); + } else { + VELOX_CHECK_EQ( + inputArray->sizeAt(row), + 0, + "All arrays are expected to be null or empty"); + flatResult.set(row, "[]"); + } + }); + return; + } + + auto elementToTopLevelRows = functions::getElementToTopLevelRows( + elements->size(), rows, inputArray, context.pool()); + AsJson elementsAsJson{context, elements, elementsRows, elementToTopLevelRows}; + + // Estimates an upperbound of the total length of all Json strings for the + // input according to the length of all elements Json strings and the + // delimiters to be added. + size_t elementsStringSize = 0; + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputArray->isNullAt(row)) { + // "null" will be inlined in the StringView. + return; + } + + auto offset = inputArray->offsetAt(row); + auto size = inputArray->sizeAt(row); + for (auto i = offset, end = offset + size; i < end; ++i) { + elementsStringSize += elementsAsJson.lengthAt(i); + } + + // Extra length for commas and brackets. + elementsStringSize += size > 0 ? size + 1 : 2; + }); + + flatResult.getBufferWithSpace(elementsStringSize); + + // Constructs the Json string of each array from Json strings of its elements. 
+ context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputArray->isNullAt(row)) { + flatResult.set(row, "null"); + return; + } + + auto offset = inputArray->offsetAt(row); + auto size = inputArray->sizeAt(row); + + auto proxy = exec::StringWriter(&flatResult, row); + + proxy.append("["_sv); + for (int i = offset, end = offset + size; i < end; ++i) { + if (i > offset) { + proxy.append(","_sv); + } + elementsAsJson.append(i, proxy); + } + proxy.append("]"_sv); + + proxy.finalize(); + }); +} + +void toJsonFromMap( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + FlatVector& flatResult) { + // input is guaranteed to be in flat encoding when passed in. + auto inputMap = input.as(); + auto& mapType = inputMap->type()->asMap(); + + auto mapKeys = inputMap->mapKeys(); + auto mapValues = inputMap->mapValues(); + auto elementsRows = functions::toElementRows(mapKeys->size(), rows, inputMap); + if (!elementsRows.hasSelections()) { + // All maps are null or empty. + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputMap->isNullAt(row)) { + flatResult.set(row, "null"); + } else { + VELOX_CHECK_EQ( + inputMap->sizeAt(row), + 0, + "All maps are expected to be null or empty"); + flatResult.set(row, "{}"); + } + }); + return; + } + + auto elementToTopLevelRows = functions::getElementToTopLevelRows( + mapKeys->size(), rows, inputMap, context.pool()); + + AsJson keysAsJson{context, mapKeys, elementsRows, elementToTopLevelRows}; + AsJson valuesAsJson{context, mapValues, elementsRows, elementToTopLevelRows}; + + // Estimates an upperbound of the total length of all Json strings for the + // input according to the length of all elements Json strings and the + // delimiters to be added. + size_t elementsStringSize = 0; + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputMap->isNullAt(row)) { + // "null" will be inlined in the StringView. 
+ return; + } + + auto offset = inputMap->offsetAt(row); + auto size = inputMap->sizeAt(row); + for (auto i = offset, end = offset + size; i < end; ++i) { + // The construction of keysAsJson ensured there is no null in keysAsJson + elementsStringSize += keysAsJson.at(i).size() + valuesAsJson.lengthAt(i); + } + + // Extra length for commas, colons, and curly braces. + /* NOTE(review): the pair of quotes added around non-VARCHAR keys when the entry is written is not counted here; presumably StringWriter grows on demand - confirm. */ elementsStringSize += size > 0 ? size * 2 + 1 : 2; + }); + + flatResult.getBufferWithSpace(elementsStringSize); + + // Constructs the Json string of each map from Json strings of its keys and + // values. + std::vector> sortedKeys; + context.applyToSelectedNoThrow(rows, [&](auto row) { + if (inputMap->isNullAt(row)) { + flatResult.set(row, "null"); + return; + } + + auto offset = inputMap->offsetAt(row); + auto size = inputMap->sizeAt(row); + + // Sort the entries of each map by the serialized (Json text) form of their keys. + sortedKeys.clear(); + for (int i = offset, end = offset + size; i < end; ++i) { + sortedKeys.push_back(std::make_pair(keysAsJson.at(i), i)); + } + std::sort(sortedKeys.begin(), sortedKeys.end()); + + auto proxy = exec::StringWriter(&flatResult, row); + + proxy.append("{"_sv); + for (auto it = sortedKeys.begin(); it != sortedKeys.end(); ++it) { + if (it != sortedKeys.begin()) { + proxy.append(","_sv); + } + std::string keyFormat = + mapType.childAt(0)->isVarchar() ? 
"{}:" : "\"{}\":"; + proxy.append(fmt::format(keyFormat, it->first)); + valuesAsJson.append(it->second, proxy); + } + proxy.append("}"_sv); + + proxy.finalize(); + }); +} + +class ToJsonFunction final : public exec::VectorFunction { + public: + void apply( + const SelectivityVector& rows, + std::vector& args, // Not using const ref so we can reuse args + const TypePtr& outputType, + exec::EvalCtx& context, + VectorPtr& result) const final { + VELOX_USER_CHECK_EQ(args.size(), 1, "to_json takes one argument."); + auto kind = args[0]->typeKind(); + VELOX_USER_CHECK( + kind == TypeKind::ROW || kind == TypeKind::ARRAY || + kind == TypeKind::MAP, + "to_json only support ROW/ARRAY/MAP inputs."); + context.ensureWritable(rows, outputType, result); + result->clearNulls(rows); + auto* rawResults = result->as>(); + + VELOX_DYNAMIC_TYPE_DISPATCH_ALL( + toJson, kind, *args[0], context, rows, *rawResults); + } + + static std::vector> signatures() { + // T(ROW/ARRAY/MAP) -> varchar + return {exec::FunctionSignatureBuilder() + .typeVariable("T") + .returnType("varchar") + .argumentType("T") + .build()}; + } +}; + +} // namespace + +VELOX_DECLARE_VECTOR_FUNCTION( + udf_to_json, + ToJsonFunction::signatures(), + std::make_unique()); + +} // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/registration/RegisterJson.cpp b/velox/functions/sparksql/registration/RegisterJson.cpp index 340cb8a86eb7..9cb69a164780 100644 --- a/velox/functions/sparksql/registration/RegisterJson.cpp +++ b/velox/functions/sparksql/registration/RegisterJson.cpp @@ -28,6 +28,7 @@ void registerJsonFunctions(const std::string& prefix) { {prefix + "json_object_keys"}); registerFunction( {prefix + "json_array_length"}); + VELOX_REGISTER_VECTOR_FUNCTION(udf_to_json, prefix + "to_json"); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/CMakeLists.txt b/velox/functions/sparksql/tests/CMakeLists.txt index 3b3be8ea0047..a0a018ab24a1 
100644 --- a/velox/functions/sparksql/tests/CMakeLists.txt +++ b/velox/functions/sparksql/tests/CMakeLists.txt @@ -58,6 +58,7 @@ add_executable( SplitTest.cpp StringTest.cpp StringToMapTest.cpp + ToJsonTest.cpp UnscaledValueFunctionTest.cpp UuidTest.cpp XxHash64Test.cpp) diff --git a/velox/functions/sparksql/tests/ToJsonTest.cpp b/velox/functions/sparksql/tests/ToJsonTest.cpp new file mode 100644 index 000000000000..1bd527919656 --- /dev/null +++ b/velox/functions/sparksql/tests/ToJsonTest.cpp @@ -0,0 +1,207 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +using namespace facebook::velox::test; + +namespace facebook::velox::functions::sparksql::test { +namespace { +constexpr float kNaNFloat = std::numeric_limits::quiet_NaN(); +constexpr float kInfFloat = std::numeric_limits::infinity(); +constexpr double kNaNDouble = std::numeric_limits::quiet_NaN(); +constexpr double kInfDouble = std::numeric_limits::infinity(); + +class ToJsonTest : public SparkFunctionBaseTest { + protected: + core::CallTypedExprPtr createToJson(const TypePtr& inputType) { + std::vector inputs = { + std::make_shared(inputType, "c0")}; + return std::make_shared( + VARCHAR(), std::move(inputs), "to_json"); + } + + void testToJson(const VectorPtr& input, const VectorPtr& expected) { + auto expr = createToJson(input->type()); + testEncodings(expr, {input}, expected); + } +}; + +TEST_F(ToJsonTest, basicStruct) { + auto input = makeRowVector({"a"}, {makeFlatVector({1, 2, 3})}); + auto expected = + makeFlatVector({R"({"a":1})", R"({"a":2})", R"({"a":3})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicArray) { + auto input = makeArrayVector({{1}, {2, 3}, {}}); + auto expected = makeFlatVector({R"([1])", R"([2,3])", R"([])"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicMap) { + auto input = + makeMapVector({{{"a", 1}}, {{"b", 2}}, {{"c", 3}}}); + auto expected = + makeFlatVector({R"({"a":1})", R"({"b":2})", R"({"c":3})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicBool) { + auto data = makeNullableFlatVector({true, false, std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":true})", R"({"a":false})", R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicString) { + auto data = makeNullableFlatVector( + {"str1", "str2", std::nullopt, "str\"3\"", std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":"str1"})", + R"({"a":"str2"})", + R"({"a":null})", + 
R"({"a":"str\"3\""})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicTinyInt) { + auto data = + makeNullableFlatVector({0, 127, 128, -128, -129, std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":0})", + R"({"a":127})", + R"({"a":-128})", + R"({"a":-128})", + R"({"a":127})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicSmallInt) { + auto data = makeNullableFlatVector({0, 32768, -32769, std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":0})", R"({"a":-32768})", R"({"a":32767})", R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicInt) { + auto data = makeNullableFlatVector( + {0, 2147483648, -2147483649, std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":0})", + R"({"a":-2147483648})", + R"({"a":2147483647})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicFloat) { + auto data = makeNullableFlatVector( + {1.0, kNaNFloat, kInfFloat, -kInfFloat, std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":1.0})", + R"({"a":"NaN"})", + R"({"a":"Infinity"})", + R"({"a":"-Infinity"})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicDouble) { + auto data = makeNullableFlatVector( + {1.0, kNaNDouble, kInfDouble, -kInfDouble, std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":1.0})", + R"({"a":"NaN"})", + R"({"a":"Infinity"})", + R"({"a":"-Infinity"})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicDecimal) { + auto data = makeNullableFlatVector( + {12345, 0, -67890, std::nullopt}, DECIMAL(10, 2)); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":123.45})", + 
R"({"a":0.00})", + R"({"a":-678.90})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicTimestamp) { + auto data = makeNullableFlatVector( + {Timestamp(0, 0), + Timestamp(1582934400, 0), + Timestamp(-2208988800, 0), + std::nullopt}); + auto input = makeRowVector({"a"}, {data}); + // UTC time zone. + auto expected = makeFlatVector( + {R"({"a":"1970-01-01T00:00:00.000Z"})", + R"({"a":"2020-02-29T00:00:00.000Z"})", + R"({"a":"1900-01-01T00:00:00.000Z"})", + R"({"a":null})"}); + testToJson(input, expected); + // Los_Angeles time zone. + setTimezone("America/Los_Angeles"); + expected = makeFlatVector( + {R"({"a":"1969-12-31T16:00:00.000-08:00"})", + R"({"a":"2020-02-28T16:00:00.000-08:00"})", + R"({"a":"1899-12-31T16:00:00.000-08:00"})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, basicDate) { + auto data = makeNullableFlatVector( + {0, 18321, -25567, 2932896, std::nullopt}, DateType::get()); + auto input = makeRowVector({"a"}, {data}); + auto expected = makeFlatVector( + {R"({"a":"1970-01-01"})", + R"({"a":"2020-02-29"})", + R"({"a":"1900-01-01"})", + R"({"a":"9999-12-31"})", + R"({"a":null})"}); + testToJson(input, expected); +} + +TEST_F(ToJsonTest, nestedComplexType) { + auto data1 = makeNullableFlatVector({"str1", "str2", "str3"}); + auto data2 = + makeNullableArrayVector({{1, 2, 3}, {}, {std::nullopt}}); + auto data3 = makeMapVector( + {{{"key1", 1}}, {{"key2", 2}}, {{"key3", 3}}}); + auto input = makeRowVector({"a", "b", "c"}, {data1, data2, data3}); + auto expected = makeFlatVector( + {R"({"a":"str1","b":[1,2,3],"c":{"key1":1}})", + R"({"a":"str2","b":[],"c":{"key2":2}})", + R"({"a":"str3","b":[null],"c":{"key3":3}})"}); + testToJson(input, expected); +} +} // namespace +} // namespace facebook::velox::functions::sparksql::test