From 04e3aa9ffad64cf6682b5d1677d9df66a44d8f53 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Tue, 2 Jul 2024 09:55:13 -0700 Subject: [PATCH] Remove the (unused) implementation of `host_parse_nested_json` (#16135) Follow-up for #15537 and #15813 to remove some missed code. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Bradley Dice (https://github.com/bdice) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/16135 --- cpp/src/io/json/nested_json_gpu.cu | 125 ----------------------------- 1 file changed, 125 deletions(-) diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 031edfde4f6..a007754ef4f 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -2244,131 +2244,6 @@ std::pair, std::vector> json_column_to return {}; } -table_with_metadata host_parse_nested_json(device_span d_input, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - // Range of orchestrating/encapsulating function - CUDF_FUNC_RANGE(); - - auto const h_input = cudf::detail::make_std_vector_async(d_input, stream); - - auto const new_line_delimited_json = options.is_enabled_lines(); - - // Get internal JSON column - json_column root_column{}; - std::stack data_path{}; - - constexpr uint32_t row_offset_zero = 0; - constexpr uint32_t token_begin_offset_zero = 0; - constexpr uint32_t token_end_offset_zero = 0; - constexpr uint32_t node_init_child_count_zero = 0; - - // Whether the tokenizer stage should keep quote characters for string values - // If the tokenizer keeps the quote characters, they may be stripped during type casting - constexpr bool include_quote_chars = true; - - // We initialize the very root node and root column, which represent the JSON document being - // parsed. That root node is a list node and that root column is a list column. The column has the - // root node as its only row. The values parsed from the JSON input will be treated as follows: - // (1) For JSON lines: we expect to find a list of JSON values that all - // will be inserted into this root list column. (2) For regular JSON: we expect to have only a - // single value (list, struct, string, number, literal) that will be inserted into this root - // column. - root_column.append_row( - row_offset_zero, json_col_t::ListColumn, token_begin_offset_zero, token_end_offset_zero, 1); - - // Push the root node onto the stack for the data path - data_path.push({&root_column, row_offset_zero, nullptr, node_init_child_count_zero}); - - make_json_column( - root_column, data_path, h_input, d_input, options, include_quote_chars, stream, mr); - - // data_root refers to the root column of the data represented by the given JSON string - auto const& data_root = - new_line_delimited_json ? root_column : root_column.child_columns.begin()->second; - - // Zero row entries - if (data_root.type == json_col_t::ListColumn && data_root.child_columns.empty()) { - return table_with_metadata{std::make_unique(std::vector>{})}; - } - - // Verify that we were in fact given a list of structs (or in JSON speech: an array of objects) - auto constexpr single_child_col_count = 1; - CUDF_EXPECTS(data_root.type == json_col_t::ListColumn and - data_root.child_columns.size() == single_child_col_count and - data_root.child_columns.begin()->second.type == json_col_t::StructColumn, - "Currently the nested JSON parser only supports an array of (nested) objects"); - - // Slice off the root list column, which has only a single row that contains all the structs - auto const& root_struct_col = data_root.child_columns.begin()->second; - - // Initialize meta data to be populated while recursing through the tree of columns - std::vector> out_columns; - std::vector out_column_names; - - // Iterate over the struct's child columns and convert to cudf column - size_type column_index = 0; - for (auto const& col_name : root_struct_col.column_order) { - auto const& json_col = root_struct_col.child_columns.find(col_name)->second; - // Insert this columns name into the schema - out_column_names.emplace_back(col_name); - - std::optional child_schema_element = std::visit( - cudf::detail::visitor_overload{ - [column_index](std::vector const& user_dtypes) -> std::optional { - auto ret = (static_cast(column_index) < user_dtypes.size()) - ? std::optional{{user_dtypes[column_index]}} - : std::optional{}; -#ifdef NJP_DEBUG_PRINT - std::cout << "Column by index: #" << column_index << ", type id: " - << (ret.has_value() ? std::to_string(static_cast(ret->type.id())) : "n/a") - << ", with " << (ret.has_value() ? ret->child_types.size() : 0) << " children" - << "\n"; -#endif - return ret; - }, - [col_name]( - std::map const& user_dtypes) -> std::optional { - auto ret = (user_dtypes.find(col_name) != std::end(user_dtypes)) - ? std::optional{{user_dtypes.find(col_name)->second}} - : std::optional{}; -#ifdef NJP_DEBUG_PRINT - std::cout << "Column by flat name: '" << col_name << "', type id: " - << (ret.has_value() ? std::to_string(static_cast(ret->type.id())) : "n/a") - << ", with " << (ret.has_value() ? ret->child_types.size() : 0) << " children" - << "\n"; -#endif - return ret; - }, - [col_name](std::map const& user_dtypes) - -> std::optional { - auto ret = (user_dtypes.find(col_name) != std::end(user_dtypes)) - ? user_dtypes.find(col_name)->second - : std::optional{}; -#ifdef NJP_DEBUG_PRINT - std::cout << "Column by nested name: #" << col_name << ", type id: " - << (ret.has_value() ? std::to_string(static_cast(ret->type.id())) : "n/a") - << ", with " << (ret.has_value() ? ret->child_types.size() : 0) << " children" - << "\n"; -#endif - return ret; - }}, - options.get_dtypes()); - - // Get this JSON column's cudf column and schema info - auto [cudf_col, col_name_info] = - json_column_to_cudf_column(json_col, d_input, options, child_schema_element, stream, mr); - out_column_names.back().children = std::move(col_name_info); - out_columns.emplace_back(std::move(cudf_col)); - - column_index++; - } - - return table_with_metadata{std::make_unique
(std::move(out_columns)), {out_column_names}}; -} - } // namespace detail } // namespace cudf::io::json