From 1ec96176c95ec6cce2cc024ae4a02a99330d9236 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 11 Jun 2024 19:40:30 +0000 Subject: [PATCH 01/46] added csr data struct --- cpp/src/io/json/json_column.cu | 290 ++++++++++++++++++++++++++++++++ cpp/src/io/json/nested_json.hpp | 30 ++++ 2 files changed, 320 insertions(+) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 631f8adbd6d..3f3c6286045 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -296,6 +297,295 @@ reduce_to_column_tree(tree_meta_t& tree, std::move(max_row_offsets)}; } +/** + * @brief Returns stable sorted keys and its sorted order + * + * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. + * Since the key and order is returned, using double buffer helps to avoid extra copy to user + * provided output iterator. + * + * @tparam IndexType sorted order type + * @tparam KeyType key type + * @param keys keys to sort + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @return Sorted keys and indices producing that sorted order + */ +template +std::pair, rmm::device_uvector> stable_sorted_key_order( + cudf::device_span keys, rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + // Determine temporary device storage requirements + rmm::device_uvector keys_buffer1(keys.size(), stream); + rmm::device_uvector keys_buffer2(keys.size(), stream); + rmm::device_uvector order_buffer1(keys.size(), stream); + rmm::device_uvector order_buffer2(keys.size(), stream); + cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); + cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + + thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); + thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); + + cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_buffer, + order_buffer, + keys.size(), + 0, + sizeof(KeyType) * 8, + stream.value()); + + return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) + : std::move(keys_buffer2), + order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1) + : std::move(order_buffer2)}; +} + +/** + * @brief Reduces node tree representation to column tree CSR representation. 
+ * + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple> +reduce_to_column_tree_csr(tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // 1. column count for allocation + auto const num_columns = + thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + + rmm::device_uvector unique_node_ids(num_columns, stream); + rmm::device_uvector csr_unique_node_ids(num_columns, stream); + rmm::device_uvector column_levels(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_node_ids.begin(), + thrust::make_discard_iterator(), + unique_node_ids.begin()); + thrust::copy_n(rmm::exec_policy(stream), thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), unique_node_ids.size(), column_levels.begin()); + auto [sorted_column_levels, sorted_column_levels_order] = stable_sorted_key_order(column_levels, stream); + + // 2. reduce_by_key {col_id}, {row_offset}, max. 
+ rmm::device_uvector unique_col_ids(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector csr_unique_col_ids(num_columns, stream); + rmm::device_uvector csr_max_row_offsets(num_columns, stream); + auto ordered_row_offsets = + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); + thrust::reduce_by_key(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_row_offsets, + unique_col_ids.begin(), + max_row_offsets.begin(), + thrust::equal_to(), + thrust::maximum()); + + // 3. reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + rmm::device_uvector column_categories(num_columns, stream); + rmm::device_uvector csr_column_categories(num_columns, stream); + thrust::reduce_by_key( + rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), + unique_col_ids.begin(), + column_categories.begin(), + thrust::equal_to(), + [] __device__(NodeT type_a, NodeT type_b) -> NodeT { + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + return type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + } else if (is_b_leaf) { + return type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a); + } + // *+#=E + return NC_ERR; + }); + + auto csr_permutation_it = thrust::make_zip_iterator(thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), thrust::make_permutation_iterator(column_categories.begin(), sorted_column_levels_order.begin())); + thrust::copy(rmm::exec_policy(stream), csr_permutation_it, csr_permutation_it + num_columns, thrust::make_zip_iterator(csr_unique_node_ids.begin(), csr_unique_col_ids.begin(), csr_max_row_offsets.begin(), csr_column_categories.begin())); + + // 4. unique_copy parent_node_ids, ranges + rmm::device_uvector csr_parent_col_ids(num_columns, stream); + rmm::device_uvector csr_col_range_begin(num_columns, stream); // Field names + rmm::device_uvector csr_col_range_end(num_columns, stream); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), csr_unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_begin.begin(), csr_unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_end.begin(), csr_unique_node_ids.begin())), + csr_unique_node_ids.size(), + thrust::make_zip_iterator( + csr_parent_col_ids.begin(), csr_col_range_begin.begin(), csr_col_range_end.begin())); + + // convert parent_node_ids to parent_col_ids + thrust::transform( + rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel + : col_ids[parent_node_id]; + }); + + /* + CSR construction: + 1. Sort column levels and get their ordering + 2. 
For each column node coln iterated according to sorted_column_levels; do + a. Find nodes that have coln as the parent node -> set adj_coln + b. row idx[coln] = size of adj_coln + 1 + c. col idx[coln] = adj_coln U {parent_col_id[coln]} + */ + + rmm::device_uvector rowidx(num_columns + 1, stream); + thrust::fill(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), 0); + auto [sorted_csr_parent_col_ids, sorted_csr_parent_col_ids_order] = stable_sorted_key_order(csr_parent_col_ids, stream); + rmm::device_uvector non_leaf_nodes(num_columns, stream); + rmm::device_uvector non_leaf_adjacency(num_columns, stream); + thrust::reduce_by_key(rmm::exec_policy(stream), sorted_csr_parent_col_ids.begin(), sorted_csr_parent_col_ids.end(), thrust::make_constant_iterator(1), non_leaf_nodes.begin(), non_leaf_adjacency.begin(), thrust::equal_to()); + // Add the non_leaf_adjacency to rowidx at positions non_leaf_nodes + thrust::transform(rmm::exec_policy(stream), non_leaf_nodes.begin(), non_leaf_nodes.end(), thrust::make_constant_iterator(1), non_leaf_nodes.begin(), thrust::plus()); + thrust::scatter(rmm::exec_policy(stream), non_leaf_adjacency.begin(), non_leaf_adjacency.end(), non_leaf_nodes.begin(), rowidx.begin() + 1); + // We are discarding the parent of the root node. 
+ thrust::transform(rmm::exec_policy(stream), rowidx.begin() + 2, rowidx.end(), thrust::make_constant_iterator(1), rowidx.begin() + 1, thrust::plus()); + thrust::inclusive_scan(rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); + + rmm::device_uvector colidx((num_columns - 1) * 2, stream); + thrust::scatter(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), rowidx.begin() + 1, colidx.begin()); + rmm::device_uvector map((num_columns - 1) * 2, stream); + thrust::sequence(rmm::exec_policy(stream), map.begin(), map.end()); + rmm::device_uvector stencil((num_columns - 1) * 2, stream); + thrust::fill(rmm::exec_policy(stream), stencil.begin(), stencil.end(), 1); + thrust::scatter(rmm::exec_policy(stream), thrust::make_constant_iterator(0), thrust::make_constant_iterator(0) + num_columns, rowidx.begin() + 1, stencil.begin()); + thrust::scatter_if(rmm::exec_policy(stream), sorted_csr_parent_col_ids.begin() + 1, sorted_csr_parent_col_ids.end(), map.begin(), stencil.begin(), colidx.begin()); + + // condition is true if parent is not a list, or sentinel/root + // Special case to return true if parent is a list and is_array_of_arrays is true + auto is_non_list_parent = [column_categories = column_categories.begin(), + is_array_of_arrays, + row_array_parent_col_id] __device__(auto parent_col_id) -> bool { + return !(parent_col_id == parent_node_sentinel || + column_categories[parent_col_id] == NC_LIST && + (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); + }; + // Mixed types in List children go to different columns, + // so all immediate children of list column should have same max_row_offsets. + // create list's children max_row_offsets array. (initialize to zero) + // atomicMax on children max_row_offsets array. + // gather the max_row_offsets from children row offset array. 
+ { + rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); + thrust::fill(rmm::exec_policy(stream), + list_parents_children_max_row_offsets.begin(), + list_parents_children_max_row_offsets.end(), + 0); + thrust::for_each(rmm::exec_policy(stream), + csr_unique_col_ids.begin(), + csr_unique_col_ids.end(), + [csr_column_categories = csr_column_categories.begin(), + csr_parent_col_ids = csr_parent_col_ids.begin(), + csr_max_row_offsets = csr_max_row_offsets.begin(), + list_parents_children_max_row_offsets = + list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { + auto csr_parent_col_id = csr_parent_col_ids[col_id]; + if (csr_parent_col_id != parent_node_sentinel and + csr_column_categories[csr_parent_col_id] == node_t::NC_LIST) { + cuda::atomic_ref ref{ + *(list_parents_children_max_row_offsets + csr_parent_col_id)}; + ref.fetch_max(csr_max_row_offsets[col_id], cuda::std::memory_order_relaxed); + } + }); + thrust::gather_if( + rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + list_parents_children_max_row_offsets.begin(), + csr_max_row_offsets.begin(), + [csr_column_categories = csr_column_categories.begin()] __device__(size_type parent_col_id) { + return parent_col_id != parent_node_sentinel and + csr_column_categories[parent_col_id] == node_t::NC_LIST; + }); + } + + // copy lists' max_row_offsets to children. + // all structs should have same size. 
+ thrust::transform_if( + rmm::exec_policy(stream), + csr_unique_col_ids.begin(), + csr_unique_col_ids.end(), + csr_max_row_offsets.begin(), + [csr_column_categories = csr_column_categories.begin(), + is_non_list_parent, + csr_parent_col_ids = csr_parent_col_ids.begin(), + csr_max_row_offsets = csr_max_row_offsets.begin()] __device__(size_type col_id) { + auto parent_col_id = csr_parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + while (is_non_list_parent(parent_col_id)) { + col_id = parent_col_id; + parent_col_id = csr_parent_col_ids[parent_col_id]; + } + return csr_max_row_offsets[col_id]; + }, + [csr_column_categories = csr_column_categories.begin(), + is_non_list_parent, + parent_col_ids = csr_parent_col_ids.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + return is_non_list_parent(parent_col_id); + }); + + // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) + thrust::transform_if( + rmm::exec_policy(stream), + csr_col_range_begin.begin(), + csr_col_range_begin.end(), + csr_column_categories.begin(), + csr_col_range_end.begin(), + [] __device__(auto i) { return i + 1; }, + [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); + + return std::tuple{column_tree_csr{std::move(rowidx), + std::move(colidx), + std::move(csr_unique_col_ids), + std::move(csr_column_categories), + std::move(csr_col_range_begin), + std::move(csr_col_range_end)}, + std::move(csr_max_row_offsets)}; +} + /** * @brief Get the column indices for the values column for array of arrays rows * diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index e12892a2d50..c557cbd1063 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -44,6 +44,20 @@ struct tree_meta_t { rmm::device_uvector node_range_end; }; +struct column_tree_csr { + 
//position of nnzs + rmm::device_uvector rowidx; + rmm::device_uvector colidx; + //node properties + rmm::device_uvector column_ids; + rmm::device_uvector categories; + rmm::device_uvector range_begin; + rmm::device_uvector range_end; + std::vector ignore_vals; + std::vector is_mixed_type_column; + std::vector is_pruned; +}; + /** * @brief A column type */ @@ -294,6 +308,22 @@ reduce_to_column_tree(tree_meta_t& tree, device_span row_offsets, rmm::cuda_stream_view stream); +/** + * @brief Reduce node tree into column tree by aggregating each property of column. + * + * @param tree json node tree to reduce (modified in-place, but restored to original state) + * @param col_ids column ids of each node (modified in-place, but restored to original state) + * @param row_offsets row offsets of each node (modified in-place, but restored to original state) + * @param stream The CUDA stream to which kernels are dispatched + * @return A tuple containing the column tree, identifier for each column and the maximum row index + * in each column + */ +std::tuple> +reduce_to_column_tree_csr(tree_meta_t& tree, + device_span col_ids, + device_span row_offsets, + rmm::cuda_stream_view stream); + /** * @brief Retrieves the parse_options to be used for type inference and type casting * From 022d7ce15302cf016e8e598aa843e4131895e07f Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 11 Jun 2024 19:47:05 +0000 Subject: [PATCH 02/46] formatting --- cpp/src/io/json/json_column.cu | 108 +++++++++++++++++++++++--------- cpp/src/io/json/nested_json.hpp | 14 ++--- 2 files changed, 87 insertions(+), 35 deletions(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 3f3c6286045..6e9c590e501 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -360,15 +360,15 @@ std::pair, rmm::device_uvector> stable_s * @return A tuple of column tree representation of JSON string, column ids of columns, and * max row offsets of columns */ 
-std::tuple> -reduce_to_column_tree_csr(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, - bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, - rmm::cuda_stream_view stream) +std::tuple> reduce_to_column_tree_csr( + tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); // 1. column count for allocation @@ -384,8 +384,13 @@ reduce_to_column_tree_csr(tree_meta_t& tree, ordered_node_ids.begin(), thrust::make_discard_iterator(), unique_node_ids.begin()); - thrust::copy_n(rmm::exec_policy(stream), thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), unique_node_ids.size(), column_levels.begin()); - auto [sorted_column_levels, sorted_column_levels_order] = stable_sorted_key_order(column_levels, stream); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), + unique_node_ids.size(), + column_levels.begin()); + auto [sorted_column_levels, sorted_column_levels_order] = + stable_sorted_key_order(column_levels, stream); // 2. reduce_by_key {col_id}, {row_offset}, max. 
rmm::device_uvector unique_col_ids(num_columns, stream); @@ -432,8 +437,19 @@ reduce_to_column_tree_csr(tree_meta_t& tree, return NC_ERR; }); - auto csr_permutation_it = thrust::make_zip_iterator(thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), thrust::make_permutation_iterator(column_categories.begin(), sorted_column_levels_order.begin())); - thrust::copy(rmm::exec_policy(stream), csr_permutation_it, csr_permutation_it + num_columns, thrust::make_zip_iterator(csr_unique_node_ids.begin(), csr_unique_col_ids.begin(), csr_max_row_offsets.begin(), csr_column_categories.begin())); + auto csr_permutation_it = thrust::make_zip_iterator( + thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(column_categories.begin(), + sorted_column_levels_order.begin())); + thrust::copy(rmm::exec_policy(stream), + csr_permutation_it, + csr_permutation_it + num_columns, + thrust::make_zip_iterator(csr_unique_node_ids.begin(), + csr_unique_col_ids.begin(), + csr_max_row_offsets.begin(), + csr_column_categories.begin())); // 4. 
unique_copy parent_node_ids, ranges rmm::device_uvector csr_parent_col_ids(num_columns, stream); @@ -471,25 +487,60 @@ reduce_to_column_tree_csr(tree_meta_t& tree, rmm::device_uvector rowidx(num_columns + 1, stream); thrust::fill(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), 0); - auto [sorted_csr_parent_col_ids, sorted_csr_parent_col_ids_order] = stable_sorted_key_order(csr_parent_col_ids, stream); + auto [sorted_csr_parent_col_ids, sorted_csr_parent_col_ids_order] = + stable_sorted_key_order(csr_parent_col_ids, stream); rmm::device_uvector non_leaf_nodes(num_columns, stream); rmm::device_uvector non_leaf_adjacency(num_columns, stream); - thrust::reduce_by_key(rmm::exec_policy(stream), sorted_csr_parent_col_ids.begin(), sorted_csr_parent_col_ids.end(), thrust::make_constant_iterator(1), non_leaf_nodes.begin(), non_leaf_adjacency.begin(), thrust::equal_to()); + thrust::reduce_by_key(rmm::exec_policy(stream), + sorted_csr_parent_col_ids.begin(), + sorted_csr_parent_col_ids.end(), + thrust::make_constant_iterator(1), + non_leaf_nodes.begin(), + non_leaf_adjacency.begin(), + thrust::equal_to()); // Add the non_leaf_adjacency to rowidx at positions non_leaf_nodes - thrust::transform(rmm::exec_policy(stream), non_leaf_nodes.begin(), non_leaf_nodes.end(), thrust::make_constant_iterator(1), non_leaf_nodes.begin(), thrust::plus()); - thrust::scatter(rmm::exec_policy(stream), non_leaf_adjacency.begin(), non_leaf_adjacency.end(), non_leaf_nodes.begin(), rowidx.begin() + 1); + thrust::transform(rmm::exec_policy(stream), + non_leaf_nodes.begin(), + non_leaf_nodes.end(), + thrust::make_constant_iterator(1), + non_leaf_nodes.begin(), + thrust::plus()); + thrust::scatter(rmm::exec_policy(stream), + non_leaf_adjacency.begin(), + non_leaf_adjacency.end(), + non_leaf_nodes.begin(), + rowidx.begin() + 1); // We are discarding the parent of the root node. 
- thrust::transform(rmm::exec_policy(stream), rowidx.begin() + 2, rowidx.end(), thrust::make_constant_iterator(1), rowidx.begin() + 1, thrust::plus()); - thrust::inclusive_scan(rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); + thrust::transform(rmm::exec_policy(stream), + rowidx.begin() + 2, + rowidx.end(), + thrust::make_constant_iterator(1), + rowidx.begin() + 1, + thrust::plus()); + thrust::inclusive_scan( + rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); rmm::device_uvector colidx((num_columns - 1) * 2, stream); - thrust::scatter(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), rowidx.begin() + 1, colidx.begin()); + thrust::scatter(rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + rowidx.begin() + 1, + colidx.begin()); rmm::device_uvector map((num_columns - 1) * 2, stream); thrust::sequence(rmm::exec_policy(stream), map.begin(), map.end()); rmm::device_uvector stencil((num_columns - 1) * 2, stream); thrust::fill(rmm::exec_policy(stream), stencil.begin(), stencil.end(), 1); - thrust::scatter(rmm::exec_policy(stream), thrust::make_constant_iterator(0), thrust::make_constant_iterator(0) + num_columns, rowidx.begin() + 1, stencil.begin()); - thrust::scatter_if(rmm::exec_policy(stream), sorted_csr_parent_col_ids.begin() + 1, sorted_csr_parent_col_ids.end(), map.begin(), stencil.begin(), colidx.begin()); + thrust::scatter(rmm::exec_policy(stream), + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0) + num_columns, + rowidx.begin() + 1, + stencil.begin()); + thrust::scatter_if(rmm::exec_policy(stream), + sorted_csr_parent_col_ids.begin() + 1, + sorted_csr_parent_col_ids.end(), + map.begin(), + stencil.begin(), + colidx.begin()); // condition is true if parent is not a list, or sentinel/root // Special case to return true if parent is a list and is_array_of_arrays is true @@ -524,7 +575,8 @@ 
reduce_to_column_tree_csr(tree_meta_t& tree, csr_column_categories[csr_parent_col_id] == node_t::NC_LIST) { cuda::atomic_ref ref{ *(list_parents_children_max_row_offsets + csr_parent_col_id)}; - ref.fetch_max(csr_max_row_offsets[col_id], cuda::std::memory_order_relaxed); + ref.fetch_max(csr_max_row_offsets[col_id], + cuda::std::memory_order_relaxed); } }); thrust::gather_if( @@ -578,11 +630,11 @@ reduce_to_column_tree_csr(tree_meta_t& tree, [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); return std::tuple{column_tree_csr{std::move(rowidx), - std::move(colidx), - std::move(csr_unique_col_ids), - std::move(csr_column_categories), - std::move(csr_col_range_begin), - std::move(csr_col_range_end)}, + std::move(colidx), + std::move(csr_unique_col_ids), + std::move(csr_column_categories), + std::move(csr_col_range_begin), + std::move(csr_col_range_end)}, std::move(csr_max_row_offsets)}; } diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index c557cbd1063..2997858839a 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -45,10 +45,10 @@ struct tree_meta_t { }; struct column_tree_csr { - //position of nnzs + // position of nnzs rmm::device_uvector rowidx; rmm::device_uvector colidx; - //node properties + // node properties rmm::device_uvector column_ids; rmm::device_uvector categories; rmm::device_uvector range_begin; @@ -318,11 +318,11 @@ reduce_to_column_tree(tree_meta_t& tree, * @return A tuple containing the column tree, identifier for each column and the maximum row index * in each column */ -std::tuple> -reduce_to_column_tree_csr(tree_meta_t& tree, - device_span col_ids, - device_span row_offsets, - rmm::cuda_stream_view stream); +std::tuple> reduce_to_column_tree_csr( + tree_meta_t& tree, + device_span col_ids, + device_span row_offsets, + rmm::cuda_stream_view stream); /** * @brief Retrieves the parse_options to be used for type inference and type casting From 
382633f816ba1aa34d7bb0be6a1e3881a617fe3a Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 25 Jun 2024 00:55:25 +0000 Subject: [PATCH 03/46] added test --- cpp/src/io/json/json_column.cu | 50 +-------- cpp/src/io/json/json_tree.cu | 51 +--------- cpp/src/io/json/json_utils.hpp | 81 +++++++++++++++ cpp/src/io/json/nested_json.hpp | 19 +++- cpp/tests/CMakeLists.txt | 1 + cpp/tests/io/json_tree_csr.cu | 174 ++++++++++++++++++++++++++++++++ 6 files changed, 276 insertions(+), 100 deletions(-) create mode 100644 cpp/src/io/json/json_utils.hpp create mode 100644 cpp/tests/io/json_tree_csr.cu diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 6e9c590e501..854c3796245 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -17,6 +17,7 @@ #include "io/utilities/parsing_utils.cuh" #include "io/utilities/string_parsing.hpp" #include "nested_json.hpp" +#include "json_utils.hpp" #include #include @@ -297,55 +298,6 @@ reduce_to_column_tree(tree_meta_t& tree, std::move(max_row_offsets)}; } -/** - * @brief Returns stable sorted keys and its sorted order - * - * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. - * Since the key and order is returned, using double buffer helps to avoid extra copy to user - * provided output iterator. - * - * @tparam IndexType sorted order type - * @tparam KeyType key type - * @param keys keys to sort - * @param stream CUDA stream used for device memory operations and kernel launches. 
- * @return Sorted keys and indices producing that sorted order - */ -template -std::pair, rmm::device_uvector> stable_sorted_key_order( - cudf::device_span keys, rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - - // Determine temporary device storage requirements - rmm::device_uvector keys_buffer1(keys.size(), stream); - rmm::device_uvector keys_buffer2(keys.size(), stream); - rmm::device_uvector order_buffer1(keys.size(), stream); - rmm::device_uvector order_buffer2(keys.size(), stream); - cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); - cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); - size_t temp_storage_bytes = 0; - cub::DeviceRadixSort::SortPairs( - nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); - rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - - thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); - thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); - - cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), - temp_storage_bytes, - keys_buffer, - order_buffer, - keys.size(), - 0, - sizeof(KeyType) * 8, - stream.value()); - - return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) - : std::move(keys_buffer2), - order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1) - : std::move(order_buffer2)}; -} - /** * @brief Reduces node tree representation to column tree CSR representation. 
* diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index ad807b57766..7d9d926bdd2 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -16,6 +16,7 @@ #include "io/utilities/hostdevice_vector.hpp" #include "nested_json.hpp" +#include "json_utils.hpp" #include #include @@ -33,7 +34,6 @@ #include #include -#include #include #include #include @@ -139,55 +139,6 @@ struct is_nested_end { } }; -/** - * @brief Returns stable sorted keys and its sorted order - * - * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. - * Since the key and order is returned, using double buffer helps to avoid extra copy to user - * provided output iterator. - * - * @tparam IndexType sorted order type - * @tparam KeyType key type - * @param keys keys to sort - * @param stream CUDA stream used for device memory operations and kernel launches. - * @return Sorted keys and indices producing that sorted order - */ -template -std::pair, rmm::device_uvector> stable_sorted_key_order( - cudf::device_span keys, rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - - // Determine temporary device storage requirements - rmm::device_uvector keys_buffer1(keys.size(), stream); - rmm::device_uvector keys_buffer2(keys.size(), stream); - rmm::device_uvector order_buffer1(keys.size(), stream); - rmm::device_uvector order_buffer2(keys.size(), stream); - cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); - cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); - size_t temp_storage_bytes = 0; - cub::DeviceRadixSort::SortPairs( - nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); - rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - - thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); - thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); - - 
cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), - temp_storage_bytes, - keys_buffer, - order_buffer, - keys.size(), - 0, - sizeof(KeyType) * 8, - stream.value()); - - return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) - : std::move(keys_buffer2), - order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1) - : std::move(order_buffer2)}; -} - /** * @brief Propagate parent node from first sibling to other siblings. * diff --git a/cpp/src/io/json/json_utils.hpp b/cpp/src/io/json/json_utils.hpp new file mode 100644 index 00000000000..80d8f2f9b0f --- /dev/null +++ b/cpp/src/io/json/json_utils.hpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cudf::io::json::detail { +/** + * @brief Returns stable sorted keys and its sorted order + * + * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. + * Since the key and order is returned, using double buffer helps to avoid extra copy to user + * provided output iterator. + * + * @tparam IndexType sorted order type + * @tparam KeyType key type + * @param keys keys to sort + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @return Sorted keys and indices producing that sorted order + */ +template +std::pair, rmm::device_uvector> stable_sorted_key_order( + cudf::device_span keys, rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + // Determine temporary device storage requirements + rmm::device_uvector keys_buffer1(keys.size(), stream); + rmm::device_uvector keys_buffer2(keys.size(), stream); + rmm::device_uvector order_buffer1(keys.size(), stream); + rmm::device_uvector order_buffer2(keys.size(), stream); + cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); + cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + + thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); + thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); + + cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_buffer, + order_buffer, + keys.size(), + 0, + sizeof(KeyType) * 8, + stream.value()); + + return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) + : std::move(keys_buffer2), + order_buffer.Current() == order_buffer1.data() ? 
std::move(order_buffer1) + : std::move(order_buffer2)}; +} + +} diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 2997858839a..8374c34db23 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -23,6 +23,8 @@ #include #include +#include +#include #include #include @@ -308,6 +310,16 @@ reduce_to_column_tree(tree_meta_t& tree, device_span row_offsets, rmm::cuda_stream_view stream); +std::tuple, rmm::device_uvector> +reduce_to_column_tree(tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream); + /** * @brief Reduce node tree into column tree by aggregating each property of column. * @@ -318,10 +330,15 @@ reduce_to_column_tree(tree_meta_t& tree, * @return A tuple containing the column tree, identifier for each column and the maximum row index * in each column */ + std::tuple> reduce_to_column_tree_csr( tree_meta_t& tree, - device_span col_ids, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); /** diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c6ab8aa021a..4585b9deacc 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -316,6 +316,7 @@ ConfigureTest( ConfigureTest(JSON_WRITER_TEST io/json_writer.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu) ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp) +ConfigureTest(JSON_TREE_CSR io/json_tree_csr.cu) ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) ConfigureTest(JSON_QUOTE_NORMALIZATION io/json_quote_normalization_test.cpp) diff --git 
a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu new file mode 100644 index 00000000000..6a52370080c --- /dev/null +++ b/cpp/tests/io/json_tree_csr.cu @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "io/json/nested_json.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace cuio_json = cudf::io::json; + +struct h_tree_meta_t { + std::vector node_categories; + std::vector parent_node_ids; + std::vector node_range_begin; + std::vector node_range_end; +}; + +struct h_column_tree_csr { + // position of nnzs + std::vector rowidx; + std::vector colidx; + // node properties + std::vector column_ids; + std::vector categories; + std::vector range_begin; + std::vector range_end; +}; + +bool check_equality(cuio_json::tree_meta_t &d_a, cuio_json::column_tree_csr &d_b, rmm::cuda_stream_view stream) { + // convert from tree_meta_t to column_tree_csr + h_tree_meta_t a{ + cudf::detail::make_std_vector_async(d_a.node_categories, stream), + cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), + cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), + cudf::detail::make_std_vector_async(d_a.node_range_end, stream) + }; + + h_column_tree_csr b{ + cudf::detail::make_std_vector_async(d_b.rowidx, stream), + 
cudf::detail::make_std_vector_async(d_b.colidx, stream), + cudf::detail::make_std_vector_async(d_b.column_ids, stream), + cudf::detail::make_std_vector_async(d_b.categories, stream), + cudf::detail::make_std_vector_async(d_b.range_begin, stream), + cudf::detail::make_std_vector_async(d_b.range_end, stream) + }; + + stream.synchronize(); + + auto num_nodes = a.parent_node_ids.size(); + if(b.rowidx.size() != num_nodes + 1) return false; + + for(auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { + auto v = b.colidx[pos]; + if(a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) return false; + } + for(size_t u = 1; u < num_nodes; u++) { + auto v = b.colidx[b.rowidx[u]]; + if(a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) return false; + for(auto pos = b.rowidx[u] + 1; pos < b.rowidx[u+1]; pos++) { + v = b.colidx[pos]; + if(a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) return false; + } + } + return true; +} + +struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; + +TEST_F(JsonColumnTreeTests, SimpleLines) +{ + auto const stream = cudf::get_default_stream(); + std::string const input = + R"( {} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; // Prepare input & output buffers + cudf::string_scalar d_scalar(input, true, stream); + auto d_input = cudf::device_span{d_scalar.data(), + static_cast(d_scalar.size())}; + + cudf::io::json_reader_options options{}; + options.enable_lines(true); + + // Parse the JSON and get the token stream + auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, rmm::mr::get_current_device_resource()); + + // Get the JSON's tree representation + auto gpu_tree = cuio_json::detail::get_tree_representation( + tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + + auto tup = + cuio_json::detail::records_orient_tree_traversal(d_input, + gpu_tree, + false, + 
options.is_enabled_lines(), + stream, + rmm::mr::get_current_device_resource()); + auto &gpu_col_id = std::get<0>(tup); + auto &gpu_row_offsets = std::get<1>(tup); + + auto const num_nodes = gpu_col_id.size(); + rmm::device_uvector sorted_col_ids(gpu_col_id.size(), stream); // make a copy + thrust::copy(rmm::exec_policy(stream), gpu_col_id.begin(), gpu_col_id.end(), sorted_col_ids.begin()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector node_ids(gpu_col_id.size(), stream); + thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key( + rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); + + cudf::size_type const row_array_parent_col_id = [&]() { + cudf::size_type value = cudf::io::json::parent_node_sentinel; + auto const list_node_index = options.is_enabled_lines() ? 0 : 1; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + gpu_col_id.data() + list_node_index, + sizeof(cudf::size_type), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + return value; + }(); + + auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = + cudf::io::json::detail::reduce_to_column_tree(gpu_tree, + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + false, + row_array_parent_col_id, + stream); + + auto [d_column_tree_csr, d_max_row_offsets_csr] = + cudf::io::json::detail::reduce_to_column_tree_csr(gpu_tree, + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + false, + row_array_parent_col_id, + stream); + + // assert equality between csr and meta formats + assert(check_equality(d_column_tree, d_column_tree_csr, stream)); +} From 1823854cea1fe1f01953f78e0329931c61596293 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 25 Jun 2024 00:56:27 +0000 Subject: [PATCH 04/46] formatting --- cpp/src/io/json/json_column.cu | 2 +- cpp/src/io/json/json_tree.cu | 2 +- cpp/src/io/json/json_utils.hpp | 9 +-- cpp/src/io/json/nested_json.hpp | 3 +- 
cpp/tests/io/json_tree_csr.cu | 107 ++++++++++++++++---------------- 5 files changed, 63 insertions(+), 60 deletions(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 854c3796245..9fb9e83d08b 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -16,8 +16,8 @@ #include "io/utilities/parsing_utils.cuh" #include "io/utilities/string_parsing.hpp" -#include "nested_json.hpp" #include "json_utils.hpp" +#include "nested_json.hpp" #include #include diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index 7d9d926bdd2..5e0d2b389ba 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -15,8 +15,8 @@ */ #include "io/utilities/hostdevice_vector.hpp" -#include "nested_json.hpp" #include "json_utils.hpp" +#include "nested_json.hpp" #include #include diff --git a/cpp/src/io/json/json_utils.hpp b/cpp/src/io/json/json_utils.hpp index 80d8f2f9b0f..8864bde84d8 100644 --- a/cpp/src/io/json/json_utils.hpp +++ b/cpp/src/io/json/json_utils.hpp @@ -16,16 +16,17 @@ #pragma once -#include #include +#include #include #include #include #include -#include -#include #include +#include + +#include #include namespace cudf::io::json::detail { @@ -78,4 +79,4 @@ std::pair, rmm::device_uvector> stable_s : std::move(order_buffer2)}; } -} +} // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 8374c34db23..bdcc8a223f1 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -22,8 +22,9 @@ #include #include -#include #include +#include + #include #include diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu index 6a52370080c..3bd64a8355d 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json_tree_csr.cu @@ -22,18 +22,19 @@ #include #include -#include #include +#include #include #include #include #include #include + #include -#include #include +#include namespace 
cuio_json = cudf::io::json; @@ -55,39 +56,38 @@ struct h_column_tree_csr { std::vector range_end; }; -bool check_equality(cuio_json::tree_meta_t &d_a, cuio_json::column_tree_csr &d_b, rmm::cuda_stream_view stream) { +bool check_equality(cuio_json::tree_meta_t& d_a, + cuio_json::column_tree_csr& d_b, + rmm::cuda_stream_view stream) +{ // convert from tree_meta_t to column_tree_csr - h_tree_meta_t a{ - cudf::detail::make_std_vector_async(d_a.node_categories, stream), - cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), - cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), - cudf::detail::make_std_vector_async(d_a.node_range_end, stream) - }; - - h_column_tree_csr b{ - cudf::detail::make_std_vector_async(d_b.rowidx, stream), - cudf::detail::make_std_vector_async(d_b.colidx, stream), - cudf::detail::make_std_vector_async(d_b.column_ids, stream), - cudf::detail::make_std_vector_async(d_b.categories, stream), - cudf::detail::make_std_vector_async(d_b.range_begin, stream), - cudf::detail::make_std_vector_async(d_b.range_end, stream) - }; + h_tree_meta_t a{cudf::detail::make_std_vector_async(d_a.node_categories, stream), + cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), + cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), + cudf::detail::make_std_vector_async(d_a.node_range_end, stream)}; + + h_column_tree_csr b{cudf::detail::make_std_vector_async(d_b.rowidx, stream), + cudf::detail::make_std_vector_async(d_b.colidx, stream), + cudf::detail::make_std_vector_async(d_b.column_ids, stream), + cudf::detail::make_std_vector_async(d_b.categories, stream), + cudf::detail::make_std_vector_async(d_b.range_begin, stream), + cudf::detail::make_std_vector_async(d_b.range_end, stream)}; stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if(b.rowidx.size() != num_nodes + 1) return false; + if (b.rowidx.size() != num_nodes + 1) return false; - for(auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { + for 
(auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if(a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) return false; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) return false; } - for(size_t u = 1; u < num_nodes; u++) { + for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if(a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) return false; - for(auto pos = b.rowidx[u] + 1; pos < b.rowidx[u+1]; pos++) { + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) return false; + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if(a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) return false; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) return false; } } return true; @@ -117,19 +117,20 @@ TEST_F(JsonColumnTreeTests, SimpleLines) auto gpu_tree = cuio_json::detail::get_tree_representation( tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); - auto tup = + auto tup = cuio_json::detail::records_orient_tree_traversal(d_input, - gpu_tree, - false, - options.is_enabled_lines(), - stream, - rmm::mr::get_current_device_resource()); - auto &gpu_col_id = std::get<0>(tup); - auto &gpu_row_offsets = std::get<1>(tup); - - auto const num_nodes = gpu_col_id.size(); + gpu_tree, + false, + options.is_enabled_lines(), + stream, + rmm::mr::get_current_device_resource()); + auto& gpu_col_id = std::get<0>(tup); + auto& gpu_row_offsets = std::get<1>(tup); + + auto const num_nodes = gpu_col_id.size(); rmm::device_uvector sorted_col_ids(gpu_col_id.size(), stream); // make a copy - thrust::copy(rmm::exec_policy(stream), gpu_col_id.begin(), gpu_col_id.end(), sorted_col_ids.begin()); + thrust::copy( + rmm::exec_policy(stream), gpu_col_id.begin(), gpu_col_id.end(), sorted_col_ids.begin()); // sort by {col_id} on {node_ids} stable rmm::device_uvector node_ids(gpu_col_id.size(), stream); @@ -138,7 +139,7 @@ 
TEST_F(JsonColumnTreeTests, SimpleLines) rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); cudf::size_type const row_array_parent_col_id = [&]() { - cudf::size_type value = cudf::io::json::parent_node_sentinel; + cudf::size_type value = cudf::io::json::parent_node_sentinel; auto const list_node_index = options.is_enabled_lines() ? 0 : 1; CUDF_CUDA_TRY(cudaMemcpyAsync(&value, gpu_col_id.data() + list_node_index, @@ -151,24 +152,24 @@ TEST_F(JsonColumnTreeTests, SimpleLines) auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = cudf::io::json::detail::reduce_to_column_tree(gpu_tree, - gpu_col_id, - sorted_col_ids, - node_ids, - gpu_row_offsets, - false, - row_array_parent_col_id, - stream); + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + false, + row_array_parent_col_id, + stream); auto [d_column_tree_csr, d_max_row_offsets_csr] = cudf::io::json::detail::reduce_to_column_tree_csr(gpu_tree, - gpu_col_id, - sorted_col_ids, - node_ids, - gpu_row_offsets, - false, - row_array_parent_col_id, - stream); - - // assert equality between csr and meta formats + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + false, + row_array_parent_col_id, + stream); + + // assert equality between csr and meta formats assert(check_equality(d_column_tree, d_column_tree_csr, stream)); } From 84a7749066539cd661586bafee4ed5a7c72ed95f Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 28 Jun 2024 17:10:41 +0000 Subject: [PATCH 05/46] fixing csr construction --- cpp/CMakeLists.txt | 1 + cpp/src/io/json/json_column.cu | 292 --------------------------------- cpp/tests/io/json_tree_csr.cu | 16 +- 3 files changed, 16 insertions(+), 293 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5fd68bfb26c..2b94e273404 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -387,6 +387,7 @@ add_library( src/io/functions.cpp src/io/json/byte_range_info.cu src/io/json/json_column.cu + 
src/io/json/json_column_csr.cu src/io/json/json_normalization.cu src/io/json/json_tree.cu src/io/json/nested_json_gpu.cu diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 5d0c307a5bc..43c5b10c9a8 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -298,298 +298,6 @@ reduce_to_column_tree(tree_meta_t& tree, std::move(max_row_offsets)}; } -/** - * @brief Reduces node tree representation to column tree CSR representation. - * - * @param tree Node tree representation of JSON string - * @param original_col_ids Column ids of nodes - * @param sorted_col_ids Sorted column ids of nodes - * @param ordered_node_ids Node ids of nodes sorted by column ids - * @param row_offsets Row offsets of nodes - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true - * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple of column tree representation of JSON string, column ids of columns, and - * max row offsets of columns - */ -std::tuple> reduce_to_column_tree_csr( - tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, - bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - // 1. 
column count for allocation - auto const num_columns = - thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); - - rmm::device_uvector unique_node_ids(num_columns, stream); - rmm::device_uvector csr_unique_node_ids(num_columns, stream); - rmm::device_uvector column_levels(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_node_ids.begin(), - thrust::make_discard_iterator(), - unique_node_ids.begin()); - thrust::copy_n( - rmm::exec_policy(stream), - thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), - unique_node_ids.size(), - column_levels.begin()); - auto [sorted_column_levels, sorted_column_levels_order] = - stable_sorted_key_order(column_levels, stream); - - // 2. reduce_by_key {col_id}, {row_offset}, max. - rmm::device_uvector unique_col_ids(num_columns, stream); - rmm::device_uvector max_row_offsets(num_columns, stream); - rmm::device_uvector csr_unique_col_ids(num_columns, stream); - rmm::device_uvector csr_max_row_offsets(num_columns, stream); - auto ordered_row_offsets = - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); - thrust::reduce_by_key(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_row_offsets, - unique_col_ids.begin(), - max_row_offsets.begin(), - thrust::equal_to(), - thrust::maximum()); - - // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector column_categories(num_columns, stream); - rmm::device_uvector csr_column_categories(num_columns, stream); - thrust::reduce_by_key( - rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), - unique_col_ids.begin(), - column_categories.begin(), - thrust::equal_to(), - [] __device__(NodeT type_a, NodeT type_b) -> NodeT { - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - return type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); - } else if (is_b_leaf) { - return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); - } - // *+#=E - return NC_ERR; - }); - - auto csr_permutation_it = thrust::make_zip_iterator( - thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), - thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), - thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), - thrust::make_permutation_iterator(column_categories.begin(), - sorted_column_levels_order.begin())); - thrust::copy(rmm::exec_policy(stream), - csr_permutation_it, - csr_permutation_it + num_columns, - thrust::make_zip_iterator(csr_unique_node_ids.begin(), - csr_unique_col_ids.begin(), - csr_max_row_offsets.begin(), - csr_column_categories.begin())); - - // 4. 
unique_copy parent_node_ids, ranges - rmm::device_uvector csr_parent_col_ids(num_columns, stream); - rmm::device_uvector csr_col_range_begin(num_columns, stream); // Field names - rmm::device_uvector csr_col_range_end(num_columns, stream); - thrust::copy_n( - rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), csr_unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_begin.begin(), csr_unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_end.begin(), csr_unique_node_ids.begin())), - csr_unique_node_ids.size(), - thrust::make_zip_iterator( - csr_parent_col_ids.begin(), csr_col_range_begin.begin(), csr_col_range_end.begin())); - - // convert parent_node_ids to parent_col_ids - thrust::transform( - rmm::exec_policy(stream), - csr_parent_col_ids.begin(), - csr_parent_col_ids.end(), - csr_parent_col_ids.begin(), - [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { - return parent_node_id == parent_node_sentinel ? parent_node_sentinel - : col_ids[parent_node_id]; - }); - - /* - CSR construction: - 1. Sort column levels and get their ordering - 2. For each column node coln iterated according to sorted_column_levels; do - a. Find nodes that have coln as the parent node -> set adj_coln - b. row idx[coln] = size of adj_coln + 1 - c. 
col idx[coln] = adj_coln U {parent_col_id[coln]} - */ - - rmm::device_uvector rowidx(num_columns + 1, stream); - thrust::fill(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), 0); - auto [sorted_csr_parent_col_ids, sorted_csr_parent_col_ids_order] = - stable_sorted_key_order(csr_parent_col_ids, stream); - rmm::device_uvector non_leaf_nodes(num_columns, stream); - rmm::device_uvector non_leaf_adjacency(num_columns, stream); - thrust::reduce_by_key(rmm::exec_policy(stream), - sorted_csr_parent_col_ids.begin(), - sorted_csr_parent_col_ids.end(), - thrust::make_constant_iterator(1), - non_leaf_nodes.begin(), - non_leaf_adjacency.begin(), - thrust::equal_to()); - // Add the non_leaf_adjacency to rowidx at positions non_leaf_nodes - thrust::transform(rmm::exec_policy(stream), - non_leaf_nodes.begin(), - non_leaf_nodes.end(), - thrust::make_constant_iterator(1), - non_leaf_nodes.begin(), - thrust::plus()); - thrust::scatter(rmm::exec_policy(stream), - non_leaf_adjacency.begin(), - non_leaf_adjacency.end(), - non_leaf_nodes.begin(), - rowidx.begin() + 1); - // We are discarding the parent of the root node. 
- thrust::transform(rmm::exec_policy(stream), - rowidx.begin() + 2, - rowidx.end(), - thrust::make_constant_iterator(1), - rowidx.begin() + 1, - thrust::plus()); - thrust::inclusive_scan( - rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); - - rmm::device_uvector colidx((num_columns - 1) * 2, stream); - thrust::scatter(rmm::exec_policy(stream), - csr_parent_col_ids.begin(), - csr_parent_col_ids.end(), - rowidx.begin() + 1, - colidx.begin()); - rmm::device_uvector map((num_columns - 1) * 2, stream); - thrust::sequence(rmm::exec_policy(stream), map.begin(), map.end()); - rmm::device_uvector stencil((num_columns - 1) * 2, stream); - thrust::fill(rmm::exec_policy(stream), stencil.begin(), stencil.end(), 1); - thrust::scatter(rmm::exec_policy(stream), - thrust::make_constant_iterator(0), - thrust::make_constant_iterator(0) + num_columns, - rowidx.begin() + 1, - stencil.begin()); - thrust::scatter_if(rmm::exec_policy(stream), - sorted_csr_parent_col_ids.begin() + 1, - sorted_csr_parent_col_ids.end(), - map.begin(), - stencil.begin(), - colidx.begin()); - - // condition is true if parent is not a list, or sentinel/root - // Special case to return true if parent is a list and is_array_of_arrays is true - auto is_non_list_parent = [column_categories = column_categories.begin(), - is_array_of_arrays, - row_array_parent_col_id] __device__(auto parent_col_id) -> bool { - return !(parent_col_id == parent_node_sentinel || - column_categories[parent_col_id] == NC_LIST && - (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - }; - // Mixed types in List children go to different columns, - // so all immediate children of list column should have same max_row_offsets. - // create list's children max_row_offsets array. (initialize to zero) - // atomicMax on children max_row_offsets array. - // gather the max_row_offsets from children row offset array. 
- { - rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy(stream), - list_parents_children_max_row_offsets.begin(), - list_parents_children_max_row_offsets.end(), - 0); - thrust::for_each(rmm::exec_policy(stream), - csr_unique_col_ids.begin(), - csr_unique_col_ids.end(), - [csr_column_categories = csr_column_categories.begin(), - csr_parent_col_ids = csr_parent_col_ids.begin(), - csr_max_row_offsets = csr_max_row_offsets.begin(), - list_parents_children_max_row_offsets = - list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { - auto csr_parent_col_id = csr_parent_col_ids[col_id]; - if (csr_parent_col_id != parent_node_sentinel and - csr_column_categories[csr_parent_col_id] == node_t::NC_LIST) { - cuda::atomic_ref ref{ - *(list_parents_children_max_row_offsets + csr_parent_col_id)}; - ref.fetch_max(csr_max_row_offsets[col_id], - cuda::std::memory_order_relaxed); - } - }); - thrust::gather_if( - rmm::exec_policy(stream), - csr_parent_col_ids.begin(), - csr_parent_col_ids.end(), - csr_parent_col_ids.begin(), - list_parents_children_max_row_offsets.begin(), - csr_max_row_offsets.begin(), - [csr_column_categories = csr_column_categories.begin()] __device__(size_type parent_col_id) { - return parent_col_id != parent_node_sentinel and - csr_column_categories[parent_col_id] == node_t::NC_LIST; - }); - } - - // copy lists' max_row_offsets to children. - // all structs should have same size. 
- thrust::transform_if( - rmm::exec_policy(stream), - csr_unique_col_ids.begin(), - csr_unique_col_ids.end(), - csr_max_row_offsets.begin(), - [csr_column_categories = csr_column_categories.begin(), - is_non_list_parent, - csr_parent_col_ids = csr_parent_col_ids.begin(), - csr_max_row_offsets = csr_max_row_offsets.begin()] __device__(size_type col_id) { - auto parent_col_id = csr_parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - while (is_non_list_parent(parent_col_id)) { - col_id = parent_col_id; - parent_col_id = csr_parent_col_ids[parent_col_id]; - } - return csr_max_row_offsets[col_id]; - }, - [csr_column_categories = csr_column_categories.begin(), - is_non_list_parent, - parent_col_ids = csr_parent_col_ids.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - return is_non_list_parent(parent_col_id); - }); - - // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) - thrust::transform_if( - rmm::exec_policy(stream), - csr_col_range_begin.begin(), - csr_col_range_begin.end(), - csr_column_categories.begin(), - csr_col_range_end.begin(), - [] __device__(auto i) { return i + 1; }, - [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); - - return std::tuple{column_tree_csr{std::move(rowidx), - std::move(colidx), - std::move(csr_unique_col_ids), - std::move(csr_column_categories), - std::move(csr_col_range_begin), - std::move(csr_col_range_end)}, - std::move(csr_max_row_offsets)}; -} - /** * @brief Get the column indices for the values column for array of arrays rows * diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu index 3bd64a8355d..c20ea938298 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json_tree_csr.cu @@ -56,6 +56,14 @@ struct h_column_tree_csr { std::vector range_end; }; +template +void print(std::string str, 
std::vector &vec) { + std::cout << str << " = "; + for(size_t i = 0; i < vec.size(); i++) + std::cout << vec[i] << " "; + std::cout << std::endl; +} + bool check_equality(cuio_json::tree_meta_t& d_a, cuio_json::column_tree_csr& d_b, rmm::cuda_stream_view stream) @@ -90,6 +98,11 @@ bool check_equality(cuio_json::tree_meta_t& d_a, if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) return false; } } + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) return false; + if (a.node_range_begin[b.column_ids[u]] != b.range_begin[u]) return false; + if (a.node_range_end[b.column_ids[u]] != b.range_end[u]) return false; + } return true; } @@ -170,6 +183,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines) row_array_parent_col_id, stream); + auto iseq = check_equality(d_column_tree, d_column_tree_csr, stream); // assert equality between csr and meta formats - assert(check_equality(d_column_tree, d_column_tree_csr, stream)); + assert(iseq == true); } From 810c389ff575c9300ffefdf4edacfd352d5352a1 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 28 Jun 2024 17:11:25 +0000 Subject: [PATCH 06/46] moving the csr algorithms --- cpp/src/io/json/json_column_csr.cu | 330 +++++++++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 cpp/src/io/json/json_column_csr.cu diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu new file mode 100644 index 00000000000..c1f58609381 --- /dev/null +++ b/cpp/src/io/json/json_column_csr.cu @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "json_utils.hpp" +#include "nested_json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf::io::json::detail { + +/** + * @brief Reduces node tree representation to column tree CSR representation. + * + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple> reduce_to_column_tree_csr( + tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // 1. 
column count for allocation + auto const num_columns = + thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + + rmm::device_uvector unique_node_ids(num_columns, stream); + rmm::device_uvector csr_unique_node_ids(num_columns, stream); + rmm::device_uvector column_levels(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_node_ids.begin(), + thrust::make_discard_iterator(), + unique_node_ids.begin()); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), + unique_node_ids.size(), + column_levels.begin()); + auto [sorted_column_levels, sorted_column_levels_order] = + stable_sorted_key_order(column_levels, stream); + + // 2. reduce_by_key {col_id}, {row_offset}, max. + rmm::device_uvector unique_col_ids(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector csr_unique_col_ids(num_columns, stream); + rmm::device_uvector csr_max_row_offsets(num_columns, stream); + auto ordered_row_offsets = + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); + thrust::reduce_by_key(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_row_offsets, + unique_col_ids.begin(), + max_row_offsets.begin(), + thrust::equal_to(), + thrust::maximum()); + + // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + rmm::device_uvector column_categories(num_columns, stream); + rmm::device_uvector csr_column_categories(num_columns, stream); + thrust::reduce_by_key( + rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), + unique_col_ids.begin(), + column_categories.begin(), + thrust::equal_to(), + [] __device__(NodeT type_a, NodeT type_b) -> NodeT { + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + return type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + } else if (is_b_leaf) { + return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); + } + // *+#=E + return NC_ERR; + }); + + auto csr_permutation_it = thrust::make_zip_iterator( + thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), + thrust::make_permutation_iterator(column_categories.begin(), + sorted_column_levels_order.begin())); + thrust::copy(rmm::exec_policy(stream), + csr_permutation_it, + csr_permutation_it + num_columns, + thrust::make_zip_iterator(csr_unique_node_ids.begin(), + csr_unique_col_ids.begin(), + csr_max_row_offsets.begin(), + csr_column_categories.begin())); + + // 4. 
unique_copy parent_node_ids, ranges + rmm::device_uvector csr_parent_col_ids(num_columns, stream); + rmm::device_uvector csr_col_range_begin(num_columns, stream); // Field names + rmm::device_uvector csr_col_range_end(num_columns, stream); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), csr_unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_begin.begin(), csr_unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_end.begin(), csr_unique_node_ids.begin())), + csr_unique_node_ids.size(), + thrust::make_zip_iterator( + csr_parent_col_ids.begin(), csr_col_range_begin.begin(), csr_col_range_end.begin())); + + // convert parent_node_ids to parent_col_ids + thrust::transform( + rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel + : col_ids[parent_node_id]; + }); + + /* + CSR construction: + 1. Sort column levels and get their ordering + 2. For each column node coln iterated according to sorted_column_levels; do + a. Find nodes that have coln as the parent node -> set adj_coln + b. row idx[coln] = size of adj_coln + 1 + c. 
col idx[coln] = adj_coln U {parent_col_id[coln]} + */ + + rmm::device_uvector rowidx(num_columns + 1, stream); + thrust::fill(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), 0); + + // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) + // children adjacency + auto num_non_leaf_columns = thrust::unique_count(rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end()); + thrust::reduce_by_key(rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end(), thrust::make_constant_iterator(1), thrust::make_discard_iterator(), rowidx.begin() + 1, thrust::equal_to()); + thrust::inclusive_scan( + rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); + // overwrite the csr_parent_col_ids with the col ids in the csr tree + thrust::fill(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), -1); + thrust::scatter(rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_non_leaf_columns, rowidx.begin(), csr_parent_col_ids.begin() + 1); + thrust::inclusive_scan(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), csr_parent_col_ids.begin(), thrust::maximum{}); + // We are discarding the parent of the root node. Add the parent adjacency. 
Since we have already performed the scan, we use a counting iterator to add + thrust::transform(rmm::exec_policy(stream), + rowidx.begin() + 2, + rowidx.end(), + thrust::make_counting_iterator(1), + rowidx.begin() + 2, + thrust::plus()); + + rmm::device_uvector colidx((num_columns - 1) * 2, stream); + thrust::fill(rmm::exec_policy(stream), colidx.begin(), colidx.end(), 0); + // Skip the parent of root node + thrust::scatter(rmm::exec_policy(stream), + csr_parent_col_ids.begin() + 1, + csr_parent_col_ids.end(), + rowidx.begin() + 1, + colidx.begin()); + // excluding root node + rmm::device_uvector map(num_columns - 1, stream); + thrust::fill(rmm::exec_policy(stream), map.begin(), map.end(), 1); + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end(), map.begin(), map.begin()); + thrust::for_each(rmm::exec_policy(stream), thrust::make_counting_iterator(1), thrust::make_counting_iterator(1) + num_columns - 1, + [rowidx = rowidx.begin(), map = map.begin(), csr_parent_col_ids = csr_parent_col_ids.begin()] __device__(auto i) { + auto csr_parent_col_id = csr_parent_col_ids[i]; + if(csr_parent_col_id == 0) map[i - 1]--; + else map[i - 1] += rowidx[csr_parent_col_id]; + }); + thrust::scatter(rmm::exec_policy(stream), thrust::make_counting_iterator(1), thrust::make_counting_iterator(1) + num_columns - 1, map.begin(), colidx.begin()); + + // condition is true if parent is not a list, or sentinel/root + // Special case to return true if parent is a list and is_array_of_arrays is true + auto is_non_list_parent = [column_categories = column_categories.begin(), + is_array_of_arrays, + row_array_parent_col_id] __device__(auto parent_col_id) -> bool { + return !(parent_col_id == parent_node_sentinel || + column_categories[parent_col_id] == NC_LIST && + (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); + }; + // Mixed types in List children go to different columns, + // so all immediate children of list 
column should have same max_row_offsets. + // create list's children max_row_offsets array. (initialize to zero) + // atomicMax on children max_row_offsets array. + // gather the max_row_offsets from children row offset array. + { + rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); + thrust::fill(rmm::exec_policy(stream), + list_parents_children_max_row_offsets.begin(), + list_parents_children_max_row_offsets.end(), + 0); + thrust::for_each(rmm::exec_policy(stream), + csr_unique_col_ids.begin(), + csr_unique_col_ids.end(), + [csr_column_categories = csr_column_categories.begin(), + csr_parent_col_ids = csr_parent_col_ids.begin(), + csr_max_row_offsets = csr_max_row_offsets.begin(), + list_parents_children_max_row_offsets = + list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { + auto csr_parent_col_id = csr_parent_col_ids[col_id]; + if (csr_parent_col_id != parent_node_sentinel and + csr_column_categories[csr_parent_col_id] == node_t::NC_LIST) { + cuda::atomic_ref ref{ + *(list_parents_children_max_row_offsets + csr_parent_col_id)}; + ref.fetch_max(csr_max_row_offsets[col_id], + cuda::std::memory_order_relaxed); + } + }); + thrust::gather_if( + rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + list_parents_children_max_row_offsets.begin(), + csr_max_row_offsets.begin(), + [csr_column_categories = csr_column_categories.begin()] __device__(size_type parent_col_id) { + return parent_col_id != parent_node_sentinel and + csr_column_categories[parent_col_id] == node_t::NC_LIST; + }); + } + + // copy lists' max_row_offsets to children. + // all structs should have same size. 
+ thrust::transform_if( + rmm::exec_policy(stream), + csr_unique_col_ids.begin(), + csr_unique_col_ids.end(), + csr_max_row_offsets.begin(), + [csr_column_categories = csr_column_categories.begin(), + is_non_list_parent, + csr_parent_col_ids = csr_parent_col_ids.begin(), + csr_max_row_offsets = csr_max_row_offsets.begin()] __device__(size_type col_id) { + auto parent_col_id = csr_parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + while (is_non_list_parent(parent_col_id)) { + col_id = parent_col_id; + parent_col_id = csr_parent_col_ids[parent_col_id]; + } + return csr_max_row_offsets[col_id]; + }, + [csr_column_categories = csr_column_categories.begin(), + is_non_list_parent, + parent_col_ids = csr_parent_col_ids.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + return is_non_list_parent(parent_col_id); + }); + + // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) + thrust::transform_if( + rmm::exec_policy(stream), + csr_col_range_begin.begin(), + csr_col_range_begin.end(), + csr_column_categories.begin(), + csr_col_range_end.begin(), + [] __device__(auto i) { return i + 1; }, + [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); + + return std::tuple{column_tree_csr{std::move(rowidx), + std::move(colidx), + std::move(csr_unique_col_ids), + std::move(csr_column_categories), + std::move(csr_col_range_begin), + std::move(csr_col_range_end)}, + std::move(csr_max_row_offsets)}; +} + +} // namespace cudf::io::json::detail From 6a1a415ec1f4a758cb650da0cd3cf3ef332ea6ff Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 28 Jun 2024 17:13:35 +0000 Subject: [PATCH 07/46] formatting --- cpp/src/io/json/json_column_csr.cu | 56 +++++++++++++++++++++++------- cpp/tests/io/json_tree_csr.cu | 5 +-- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git 
a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index c1f58609381..dbb32f278f0 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -195,15 +195,31 @@ std::tuple> reduce_to_column_tre // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency - auto num_non_leaf_columns = thrust::unique_count(rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end()); - thrust::reduce_by_key(rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end(), thrust::make_constant_iterator(1), thrust::make_discard_iterator(), rowidx.begin() + 1, thrust::equal_to()); + auto num_non_leaf_columns = thrust::unique_count( + rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end()); + thrust::reduce_by_key(rmm::exec_policy(stream), + csr_parent_col_ids.begin() + 1, + csr_parent_col_ids.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + rowidx.begin() + 1, + thrust::equal_to()); thrust::inclusive_scan( rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); // overwrite the csr_parent_col_ids with the col ids in the csr tree thrust::fill(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), -1); - thrust::scatter(rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + num_non_leaf_columns, rowidx.begin(), csr_parent_col_ids.begin() + 1); - thrust::inclusive_scan(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), csr_parent_col_ids.begin(), thrust::maximum{}); - // We are discarding the parent of the root node. Add the parent adjacency. 
Since we have already performed the scan, we use a counting iterator to add + thrust::scatter(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + num_non_leaf_columns, + rowidx.begin(), + csr_parent_col_ids.begin() + 1); + thrust::inclusive_scan(rmm::exec_policy(stream), + csr_parent_col_ids.begin(), + csr_parent_col_ids.end(), + csr_parent_col_ids.begin(), + thrust::maximum{}); + // We are discarding the parent of the root node. Add the parent adjacency. Since we have already + // performed the scan, we use a counting iterator to add thrust::transform(rmm::exec_policy(stream), rowidx.begin() + 2, rowidx.end(), @@ -222,14 +238,28 @@ std::tuple> reduce_to_column_tre // excluding root node rmm::device_uvector map(num_columns - 1, stream); thrust::fill(rmm::exec_policy(stream), map.begin(), map.end(), 1); - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end(), map.begin(), map.begin()); - thrust::for_each(rmm::exec_policy(stream), thrust::make_counting_iterator(1), thrust::make_counting_iterator(1) + num_columns - 1, - [rowidx = rowidx.begin(), map = map.begin(), csr_parent_col_ids = csr_parent_col_ids.begin()] __device__(auto i) { - auto csr_parent_col_id = csr_parent_col_ids[i]; - if(csr_parent_col_id == 0) map[i - 1]--; - else map[i - 1] += rowidx[csr_parent_col_id]; - }); - thrust::scatter(rmm::exec_policy(stream), thrust::make_counting_iterator(1), thrust::make_counting_iterator(1) + num_columns - 1, map.begin(), colidx.begin()); + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + csr_parent_col_ids.begin() + 1, + csr_parent_col_ids.end(), + map.begin(), + map.begin()); + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + [rowidx = rowidx.begin(), + map = map.begin(), + csr_parent_col_ids = csr_parent_col_ids.begin()] __device__(auto i) { + auto csr_parent_col_id 
= csr_parent_col_ids[i]; + if (csr_parent_col_id == 0) + map[i - 1]--; + else + map[i - 1] += rowidx[csr_parent_col_id]; + }); + thrust::scatter(rmm::exec_policy(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + map.begin(), + colidx.begin()); // condition is true if parent is not a list, or sentinel/root // Special case to return true if parent is a list and is_array_of_arrays is true diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu index c20ea938298..10735921331 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json_tree_csr.cu @@ -57,9 +57,10 @@ struct h_column_tree_csr { }; template -void print(std::string str, std::vector &vec) { +void print(std::string str, std::vector& vec) +{ std::cout << str << " = "; - for(size_t i = 0; i < vec.size(); i++) + for (size_t i = 0; i < vec.size(); i++) std::cout << vec[i] << " "; std::cout << std::endl; } From 4bba629cc5d93f9c578b1503281a49b0f0985142 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 15 Jul 2024 17:26:31 +0000 Subject: [PATCH 08/46] moving to experimental namespace --- cpp/src/io/json/json_column.cu | 1 - cpp/src/io/json/json_column_csr.cu | 4 +- cpp/src/io/json/nested_json.hpp | 74 ++++++++++++++++-------------- cpp/tests/io/json_tree_csr.cu | 4 +- 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 43c5b10c9a8..ca8466a22b4 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index dbb32f278f0..8e7233fa148 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -49,7 +49,7 @@ #include #include -namespace cudf::io::json::detail { +namespace cudf::io::json::experimental::detail { /** * @brief Reduces node 
tree representation to column tree CSR representation. @@ -95,7 +95,7 @@ std::tuple> reduce_to_column_tre unique_node_ids.size(), column_levels.begin()); auto [sorted_column_levels, sorted_column_levels_order] = - stable_sorted_key_order(column_levels, stream); + cudf::io::json::detail::stable_sorted_key_order(column_levels, stream); // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index bdcc8a223f1..16dc9d63b2f 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -47,19 +47,6 @@ struct tree_meta_t { rmm::device_uvector node_range_end; }; -struct column_tree_csr { - // position of nnzs - rmm::device_uvector rowidx; - rmm::device_uvector colidx; - // node properties - rmm::device_uvector column_ids; - rmm::device_uvector categories; - rmm::device_uvector range_begin; - rmm::device_uvector range_end; - std::vector ignore_vals; - std::vector is_mixed_type_column; - std::vector is_pruned; -}; /** * @brief A column type @@ -200,6 +187,46 @@ struct device_json_column { } }; +namespace experimental { +struct column_tree_csr { + // position of nnzs + rmm::device_uvector rowidx; + rmm::device_uvector colidx; + // node properties + rmm::device_uvector column_ids; + rmm::device_uvector categories; + rmm::device_uvector range_begin; + rmm::device_uvector range_end; + std::vector ignore_vals; + std::vector is_mixed_type_column; + std::vector is_pruned; +}; + +namespace detail { +/** + * @brief Reduce node tree into column tree by aggregating each property of column. 
+ * + * @param tree json node tree to reduce (modified in-place, but restored to original state) + * @param col_ids column ids of each node (modified in-place, but restored to original state) + * @param row_offsets row offsets of each node (modified in-place, but restored to original state) + * @param stream The CUDA stream to which kernels are dispatched + * @return A tuple containing the column tree, identifier for each column and the maximum row index + * in each column + */ + +std::tuple> reduce_to_column_tree_csr( + tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream); + +} +} + namespace detail { // TODO: return device_uvector instead of passing pre-allocated memory @@ -321,27 +348,6 @@ reduce_to_column_tree(tree_meta_t& tree, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); -/** - * @brief Reduce node tree into column tree by aggregating each property of column. 
- * - * @param tree json node tree to reduce (modified in-place, but restored to original state) - * @param col_ids column ids of each node (modified in-place, but restored to original state) - * @param row_offsets row offsets of each node (modified in-place, but restored to original state) - * @param stream The CUDA stream to which kernels are dispatched - * @return A tuple containing the column tree, identifier for each column and the maximum row index - * in each column - */ - -std::tuple> reduce_to_column_tree_csr( - tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, - bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, - rmm::cuda_stream_view stream); - /** * @brief Retrieves the parse_options to be used for type inference and type casting * diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu index 10735921331..e73e4f2a629 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json_tree_csr.cu @@ -66,7 +66,7 @@ void print(std::string str, std::vector& vec) } bool check_equality(cuio_json::tree_meta_t& d_a, - cuio_json::column_tree_csr& d_b, + cuio_json::experimental::column_tree_csr& d_b, rmm::cuda_stream_view stream) { // convert from tree_meta_t to column_tree_csr @@ -175,7 +175,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines) stream); auto [d_column_tree_csr, d_max_row_offsets_csr] = - cudf::io::json::detail::reduce_to_column_tree_csr(gpu_tree, + cudf::io::json::experimental::detail::reduce_to_column_tree_csr(gpu_tree, gpu_col_id, sorted_col_ids, node_ids, From df9e65b2fa03a282a740f936b7810345180650f1 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 15 Jul 2024 17:39:35 +0000 Subject: [PATCH 09/46] formatting --- cpp/src/io/json/json_column_csr.cu | 2 +- cpp/src/io/json/nested_json.hpp | 5 ++--- cpp/tests/io/json_tree_csr.cu | 14 +++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git 
a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index 8e7233fa148..ee65dbc3bc8 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -357,4 +357,4 @@ std::tuple> reduce_to_column_tre std::move(csr_max_row_offsets)}; } -} // namespace cudf::io::json::detail +} // namespace cudf::io::json::experimental::detail diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 16dc9d63b2f..386b55ed2a0 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -47,7 +47,6 @@ struct tree_meta_t { rmm::device_uvector node_range_end; }; - /** * @brief A column type */ @@ -224,8 +223,8 @@ std::tuple> reduce_to_column_tre NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); -} -} +} // namespace detail +} // namespace experimental namespace detail { diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu index e73e4f2a629..12e92551521 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json_tree_csr.cu @@ -176,13 +176,13 @@ TEST_F(JsonColumnTreeTests, SimpleLines) auto [d_column_tree_csr, d_max_row_offsets_csr] = cudf::io::json::experimental::detail::reduce_to_column_tree_csr(gpu_tree, - gpu_col_id, - sorted_col_ids, - node_ids, - gpu_row_offsets, - false, - row_array_parent_col_id, - stream); + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + false, + row_array_parent_col_id, + stream); auto iseq = check_equality(d_column_tree, d_column_tree_csr, stream); // assert equality between csr and meta formats From d1588c886e7f46c6b8712c8ebd75e96fadcae8e4 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Mon, 15 Jul 2024 17:53:41 +0000 Subject: [PATCH 10/46] removed node properties from csr struct - will be introduced in stages in later PRs --- cpp/src/io/json/nested_json.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 
386b55ed2a0..20019a703c9 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -196,9 +196,6 @@ struct column_tree_csr { rmm::device_uvector categories; rmm::device_uvector range_begin; rmm::device_uvector range_end; - std::vector ignore_vals; - std::vector is_mixed_type_column; - std::vector is_pruned; }; namespace detail { From 5541b93db7ac3dae2b7d0e207213397911b9a12a Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 24 Jul 2024 22:59:45 +0000 Subject: [PATCH 11/46] partial commit --- cpp/src/io/json/json_column_csr.cu | 241 +++++++++++++---------------- cpp/src/io/json/nested_json.hpp | 25 ++- cpp/tests/io/json_tree_csr.cu | 1 + 3 files changed, 127 insertions(+), 140 deletions(-) diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index ee65dbc3bc8..866820fa67c 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,25 @@ namespace cudf::io::json::experimental::detail { +using row_offset_t = size_type; + +struct unvalidated_column_tree { + rmm::device_uvector rowidx; + rmm::device_uvector colidx; + rmm::device_uvector max_row_offsets; + rmm::device_uvector column_categories; +}; + +struct level_ordering { + device_span node_levels; + device_span col_ids; + __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const + { + return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || + (node_levels[lhs_node_id] == node_levels[rhs_node_id] && col_ids[lhs_node_id] < col_ids[rhs_node_id]); + } +}; + /** * @brief Reduces node tree representation to column tree CSR representation. 
* @@ -65,129 +85,93 @@ namespace cudf::io::json::experimental::detail { * @return A tuple of column tree representation of JSON string, column ids of columns, and * max row offsets of columns */ -std::tuple> reduce_to_column_tree_csr( +unvalidated_column_tree reduce_to_column_tree_csr( tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, + device_span col_ids, device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - // 1. column count for allocation - auto const num_columns = - thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); - rmm::device_uvector unique_node_ids(num_columns, stream); - rmm::device_uvector csr_unique_node_ids(num_columns, stream); - rmm::device_uvector column_levels(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_node_ids.begin(), - thrust::make_discard_iterator(), - unique_node_ids.begin()); - thrust::copy_n( - rmm::exec_policy(stream), - thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), - unique_node_ids.size(), - column_levels.begin()); - auto [sorted_column_levels, sorted_column_levels_order] = - cudf::io::json::detail::stable_sorted_key_order(column_levels, stream); + rmm::device_uvector level_ordered_col_ids(col_ids.size(), stream); + rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); + thrust::copy(rmm::exec_policy_nosync(stream), col_ids.begin(), col_ids.end(), level_ordered_col_ids.begin()); + thrust::sequence(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); + + // Reorder nodes and column ids in level-wise fashion + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end(), + 
level_ordered_col_ids.begin(), level_ordering{tree.node_levels, col_ids}); - // 2. reduce_by_key {col_id}, {row_offset}, max. - rmm::device_uvector unique_col_ids(num_columns, stream); + // 1. get the number of columns in tree, mapping between node tree col ids and csr col ids, and the node id of first row in each column + auto const num_columns = + thrust::unique_count(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); + rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); + rmm::device_uvector mapped_col_ids(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_node_ids.end(), level_ordered_node_ids.begin(), mapped_col_ids.begin(), level_ordered_unique_node_ids.begin()); + auto rev_mapped_col_ids_it = thrust::make_permutation_iterator(thrust::make_counting_iterator(0), mapped_col_ids.begin()); + + // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. + // 3. 
category for each column node by aggregating all nodes in node tree corresponding to same column: + // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) rmm::device_uvector max_row_offsets(num_columns, stream); - rmm::device_uvector csr_unique_col_ids(num_columns, stream); - rmm::device_uvector csr_max_row_offsets(num_columns, stream); + rmm::device_uvector column_categories(num_columns, stream); auto ordered_row_offsets = - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); + thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()); + auto ordered_node_categories = thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin()); thrust::reduce_by_key(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_row_offsets, - unique_col_ids.begin(), - max_row_offsets.begin(), - thrust::equal_to(), - thrust::maximum()); - - // 3. reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector column_categories(num_columns, stream); - rmm::device_uvector csr_column_categories(num_columns, stream); - thrust::reduce_by_key( - rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), - unique_col_ids.begin(), - column_categories.begin(), - thrust::equal_to(), - [] __device__(NodeT type_a, NodeT type_b) -> NodeT { - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - return type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); - } else if (is_b_leaf) { - return type_a == NC_FN ? 
NC_ERR : (is_a_leaf ? NC_STR : type_a); - } - // *+#=E - return NC_ERR; - }); + level_ordered_col_ids.begin(), + level_ordered_col_ids.end(), + thrust::make_zip_iterator(thrust::make_tuple(ordered_row_offsets, ordered_node_categories)), + thrust::make_discard_iterator(), + thrust::make_zip_iterator(thrust::make_tuple(max_row_offsets.begin(), column_categories.begin())), + thrust::equal_to(), + [] __device__(auto a, auto b) { + auto row_offset_a = thrust::get<0>(a); + auto row_offset_b = thrust::get<0>(b); + auto type_a = thrust::get<1>(a); + auto type_b = thrust::get<1>(b); + + NodeT max_offset; + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + max_offset = type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + max_offset = type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + } else if (is_b_leaf) { + max_offset = type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a); + } + // *+#=E + max_offset = NC_ERR; - auto csr_permutation_it = thrust::make_zip_iterator( - thrust::make_permutation_iterator(unique_node_ids.begin(), sorted_column_levels_order.begin()), - thrust::make_permutation_iterator(unique_col_ids.begin(), sorted_column_levels_order.begin()), - thrust::make_permutation_iterator(max_row_offsets.begin(), sorted_column_levels_order.begin()), - thrust::make_permutation_iterator(column_categories.begin(), - sorted_column_levels_order.begin())); - thrust::copy(rmm::exec_policy(stream), - csr_permutation_it, - csr_permutation_it + num_columns, - thrust::make_zip_iterator(csr_unique_node_ids.begin(), - csr_unique_col_ids.begin(), - csr_max_row_offsets.begin(), - csr_column_categories.begin())); + thrust::maximum row_offset_op; + return thrust::make_tuple(row_offset_op(row_offset_a, row_offset_b), max_offset); + }); - // 4. unique_copy parent_node_ids, ranges - rmm::device_uvector csr_parent_col_ids(num_columns, stream); - rmm::device_uvector csr_col_range_begin(num_columns, stream); // Field names - rmm::device_uvector csr_col_range_end(num_columns, stream); + // 4. 
construct parent_col_ids using permutation iterator + rmm::device_uvector parent_col_ids(num_columns, stream); thrust::copy_n( rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), csr_unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_begin.begin(), csr_unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_end.begin(), csr_unique_node_ids.begin())), - csr_unique_node_ids.size(), - thrust::make_zip_iterator( - csr_parent_col_ids.begin(), csr_col_range_begin.begin(), csr_col_range_end.begin())); - - // convert parent_node_ids to parent_col_ids - thrust::transform( - rmm::exec_policy(stream), - csr_parent_col_ids.begin(), - csr_parent_col_ids.end(), - csr_parent_col_ids.begin(), - [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { - return parent_node_id == parent_node_sentinel ? parent_node_sentinel - : col_ids[parent_node_id]; - }); + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), level_ordered_unique_node_ids.begin()), + num_columns, + thrust::make_transform_output_iterator(parent_col_ids.begin(), + [col_ids = col_ids.begin(), rev_mapped_col_ids_it] __device__(auto parent_node_id) -> NodeIndexT { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel : rev_mapped_col_ids_it[col_ids[parent_node_id]]; + })); /* - CSR construction: - 1. Sort column levels and get their ordering - 2. For each column node coln iterated according to sorted_column_levels; do - a. Find nodes that have coln as the parent node -> set adj_coln - b. row idx[coln] = size of adj_coln + 1 - c. col idx[coln] = adj_coln U {parent_col_id[coln]} + 5. CSR construction: + a. Sort column levels and get their ordering + b. For each column node coln iterated according to sorted_column_levels; do + i. Find nodes that have coln as the parent node -> set adj_coln + ii. row idx[coln] = size of adj_coln + 1 + iii. 
col idx[coln] = adj_coln U {parent_col_id[coln]} */ rmm::device_uvector rowidx(num_columns + 1, stream); @@ -196,28 +180,16 @@ std::tuple> reduce_to_column_tre // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency auto num_non_leaf_columns = thrust::unique_count( - rmm::exec_policy(stream), csr_parent_col_ids.begin() + 1, csr_parent_col_ids.end()); + rmm::exec_policy(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); thrust::reduce_by_key(rmm::exec_policy(stream), - csr_parent_col_ids.begin() + 1, - csr_parent_col_ids.end(), + parent_col_ids.begin() + 1, + parent_col_ids.end(), thrust::make_constant_iterator(1), thrust::make_discard_iterator(), rowidx.begin() + 1, thrust::equal_to()); thrust::inclusive_scan( rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); - // overwrite the csr_parent_col_ids with the col ids in the csr tree - thrust::fill(rmm::exec_policy(stream), csr_parent_col_ids.begin(), csr_parent_col_ids.end(), -1); - thrust::scatter(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(0) + num_non_leaf_columns, - rowidx.begin(), - csr_parent_col_ids.begin() + 1); - thrust::inclusive_scan(rmm::exec_policy(stream), - csr_parent_col_ids.begin(), - csr_parent_col_ids.end(), - csr_parent_col_ids.begin(), - thrust::maximum{}); // We are discarding the parent of the root node. Add the parent adjacency. 
Since we have already // performed the scan, we use a counting iterator to add thrust::transform(rmm::exec_policy(stream), @@ -228,32 +200,31 @@ std::tuple> reduce_to_column_tre thrust::plus()); rmm::device_uvector colidx((num_columns - 1) * 2, stream); - thrust::fill(rmm::exec_policy(stream), colidx.begin(), colidx.end(), 0); + // Skip the parent of root node thrust::scatter(rmm::exec_policy(stream), - csr_parent_col_ids.begin() + 1, - csr_parent_col_ids.end(), + parent_col_ids.begin() + 1, + parent_col_ids.end(), rowidx.begin() + 1, colidx.begin()); - // excluding root node + // excluding root node, construct scatter map rmm::device_uvector map(num_columns - 1, stream); - thrust::fill(rmm::exec_policy(stream), map.begin(), map.end(), 1); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - csr_parent_col_ids.begin() + 1, - csr_parent_col_ids.end(), - map.begin(), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), map.begin()); - thrust::for_each(rmm::exec_policy(stream), + thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(1), - thrust::make_counting_iterator(1) + num_columns - 1, + num_columns - 1, [rowidx = rowidx.begin(), map = map.begin(), - csr_parent_col_ids = csr_parent_col_ids.begin()] __device__(auto i) { - auto csr_parent_col_id = csr_parent_col_ids[i]; - if (csr_parent_col_id == 0) + parent_col_ids = parent_col_ids.begin()] __device__(auto i) { + auto parent_col_id = parent_col_ids[i]; + if (parent_col_id == 0) map[i - 1]--; else - map[i - 1] += rowidx[csr_parent_col_id]; + map[i - 1] += rowidx[parent_col_id]; }); thrust::scatter(rmm::exec_policy(stream), thrust::make_counting_iterator(1), @@ -281,6 +252,8 @@ std::tuple> reduce_to_column_tre list_parents_children_max_row_offsets.begin(), list_parents_children_max_row_offsets.end(), 0); + auto list_nodes = thrust::make_permutation_iterator + thrust::for_each(rmm::exec_policy(stream), csr_unique_col_ids.begin(), 
csr_unique_col_ids.end(), diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 20019a703c9..1e617240159 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -187,15 +187,28 @@ struct device_json_column { }; namespace experimental { -struct column_tree_csr { +/* + * @brief Unvalidated column tree stored in Compressed Sparse Row (CSR) format. The device json column + * subtree - the subgraph that conforms to column tree properties - is extracted and further processed + * according to the JSON reader options passed. Only the final processed subgraph is annotated with information + * required to construct cuDF columns. + */ +struct column_tree { // position of nnzs rmm::device_uvector rowidx; rmm::device_uvector colidx; - // node properties - rmm::device_uvector column_ids; - rmm::device_uvector categories; - rmm::device_uvector range_begin; - rmm::device_uvector range_end; + // device_json_column properties + using row_offset_t = size_type; + // Indicator array for the device column subtree + // Stores the number of rows in the column if the node is part of device column subtree + // Stores zero otherwise + rmm::device_uvector subtree_nrows; + rmm::device_uvector string_offsets; + rmm::device_uvector string_lengths; + // Row offsets + rmm::device_uvector child_offsets; + // Validity bitmap + rmm::device_buffer validity; }; namespace detail { diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json_tree_csr.cu index 12e92551521..594d7dde640 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json_tree_csr.cu @@ -185,6 +185,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines) stream); auto iseq = check_equality(d_column_tree, d_column_tree_csr, stream); + std::cout << "iseq = " << iseq << std::endl; // assert equality between csr and meta formats assert(iseq == true); } From d05e670916cf4ece469c2c76c8eedcafd93b9a65 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 30 Jul 2024 22:07:34 
+0000 Subject: [PATCH 12/46] better csr construction --- cpp/src/io/json/json_column_csr.cu | 199 +++++++++-------------- cpp/src/io/json/nested_json.hpp | 19 ++- cpp/tests/CMakeLists.txt | 1 + cpp/tests/io/{ => json}/json_tree_csr.cu | 50 +++--- 4 files changed, 121 insertions(+), 148 deletions(-) rename cpp/tests/io/{ => json}/json_tree_csr.cu (81%) diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index 866820fa67c..3bec413435c 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -48,19 +48,15 @@ #include #include #include +#include #include +#include + namespace cudf::io::json::experimental::detail { using row_offset_t = size_type; -struct unvalidated_column_tree { - rmm::device_uvector rowidx; - rmm::device_uvector colidx; - rmm::device_uvector max_row_offsets; - rmm::device_uvector column_categories; -}; - struct level_ordering { device_span node_levels; device_span col_ids; @@ -71,6 +67,24 @@ struct level_ordering { } }; +struct parent_nodeids_to_colids { + device_span col_ids; + device_span rev_mapped_col_ids; + __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel : rev_mapped_col_ids[col_ids[parent_node_id]]; + } +}; + +template +void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) { + auto h_vec = cudf::detail::make_std_vector_async(d_vec, stream); + std::cout << name << " = "; + for(auto e : h_vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} + /** * @brief Reduces node tree representation to column tree CSR representation. 
* @@ -85,10 +99,10 @@ struct level_ordering { * @return A tuple of column tree representation of JSON string, column ids of columns, and * max row offsets of columns */ -unvalidated_column_tree reduce_to_column_tree_csr( +std::tuple reduce_to_column_tree( tree_meta_t& tree, device_span col_ids, - device_span row_offsets, + device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream) @@ -109,13 +123,19 @@ unvalidated_column_tree reduce_to_column_tree_csr( thrust::unique_count(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); rmm::device_uvector mapped_col_ids(num_columns, stream); + rmm::device_uvector rev_mapped_col_ids(num_columns, stream); thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_node_ids.end(), level_ordered_node_ids.begin(), mapped_col_ids.begin(), level_ordered_unique_node_ids.begin()); - auto rev_mapped_col_ids_it = thrust::make_permutation_iterator(thrust::make_counting_iterator(0), mapped_col_ids.begin()); + auto *dev_num_levels_ptr = thrust::max_element(rmm::exec_policy(stream), tree.node_levels.begin(), tree.node_levels.end()); + + rmm::device_uvector mapped_col_ids_copy(num_columns, stream); + thrust::copy(rmm::exec_policy(stream), mapped_col_ids.begin(), mapped_col_ids.end(), mapped_col_ids_copy.begin()); + thrust::sequence(rmm::exec_policy(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); + thrust::sort_by_key(rmm::exec_policy(stream), mapped_col_ids_copy.begin(), mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. // 3. 
category for each column node by aggregating all nodes in node tree corresponding to same column: // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); rmm::device_uvector column_categories(num_columns, stream); auto ordered_row_offsets = thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()); @@ -133,37 +153,34 @@ unvalidated_column_tree reduce_to_column_tree_csr( auto type_a = thrust::get<1>(a); auto type_b = thrust::get<1>(b); - NodeT max_offset; + NodeT ctg; auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) // *+*=*, v+v=v if (type_a == type_b) { - max_offset = type_a; + ctg = type_a; } else if (is_a_leaf) { // *+v=*, N+V=N // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - max_offset = type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); } else if (is_b_leaf) { - max_offset = type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); + ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); } - // *+#=E - max_offset = NC_ERR; + else ctg = NC_ERR; thrust::maximum row_offset_op; - return thrust::make_tuple(row_offset_op(row_offset_a, row_offset_b), max_offset); + return thrust::make_tuple(row_offset_op(row_offset_a, row_offset_b), ctg); }); // 4. 
construct parent_col_ids using permutation iterator rmm::device_uvector parent_col_ids(num_columns, stream); + thrust::transform_output_iterator parent_col_ids_it(parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); thrust::copy_n( rmm::exec_policy(stream), thrust::make_permutation_iterator(tree.parent_node_ids.begin(), level_ordered_unique_node_ids.begin()), num_columns, - thrust::make_transform_output_iterator(parent_col_ids.begin(), - [col_ids = col_ids.begin(), rev_mapped_col_ids_it] __device__(auto parent_node_id) -> NodeIndexT { - return parent_node_id == parent_node_sentinel ? parent_node_sentinel : rev_mapped_col_ids_it[col_ids[parent_node_id]]; - })); + parent_col_ids_it); /* 5. CSR construction: @@ -176,7 +193,6 @@ unvalidated_column_tree reduce_to_column_tree_csr( rmm::device_uvector rowidx(num_columns + 1, stream); thrust::fill(rmm::exec_policy(stream), rowidx.begin(), rowidx.end(), 0); - // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency auto num_non_leaf_columns = thrust::unique_count( @@ -188,19 +204,18 @@ unvalidated_column_tree reduce_to_column_tree_csr( thrust::make_discard_iterator(), rowidx.begin() + 1, thrust::equal_to()); - thrust::inclusive_scan( - rmm::exec_policy(stream), rowidx.begin() + 1, rowidx.end(), rowidx.begin() + 1); - // We are discarding the parent of the root node. Add the parent adjacency. 
Since we have already - // performed the scan, we use a counting iterator to add - thrust::transform(rmm::exec_policy(stream), - rowidx.begin() + 2, - rowidx.end(), - thrust::make_counting_iterator(1), - rowidx.begin() + 2, - thrust::plus()); + thrust::transform_inclusive_scan(rmm::exec_policy(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), + thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), + rowidx.begin() + 1, + cuda::proclaim_return_type([] __device__(auto a) { + auto n = thrust::get<0>(a); + auto idx = thrust::get<1>(a); + return n == 1 ? idx : idx + 1; + }), + thrust::plus{}); rmm::device_uvector colidx((num_columns - 1) * 2, stream); - // Skip the parent of root node thrust::scatter(rmm::exec_policy(stream), parent_col_ids.begin() + 1, @@ -232,102 +247,44 @@ unvalidated_column_tree reduce_to_column_tree_csr( map.begin(), colidx.begin()); - // condition is true if parent is not a list, or sentinel/root - // Special case to return true if parent is a list and is_array_of_arrays is true - auto is_non_list_parent = [column_categories = column_categories.begin(), - is_array_of_arrays, - row_array_parent_col_id] __device__(auto parent_col_id) -> bool { - return !(parent_col_id == parent_node_sentinel || - column_categories[parent_col_id] == NC_LIST && - (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - }; // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array. (initialize to zero) // atomicMax on children max_row_offsets array. // gather the max_row_offsets from children row offset array. 
{ - rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy(stream), - list_parents_children_max_row_offsets.begin(), - list_parents_children_max_row_offsets.end(), - 0); - auto list_nodes = thrust::make_permutation_iterator + auto max_row_offsets_it = thrust::make_permutation_iterator(max_row_offsets.begin(), colidx.begin()); + rmm::device_uvector max_children_max_row_offsets(num_columns, stream); + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Max(nullptr, temp_storage_bytes, max_row_offsets_it, max_children_max_row_offsets.begin(), num_columns, rowidx.begin(), rowidx.begin() + 1, stream.value()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + cub::DeviceSegmentedReduce::Max(d_temp_storage.data(), temp_storage_bytes, max_row_offsets_it, max_children_max_row_offsets.begin(), num_columns, rowidx.begin(), rowidx.begin() + 1, stream.value()); - thrust::for_each(rmm::exec_policy(stream), - csr_unique_col_ids.begin(), - csr_unique_col_ids.end(), - [csr_column_categories = csr_column_categories.begin(), - csr_parent_col_ids = csr_parent_col_ids.begin(), - csr_max_row_offsets = csr_max_row_offsets.begin(), - list_parents_children_max_row_offsets = - list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { - auto csr_parent_col_id = csr_parent_col_ids[col_id]; - if (csr_parent_col_id != parent_node_sentinel and - csr_column_categories[csr_parent_col_id] == node_t::NC_LIST) { - cuda::atomic_ref ref{ - *(list_parents_children_max_row_offsets + csr_parent_col_id)}; - ref.fetch_max(csr_max_row_offsets[col_id], - cuda::std::memory_order_relaxed); - } - }); - thrust::gather_if( - rmm::exec_policy(stream), - csr_parent_col_ids.begin(), - csr_parent_col_ids.end(), - csr_parent_col_ids.begin(), - list_parents_children_max_row_offsets.begin(), - csr_max_row_offsets.begin(), - [csr_column_categories = csr_column_categories.begin()] __device__(size_type parent_col_id) { - return 
parent_col_id != parent_node_sentinel and - csr_column_categories[parent_col_id] == node_t::NC_LIST; - }); + rmm::device_uvector list_ancestors(num_columns, stream); + thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), num_columns, + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + column_categories = column_categories.begin(), + dev_num_levels_ptr, + list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { + auto num_levels = *dev_num_levels_ptr; + list_ancestors[node] = node; + for(int level = 0; level < num_levels; level++) { + if(list_ancestors[node] > 0) + list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; + else list_ancestors[node] = -1; + if(list_ancestors[node] == -1 || column_categories[list_ancestors[node]] == NC_LIST) break; + } + }); + thrust::gather_if(rmm::exec_policy(stream), list_ancestors.begin(), list_ancestors.end(), list_ancestors.begin(), + max_children_max_row_offsets.begin(), max_row_offsets.begin(), + [] __device__(auto ancestor) { + return ancestor != -1; + }); } - // copy lists' max_row_offsets to children. - // all structs should have same size. 
- thrust::transform_if( - rmm::exec_policy(stream), - csr_unique_col_ids.begin(), - csr_unique_col_ids.end(), - csr_max_row_offsets.begin(), - [csr_column_categories = csr_column_categories.begin(), - is_non_list_parent, - csr_parent_col_ids = csr_parent_col_ids.begin(), - csr_max_row_offsets = csr_max_row_offsets.begin()] __device__(size_type col_id) { - auto parent_col_id = csr_parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - while (is_non_list_parent(parent_col_id)) { - col_id = parent_col_id; - parent_col_id = csr_parent_col_ids[parent_col_id]; - } - return csr_max_row_offsets[col_id]; - }, - [csr_column_categories = csr_column_categories.begin(), - is_non_list_parent, - parent_col_ids = csr_parent_col_ids.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - return is_non_list_parent(parent_col_id); - }); - - // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) - thrust::transform_if( - rmm::exec_policy(stream), - csr_col_range_begin.begin(), - csr_col_range_begin.end(), - csr_column_categories.begin(), - csr_col_range_end.begin(), - [] __device__(auto i) { return i + 1; }, - [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); - - return std::tuple{column_tree_csr{std::move(rowidx), - std::move(colidx), - std::move(csr_unique_col_ids), - std::move(csr_column_categories), - std::move(csr_col_range_begin), - std::move(csr_col_range_end)}, - std::move(csr_max_row_offsets)}; + return std::tuple{csr{std::move(rowidx), std::move(colidx)}, + column_tree_properties{std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}}; } } // namespace cudf::io::json::experimental::detail diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 1e617240159..1d8f24af2fe 100644 --- a/cpp/src/io/json/nested_json.hpp +++ 
b/cpp/src/io/json/nested_json.hpp @@ -187,6 +187,20 @@ struct device_json_column { }; namespace experimental { +/* + * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format. + */ +struct csr { + rmm::device_uvector rowidx; + rmm::device_uvector colidx; +}; + +struct column_tree_properties { + rmm::device_uvector categories; + rmm::device_uvector max_row_offsets; + rmm::device_uvector mapped_ids; +}; + /* * @brief Unvalidated column tree stored in Compressed Sparse Row (CSR) format. The device json column * subtree - the subgraph that conforms to column tree properties - is extracted and further processed @@ -195,6 +209,7 @@ namespace experimental { */ struct column_tree { // position of nnzs + csr adjacency; rmm::device_uvector rowidx; rmm::device_uvector colidx; // device_json_column properties @@ -223,11 +238,9 @@ namespace detail { * in each column */ -std::tuple> reduce_to_column_tree_csr( +std::tuple reduce_to_column_tree( tree_meta_t& tree, device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 88187623930..a36facc7de7 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -325,6 +325,7 @@ ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) ConfigureTest(JSON_QUOTE_NORMALIZATION io/json/json_quote_normalization_test.cpp) ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json/json_whitespace_normalization_test.cu) +ConfigureTest(JSON_TREE_CSR io/json/json_tree_csr.cu) ConfigureTest( DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp GPUS 1 diff --git a/cpp/tests/io/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu similarity index 81% rename from cpp/tests/io/json_tree_csr.cu rename to cpp/tests/io/json/json_tree_csr.cu index 
594d7dde640..18e4cca136e 100644 --- a/cpp/tests/io/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -45,15 +45,13 @@ struct h_tree_meta_t { std::vector node_range_end; }; -struct h_column_tree_csr { +struct h_column_tree { // position of nnzs std::vector rowidx; std::vector colidx; // node properties - std::vector column_ids; std::vector categories; - std::vector range_begin; - std::vector range_end; + std::vector column_ids; }; template @@ -66,7 +64,8 @@ void print(std::string str, std::vector& vec) } bool check_equality(cuio_json::tree_meta_t& d_a, - cuio_json::experimental::column_tree_csr& d_b, + cuio_json::experimental::csr& d_b_csr, + cuio_json::experimental::column_tree_properties& d_b_ctp, rmm::cuda_stream_view stream) { // convert from tree_meta_t to column_tree_csr @@ -75,34 +74,40 @@ bool check_equality(cuio_json::tree_meta_t& d_a, cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), cudf::detail::make_std_vector_async(d_a.node_range_end, stream)}; - h_column_tree_csr b{cudf::detail::make_std_vector_async(d_b.rowidx, stream), - cudf::detail::make_std_vector_async(d_b.colidx, stream), - cudf::detail::make_std_vector_async(d_b.column_ids, stream), - cudf::detail::make_std_vector_async(d_b.categories, stream), - cudf::detail::make_std_vector_async(d_b.range_begin, stream), - cudf::detail::make_std_vector_async(d_b.range_end, stream)}; + h_column_tree b{cudf::detail::make_std_vector_async(d_b_csr.rowidx, stream), + cudf::detail::make_std_vector_async(d_b_csr.colidx, stream), + cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), + cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) return false; + if (b.rowidx.size() != num_nodes + 1) { + return false; + } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) return 
false; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { + return false; + } } for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) return false; + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { + return false; + } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) return false; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { + return false; + } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) return false; - if (a.node_range_begin[b.column_ids[u]] != b.range_begin[u]) return false; - if (a.node_range_end[b.column_ids[u]] != b.range_end[u]) return false; + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { + return false; + } } return true; } @@ -174,18 +179,15 @@ TEST_F(JsonColumnTreeTests, SimpleLines) row_array_parent_col_id, stream); - auto [d_column_tree_csr, d_max_row_offsets_csr] = - cudf::io::json::experimental::detail::reduce_to_column_tree_csr(gpu_tree, + auto [d_column_tree_csr, d_column_tree_properties] = + cudf::io::json::experimental::detail::reduce_to_column_tree(gpu_tree, gpu_col_id, - sorted_col_ids, - node_ids, gpu_row_offsets, false, row_array_parent_col_id, stream); - auto iseq = check_equality(d_column_tree, d_column_tree_csr, stream); - std::cout << "iseq = " << iseq << std::endl; + auto iseq = check_equality(d_column_tree, d_column_tree_csr, d_column_tree_properties, stream); // assert equality between csr and meta formats assert(iseq == true); } From 1ce88be731323aef901bcea5cb0049bd1ff9f5a0 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 30 Jul 2024 22:08:45 +0000 Subject: [PATCH 13/46] formatting --- cpp/src/io/json/json_column_csr.cu | 259 +++++++++++++++++------------ cpp/src/io/json/nested_json.hpp | 10 +- cpp/tests/io/json/json_tree_csr.cu | 34 
++-- 3 files changed, 170 insertions(+), 133 deletions(-) diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index 3bec413435c..98edf6faf33 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -51,8 +52,6 @@ #include #include -#include - namespace cudf::io::json::experimental::detail { using row_offset_t = size_type; @@ -63,23 +62,27 @@ struct level_ordering { __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const { return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || - (node_levels[lhs_node_id] == node_levels[rhs_node_id] && col_ids[lhs_node_id] < col_ids[rhs_node_id]); + (node_levels[lhs_node_id] == node_levels[rhs_node_id] && + col_ids[lhs_node_id] < col_ids[rhs_node_id]); } }; struct parent_nodeids_to_colids { device_span col_ids; device_span rev_mapped_col_ids; - __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT { - return parent_node_id == parent_node_sentinel ? parent_node_sentinel : rev_mapped_col_ids[col_ids[parent_node_id]]; + __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT + { + return parent_node_id == parent_node_sentinel ? 
parent_node_sentinel + : rev_mapped_col_ids[col_ids[parent_node_id]]; } }; template -void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) { +void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) +{ auto h_vec = cudf::detail::make_std_vector_async(d_vec, stream); std::cout << name << " = "; - for(auto e : h_vec) { + for (auto e : h_vec) { std::cout << e << " "; } std::cout << std::endl; @@ -111,76 +114,99 @@ std::tuple reduce_to_column_tree( rmm::device_uvector level_ordered_col_ids(col_ids.size(), stream); rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); - thrust::copy(rmm::exec_policy_nosync(stream), col_ids.begin(), col_ids.end(), level_ordered_col_ids.begin()); - thrust::sequence(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); + thrust::copy( + rmm::exec_policy_nosync(stream), col_ids.begin(), col_ids.end(), level_ordered_col_ids.begin()); + thrust::sequence( + rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); // Reorder nodes and column ids in level-wise fashion - thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end(), - level_ordered_col_ids.begin(), level_ordering{tree.node_levels, col_ids}); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + level_ordered_node_ids.begin(), + level_ordered_node_ids.end(), + level_ordered_col_ids.begin(), + level_ordering{tree.node_levels, col_ids}); - // 1. get the number of columns in tree, mapping between node tree col ids and csr col ids, and the node id of first row in each column - auto const num_columns = - thrust::unique_count(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); + // 1. 
get the number of columns in tree, mapping between node tree col ids and csr col ids, and + // the node id of first row in each column + auto const num_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); rmm::device_uvector mapped_col_ids(num_columns, stream); rmm::device_uvector rev_mapped_col_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_node_ids.end(), level_ordered_node_ids.begin(), mapped_col_ids.begin(), level_ordered_unique_node_ids.begin()); - auto *dev_num_levels_ptr = thrust::max_element(rmm::exec_policy(stream), tree.node_levels.begin(), tree.node_levels.end()); + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), + level_ordered_col_ids.begin(), + level_ordered_node_ids.end(), + level_ordered_node_ids.begin(), + mapped_col_ids.begin(), + level_ordered_unique_node_ids.begin()); + auto* dev_num_levels_ptr = + thrust::max_element(rmm::exec_policy(stream), tree.node_levels.begin(), tree.node_levels.end()); rmm::device_uvector mapped_col_ids_copy(num_columns, stream); - thrust::copy(rmm::exec_policy(stream), mapped_col_ids.begin(), mapped_col_ids.end(), mapped_col_ids_copy.begin()); + thrust::copy(rmm::exec_policy(stream), + mapped_col_ids.begin(), + mapped_col_ids.end(), + mapped_col_ids_copy.begin()); thrust::sequence(rmm::exec_policy(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); - thrust::sort_by_key(rmm::exec_policy(stream), mapped_col_ids_copy.begin(), mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); + thrust::sort_by_key(rmm::exec_policy(stream), + mapped_col_ids_copy.begin(), + mapped_col_ids_copy.end(), + rev_mapped_col_ids.begin()); // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. - // 3. 
category for each column node by aggregating all nodes in node tree corresponding to same column: + // 3. category for each column node by aggregating all nodes in node tree corresponding to same + // column: // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) rmm::device_uvector max_row_offsets(num_columns, stream); rmm::device_uvector column_categories(num_columns, stream); auto ordered_row_offsets = thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()); - auto ordered_node_categories = thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin()); - thrust::reduce_by_key(rmm::exec_policy(stream), - level_ordered_col_ids.begin(), - level_ordered_col_ids.end(), - thrust::make_zip_iterator(thrust::make_tuple(ordered_row_offsets, ordered_node_categories)), - thrust::make_discard_iterator(), - thrust::make_zip_iterator(thrust::make_tuple(max_row_offsets.begin(), column_categories.begin())), - thrust::equal_to(), - [] __device__(auto a, auto b) { - auto row_offset_a = thrust::get<0>(a); - auto row_offset_b = thrust::get<0>(b); - auto type_a = thrust::get<1>(a); - auto type_b = thrust::get<1>(b); - - NodeT ctg; - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - ctg = type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); - } else if (is_b_leaf) { - ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a)); - } - else ctg = NC_ERR; + auto ordered_node_categories = + thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin()); + thrust::reduce_by_key( + rmm::exec_policy(stream), + level_ordered_col_ids.begin(), + level_ordered_col_ids.end(), + thrust::make_zip_iterator(thrust::make_tuple(ordered_row_offsets, ordered_node_categories)), + thrust::make_discard_iterator(), + thrust::make_zip_iterator( + thrust::make_tuple(max_row_offsets.begin(), column_categories.begin())), + thrust::equal_to(), + [] __device__(auto a, auto b) { + auto row_offset_a = thrust::get<0>(a); + auto row_offset_b = thrust::get<0>(b); + auto type_a = thrust::get<1>(a); + auto type_b = thrust::get<1>(b); + + NodeT ctg; + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + ctg = type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); + } else if (is_b_leaf) { + ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); + } else + ctg = NC_ERR; - thrust::maximum row_offset_op; - return thrust::make_tuple(row_offset_op(row_offset_a, row_offset_b), ctg); - }); + thrust::maximum row_offset_op; + return thrust::make_tuple(row_offset_op(row_offset_a, row_offset_b), ctg); + }); // 4. 
construct parent_col_ids using permutation iterator rmm::device_uvector parent_col_ids(num_columns, stream); - thrust::transform_output_iterator parent_col_ids_it(parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); - thrust::copy_n( - rmm::exec_policy(stream), - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), level_ordered_unique_node_ids.begin()), - num_columns, - parent_col_ids_it); + thrust::transform_output_iterator parent_col_ids_it( + parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); + thrust::copy_n(rmm::exec_policy(stream), + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), + level_ordered_unique_node_ids.begin()), + num_columns, + parent_col_ids_it); /* 5. CSR construction: @@ -204,16 +230,17 @@ std::tuple reduce_to_column_tree( thrust::make_discard_iterator(), rowidx.begin() + 1, thrust::equal_to()); - thrust::transform_inclusive_scan(rmm::exec_policy(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), - thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), - rowidx.begin() + 1, - cuda::proclaim_return_type([] __device__(auto a) { - auto n = thrust::get<0>(a); - auto idx = thrust::get<1>(a); - return n == 1 ? idx : idx + 1; - }), - thrust::plus{}); + thrust::transform_inclusive_scan( + rmm::exec_policy(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), + thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), + rowidx.begin() + 1, + cuda::proclaim_return_type([] __device__(auto a) { + auto n = thrust::get<0>(a); + auto idx = thrust::get<1>(a); + return n == 1 ? 
idx : idx + 1; + }), + thrust::plus{}); rmm::device_uvector colidx((num_columns - 1) * 2, stream); // Skip the parent of root node @@ -226,21 +253,21 @@ std::tuple reduce_to_column_tree( rmm::device_uvector map(num_columns - 1, stream); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), parent_col_ids.begin() + 1, - parent_col_ids.end(), + parent_col_ids.end(), thrust::make_constant_iterator(1), map.begin()); thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(1), - num_columns - 1, - [rowidx = rowidx.begin(), - map = map.begin(), - parent_col_ids = parent_col_ids.begin()] __device__(auto i) { - auto parent_col_id = parent_col_ids[i]; - if (parent_col_id == 0) - map[i - 1]--; - else - map[i - 1] += rowidx[parent_col_id]; - }); + thrust::make_counting_iterator(1), + num_columns - 1, + [rowidx = rowidx.begin(), + map = map.begin(), + parent_col_ids = parent_col_ids.begin()] __device__(auto i) { + auto parent_col_id = parent_col_ids[i]; + if (parent_col_id == 0) + map[i - 1]--; + else + map[i - 1] += rowidx[parent_col_id]; + }); thrust::scatter(rmm::exec_policy(stream), thrust::make_counting_iterator(1), thrust::make_counting_iterator(1) + num_columns - 1, @@ -253,38 +280,62 @@ std::tuple reduce_to_column_tree( // atomicMax on children max_row_offsets array. // gather the max_row_offsets from children row offset array. 
{ - auto max_row_offsets_it = thrust::make_permutation_iterator(max_row_offsets.begin(), colidx.begin()); + auto max_row_offsets_it = + thrust::make_permutation_iterator(max_row_offsets.begin(), colidx.begin()); rmm::device_uvector max_children_max_row_offsets(num_columns, stream); size_t temp_storage_bytes = 0; - cub::DeviceSegmentedReduce::Max(nullptr, temp_storage_bytes, max_row_offsets_it, max_children_max_row_offsets.begin(), num_columns, rowidx.begin(), rowidx.begin() + 1, stream.value()); + cub::DeviceSegmentedReduce::Max(nullptr, + temp_storage_bytes, + max_row_offsets_it, + max_children_max_row_offsets.begin(), + num_columns, + rowidx.begin(), + rowidx.begin() + 1, + stream.value()); rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - cub::DeviceSegmentedReduce::Max(d_temp_storage.data(), temp_storage_bytes, max_row_offsets_it, max_children_max_row_offsets.begin(), num_columns, rowidx.begin(), rowidx.begin() + 1, stream.value()); + cub::DeviceSegmentedReduce::Max(d_temp_storage.data(), + temp_storage_bytes, + max_row_offsets_it, + max_children_max_row_offsets.begin(), + num_columns, + rowidx.begin(), + rowidx.begin() + 1, + stream.value()); rmm::device_uvector list_ancestors(num_columns, stream); - thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), num_columns, - [rowidx = rowidx.begin(), - colidx = colidx.begin(), - column_categories = column_categories.begin(), - dev_num_levels_ptr, - list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { - auto num_levels = *dev_num_levels_ptr; - list_ancestors[node] = node; - for(int level = 0; level < num_levels; level++) { - if(list_ancestors[node] > 0) - list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; - else list_ancestors[node] = -1; - if(list_ancestors[node] == -1 || column_categories[list_ancestors[node]] == NC_LIST) break; - } - }); - thrust::gather_if(rmm::exec_policy(stream), list_ancestors.begin(), list_ancestors.end(), 
list_ancestors.begin(), - max_children_max_row_offsets.begin(), max_row_offsets.begin(), - [] __device__(auto ancestor) { - return ancestor != -1; - }); + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + num_columns, + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + column_categories = column_categories.begin(), + dev_num_levels_ptr, + list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { + auto num_levels = *dev_num_levels_ptr; + list_ancestors[node] = node; + for (int level = 0; level < num_levels; level++) { + if (list_ancestors[node] > 0) + list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; + else + list_ancestors[node] = -1; + if (list_ancestors[node] == -1 || column_categories[list_ancestors[node]] == NC_LIST) + break; + } + }); + thrust::gather_if(rmm::exec_policy(stream), + list_ancestors.begin(), + list_ancestors.end(), + list_ancestors.begin(), + max_children_max_row_offsets.begin(), + max_row_offsets.begin(), + [] __device__(auto ancestor) { return ancestor != -1; }); } - return std::tuple{csr{std::move(rowidx), std::move(colidx)}, - column_tree_properties{std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}}; + return std::tuple{ + csr{std::move(rowidx), std::move(colidx)}, + column_tree_properties{ + std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}}; } } // namespace cudf::io::json::experimental::detail diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 1d8f24af2fe..f4ecb3b97b4 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -188,7 +188,7 @@ struct device_json_column { namespace experimental { /* - * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format. + * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format. 
*/ struct csr { rmm::device_uvector rowidx; @@ -202,10 +202,10 @@ struct column_tree_properties { }; /* - * @brief Unvalidated column tree stored in Compressed Sparse Row (CSR) format. The device json column - * subtree - the subgraph that conforms to column tree properties - is extracted and further processed - * according to the JSON reader options passed. Only the final processed subgraph is annotated with information - * required to construct cuDF columns. + * @brief Unvalidated column tree stored in Compressed Sparse Row (CSR) format. The device json + * column subtree - the subgraph that conforms to column tree properties - is extracted and further + * processed according to the JSON reader options passed. Only the final processed subgraph is + * annotated with information required to construct cuDF columns. */ struct column_tree { // position of nnzs diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 18e4cca136e..bb3f835177f 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -75,39 +75,29 @@ bool check_equality(cuio_json::tree_meta_t& d_a, cudf::detail::make_std_vector_async(d_a.node_range_end, stream)}; h_column_tree b{cudf::detail::make_std_vector_async(d_b_csr.rowidx, stream), - cudf::detail::make_std_vector_async(d_b_csr.colidx, stream), - cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), - cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; + cudf::detail::make_std_vector_async(d_b_csr.colidx, stream), + cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), + cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) { - return false; - } + if (b.rowidx.size() != num_nodes + 1) { return false; } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != 
b.column_ids[0]) { - return false; - } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } } for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { - return false; - } + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { - return false; - } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { - return false; - } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } return true; } @@ -180,12 +170,8 @@ TEST_F(JsonColumnTreeTests, SimpleLines) stream); auto [d_column_tree_csr, d_column_tree_properties] = - cudf::io::json::experimental::detail::reduce_to_column_tree(gpu_tree, - gpu_col_id, - gpu_row_offsets, - false, - row_array_parent_col_id, - stream); + cudf::io::json::experimental::detail::reduce_to_column_tree( + gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); auto iseq = check_equality(d_column_tree, d_column_tree_csr, d_column_tree_properties, stream); // assert equality between csr and meta formats From d6d724ca4f624f2d41b658c5d4577c56fe62a853 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 30 Jul 2024 22:22:09 +0000 Subject: [PATCH 14/46] exec policy is no sync --- cpp/src/io/json/json_column_csr.cu | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index 98edf6faf33..b7968a636c2 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -139,16 +139,17 @@ std::tuple reduce_to_column_tree( level_ordered_node_ids.begin(), 
mapped_col_ids.begin(), level_ordered_unique_node_ids.begin()); - auto* dev_num_levels_ptr = - thrust::max_element(rmm::exec_policy(stream), tree.node_levels.begin(), tree.node_levels.end()); + auto* dev_num_levels_ptr = thrust::max_element( + rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); rmm::device_uvector mapped_col_ids_copy(num_columns, stream); - thrust::copy(rmm::exec_policy(stream), + thrust::copy(rmm::exec_policy_nosync(stream), mapped_col_ids.begin(), mapped_col_ids.end(), mapped_col_ids_copy.begin()); - thrust::sequence(rmm::exec_policy(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); - thrust::sort_by_key(rmm::exec_policy(stream), + thrust::sequence( + rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); + thrust::sort_by_key(rmm::exec_policy_nosync(stream), mapped_col_ids_copy.begin(), mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); @@ -164,7 +165,7 @@ std::tuple reduce_to_column_tree( auto ordered_node_categories = thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin()); thrust::reduce_by_key( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end(), thrust::make_zip_iterator(thrust::make_tuple(ordered_row_offsets, ordered_node_categories)), @@ -202,7 +203,7 @@ std::tuple reduce_to_column_tree( rmm::device_uvector parent_col_ids(num_columns, stream); thrust::transform_output_iterator parent_col_ids_it( parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); - thrust::copy_n(rmm::exec_policy(stream), + thrust::copy_n(rmm::exec_policy_nosync(stream), thrust::make_permutation_iterator(tree.parent_node_ids.begin(), level_ordered_unique_node_ids.begin()), num_columns, @@ -218,12 +219,12 @@ std::tuple reduce_to_column_tree( */ rmm::device_uvector rowidx(num_columns + 1, stream); - thrust::fill(rmm::exec_policy(stream), rowidx.begin(), 
rowidx.end(), 0); + thrust::fill(rmm::exec_policy_nosync(stream), rowidx.begin(), rowidx.end(), 0); // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency auto num_non_leaf_columns = thrust::unique_count( - rmm::exec_policy(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); - thrust::reduce_by_key(rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end(), thrust::make_constant_iterator(1), @@ -231,7 +232,7 @@ std::tuple reduce_to_column_tree( rowidx.begin() + 1, thrust::equal_to()); thrust::transform_inclusive_scan( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), rowidx.begin() + 1, @@ -244,19 +245,19 @@ std::tuple reduce_to_column_tree( rmm::device_uvector colidx((num_columns - 1) * 2, stream); // Skip the parent of root node - thrust::scatter(rmm::exec_policy(stream), + thrust::scatter(rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end(), rowidx.begin() + 1, colidx.begin()); // excluding root node, construct scatter map rmm::device_uvector map(num_columns - 1, stream); - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end(), thrust::make_constant_iterator(1), map.begin()); - thrust::for_each_n(rmm::exec_policy(stream), + thrust::for_each_n(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(1), num_columns - 1, [rowidx = rowidx.begin(), @@ -268,7 +269,7 @@ std::tuple reduce_to_column_tree( else map[i - 1] += rowidx[parent_col_id]; }); - thrust::scatter(rmm::exec_policy(stream), + 
thrust::scatter(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(1), thrust::make_counting_iterator(1) + num_columns - 1, map.begin(), @@ -304,7 +305,7 @@ std::tuple reduce_to_column_tree( rmm::device_uvector list_ancestors(num_columns, stream); thrust::for_each_n( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(0), num_columns, [rowidx = rowidx.begin(), @@ -323,7 +324,7 @@ std::tuple reduce_to_column_tree( break; } }); - thrust::gather_if(rmm::exec_policy(stream), + thrust::gather_if(rmm::exec_policy_nosync(stream), list_ancestors.begin(), list_ancestors.end(), list_ancestors.begin(), From 2622d6bfaa18341495af1a68dc616fcf8f493d08 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 30 Jul 2024 23:38:37 +0000 Subject: [PATCH 15/46] fix copyright year --- cpp/src/io/json/json_column_csr.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/json_column_csr.cu index b7968a636c2..74f065bddfa 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/json_column_csr.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 94983728e04a0eb492e795140967152c977527b8 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 31 Jul 2024 05:36:09 +0000 Subject: [PATCH 16/46] fixing max row offsets --- cpp/CMakeLists.txt | 2 +- ...umn_csr.cu => column_tree_construction.cu} | 256 ++++++++++++++++-- cpp/src/io/json/json_column.cu | 199 -------------- cpp/src/io/json/nested_json.hpp | 23 +- cpp/tests/io/json/json_tree_csr.cu | 11 +- 5 files changed, 254 insertions(+), 237 deletions(-) rename cpp/src/io/json/{json_column_csr.cu => column_tree_construction.cu} (58%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d06cfa39ade..cbf87fc67ed 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -393,7 +393,7 @@ add_library( src/io/functions.cpp src/io/json/byte_range_info.cu src/io/json/json_column.cu - src/io/json/json_column_csr.cu + src/io/json/column_tree_construction.cu src/io/json/json_normalization.cu src/io/json/json_tree.cu src/io/json/nested_json_gpu.cu diff --git a/cpp/src/io/json/json_column_csr.cu b/cpp/src/io/json/column_tree_construction.cu similarity index 58% rename from cpp/src/io/json/json_column_csr.cu rename to cpp/src/io/json/column_tree_construction.cu index 74f065bddfa..5ef21096ac9 100644 --- a/cpp/src/io/json/json_column_csr.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -52,7 +52,22 @@ #include #include -namespace cudf::io::json::experimental::detail { +namespace cudf::io::json { + +template +void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) +{ + stream.synchronize(); + auto h_vec = cudf::detail::make_std_vector_async(d_vec, stream); + stream.synchronize(); + std::cout << name << " = "; + for (auto e : h_vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} + +namespace experimental::detail { using row_offset_t = size_type; @@ -77,17 +92,6 @@ struct parent_nodeids_to_colids { } }; -template -void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) -{ - auto h_vec = 
cudf::detail::make_std_vector_async(d_vec, stream); - std::cout << name << " = "; - for (auto e : h_vec) { - std::cout << e << " "; - } - std::cout << std::endl; -} - /** * @brief Reduces node tree representation to column tree CSR representation. * @@ -168,10 +172,9 @@ std::tuple reduce_to_column_tree( rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end(), - thrust::make_zip_iterator(thrust::make_tuple(ordered_row_offsets, ordered_node_categories)), + thrust::make_zip_iterator(ordered_row_offsets, ordered_node_categories), thrust::make_discard_iterator(), - thrust::make_zip_iterator( - thrust::make_tuple(max_row_offsets.begin(), column_categories.begin())), + thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), thrust::equal_to(), [] __device__(auto a, auto b) { auto row_offset_a = thrust::get<0>(a); @@ -195,8 +198,8 @@ std::tuple reduce_to_column_tree( } else ctg = NC_ERR; - thrust::maximum row_offset_op; - return thrust::make_tuple(row_offset_op(row_offset_a, row_offset_b), ctg); + thrust::maximum row_offset_op; + return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); }); // 4. construct parent_col_ids using permutation iterator @@ -277,8 +280,7 @@ std::tuple reduce_to_column_tree( // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. - // create list's children max_row_offsets array. (initialize to zero) - // atomicMax on children max_row_offsets array. + // create list's children max_row_offsets array // gather the max_row_offsets from children row offset array. 
{ auto max_row_offsets_it = @@ -315,15 +317,14 @@ std::tuple reduce_to_column_tree( list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { auto num_levels = *dev_num_levels_ptr; list_ancestors[node] = node; - for (int level = 0; level < num_levels; level++) { + for (int level = 0; level <= num_levels; level++) { if (list_ancestors[node] > 0) list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; - else - list_ancestors[node] = -1; - if (list_ancestors[node] == -1 || column_categories[list_ancestors[node]] == NC_LIST) + if (list_ancestors[node] == 0 || column_categories[list_ancestors[node]] == NC_LIST) break; } }); + thrust::gather_if(rmm::exec_policy_nosync(stream), list_ancestors.begin(), list_ancestors.end(), @@ -339,4 +340,211 @@ std::tuple reduce_to_column_tree( std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}}; } -} // namespace cudf::io::json::experimental::detail +} // namespace experimental::detail + +namespace detail { +/** + * @brief Reduces node tree representation to column tree representation. 
+ * + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple, rmm::device_uvector> +reduce_to_column_tree(tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // 1. column count for allocation + auto const num_columns = + thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + + // 2. reduce_by_key {col_id}, {row_offset}, max. + rmm::device_uvector unique_col_ids(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); + auto ordered_row_offsets = + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); + thrust::reduce_by_key(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_row_offsets, + unique_col_ids.begin(), + max_row_offsets.begin(), + thrust::equal_to(), + thrust::maximum()); + + // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + rmm::device_uvector column_categories(num_columns, stream); + thrust::reduce_by_key( + rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), + unique_col_ids.begin(), + column_categories.begin(), + thrust::equal_to(), + [] __device__(NodeT type_a, NodeT type_b) -> NodeT { + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + return type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + } else if (is_b_leaf) { + return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); + } + // *+#=E + return NC_ERR; + }); + + // 4. 
unique_copy parent_node_ids, ranges + rmm::device_uvector column_levels(0, stream); // not required + rmm::device_uvector parent_col_ids(num_columns, stream); + rmm::device_uvector col_range_begin(num_columns, stream); // Field names + rmm::device_uvector col_range_end(num_columns, stream); + rmm::device_uvector unique_node_ids(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_node_ids.begin(), + thrust::make_discard_iterator(), + unique_node_ids.begin()); + thrust::copy_n( + rmm::exec_policy(stream), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), + unique_node_ids.size(), + thrust::make_zip_iterator( + parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); + + // convert parent_node_ids to parent_col_ids + thrust::transform( + rmm::exec_policy(stream), + parent_col_ids.begin(), + parent_col_ids.end(), + parent_col_ids.begin(), + [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { + return parent_node_id == parent_node_sentinel ? 
parent_node_sentinel + : col_ids[parent_node_id]; + }); + + // condition is true if parent is not a list, or sentinel/root + // Special case to return true if parent is a list and is_array_of_arrays is true + auto is_non_list_parent = [column_categories = column_categories.begin(), + is_array_of_arrays, + row_array_parent_col_id] __device__(auto parent_col_id) -> bool { + return !(parent_col_id == parent_node_sentinel || + column_categories[parent_col_id] == NC_LIST && + (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); + return (parent_col_id != parent_node_sentinel) && + (column_categories[parent_col_id] != NC_LIST) || + (is_array_of_arrays == true && parent_col_id == row_array_parent_col_id); + }; + + // Mixed types in List children go to different columns, + // so all immediate children of list column should have same max_row_offsets. + // create list's children max_row_offsets array. (initialize to zero) + // atomicMax on children max_row_offsets array. + // gather the max_row_offsets from children row offset array. 
+ { + rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); + thrust::fill(rmm::exec_policy(stream), + list_parents_children_max_row_offsets.begin(), + list_parents_children_max_row_offsets.end(), + 0); + thrust::for_each(rmm::exec_policy(stream), + unique_col_ids.begin(), + unique_col_ids.end(), + [column_categories = column_categories.begin(), + parent_col_ids = parent_col_ids.begin(), + max_row_offsets = max_row_offsets.begin(), + list_parents_children_max_row_offsets = + list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { + auto parent_col_id = parent_col_ids[col_id]; + if (parent_col_id != parent_node_sentinel and + column_categories[parent_col_id] == node_t::NC_LIST) { + cuda::atomic_ref ref{ + *(list_parents_children_max_row_offsets + parent_col_id)}; + ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); + } + }); + thrust::gather_if( + rmm::exec_policy(stream), + parent_col_ids.begin(), + parent_col_ids.end(), + parent_col_ids.begin(), + list_parents_children_max_row_offsets.begin(), + max_row_offsets.begin(), + [column_categories = column_categories.begin()] __device__(size_type parent_col_id) { + return parent_col_id != parent_node_sentinel and + column_categories[parent_col_id] == node_t::NC_LIST; + }); + } + + // copy lists' max_row_offsets to children. + // all structs should have same size. 
+ thrust::transform_if( + rmm::exec_policy(stream), + unique_col_ids.begin(), + unique_col_ids.end(), + max_row_offsets.begin(), + [column_categories = column_categories.begin(), + is_non_list_parent, + parent_col_ids = parent_col_ids.begin(), + max_row_offsets = max_row_offsets.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + while (is_non_list_parent(parent_col_id)) { + col_id = parent_col_id; + parent_col_id = parent_col_ids[parent_col_id]; + } + return max_row_offsets[col_id]; + }, + [column_categories = column_categories.begin(), + is_non_list_parent, + parent_col_ids = parent_col_ids.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + return is_non_list_parent(parent_col_id); + }); + + // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) + thrust::transform_if( + rmm::exec_policy(stream), + col_range_begin.begin(), + col_range_begin.end(), + column_categories.begin(), + col_range_end.begin(), + [] __device__(auto i) { return i + 1; }, + [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); + + return std::tuple{tree_meta_t{std::move(column_categories), + std::move(parent_col_ids), + std::move(column_levels), + std::move(col_range_begin), + std::move(col_range_end)}, + std::move(unique_col_ids), + std::move(max_row_offsets)}; +} + +} // namespace detail +} // namespace cudf::io::json diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 70493b90575..2e3c5746520 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -98,205 +98,6 @@ void print_tree(host_span input, printf(" (JSON)\n"); } -/** - * @brief Reduces node tree representation to column tree representation. 
- * - * @param tree Node tree representation of JSON string - * @param original_col_ids Column ids of nodes - * @param sorted_col_ids Sorted column ids of nodes - * @param ordered_node_ids Node ids of nodes sorted by column ids - * @param row_offsets Row offsets of nodes - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true - * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple of column tree representation of JSON string, column ids of columns, and - * max row offsets of columns - */ -std::tuple, rmm::device_uvector> -reduce_to_column_tree(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, - bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - // 1. column count for allocation - auto const num_columns = - thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); - - // 2. reduce_by_key {col_id}, {row_offset}, max. - rmm::device_uvector unique_col_ids(num_columns, stream); - rmm::device_uvector max_row_offsets(num_columns, stream); - auto ordered_row_offsets = - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); - thrust::reduce_by_key(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_row_offsets, - unique_col_ids.begin(), - max_row_offsets.begin(), - thrust::equal_to(), - thrust::maximum()); - - // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector column_categories(num_columns, stream); - thrust::reduce_by_key( - rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), - unique_col_ids.begin(), - column_categories.begin(), - thrust::equal_to(), - [] __device__(NodeT type_a, NodeT type_b) -> NodeT { - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - return type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); - } else if (is_b_leaf) { - return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); - } - // *+#=E - return NC_ERR; - }); - - // 4. 
unique_copy parent_node_ids, ranges - rmm::device_uvector column_levels(0, stream); // not required - rmm::device_uvector parent_col_ids(num_columns, stream); - rmm::device_uvector col_range_begin(num_columns, stream); // Field names - rmm::device_uvector col_range_end(num_columns, stream); - rmm::device_uvector unique_node_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_node_ids.begin(), - thrust::make_discard_iterator(), - unique_node_ids.begin()); - thrust::copy_n( - rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), - unique_node_ids.size(), - thrust::make_zip_iterator( - parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); - - // convert parent_node_ids to parent_col_ids - thrust::transform( - rmm::exec_policy(stream), - parent_col_ids.begin(), - parent_col_ids.end(), - parent_col_ids.begin(), - [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { - return parent_node_id == parent_node_sentinel ? 
parent_node_sentinel - : col_ids[parent_node_id]; - }); - - // condition is true if parent is not a list, or sentinel/root - // Special case to return true if parent is a list and is_array_of_arrays is true - auto is_non_list_parent = [column_categories = column_categories.begin(), - is_array_of_arrays, - row_array_parent_col_id] __device__(auto parent_col_id) -> bool { - return !(parent_col_id == parent_node_sentinel || - column_categories[parent_col_id] == NC_LIST && - (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - }; - // Mixed types in List children go to different columns, - // so all immediate children of list column should have same max_row_offsets. - // create list's children max_row_offsets array. (initialize to zero) - // atomicMax on children max_row_offsets array. - // gather the max_row_offsets from children row offset array. - { - rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy(stream), - list_parents_children_max_row_offsets.begin(), - list_parents_children_max_row_offsets.end(), - 0); - thrust::for_each(rmm::exec_policy(stream), - unique_col_ids.begin(), - unique_col_ids.end(), - [column_categories = column_categories.begin(), - parent_col_ids = parent_col_ids.begin(), - max_row_offsets = max_row_offsets.begin(), - list_parents_children_max_row_offsets = - list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { - auto parent_col_id = parent_col_ids[col_id]; - if (parent_col_id != parent_node_sentinel and - column_categories[parent_col_id] == node_t::NC_LIST) { - cuda::atomic_ref ref{ - *(list_parents_children_max_row_offsets + parent_col_id)}; - ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); - } - }); - thrust::gather_if( - rmm::exec_policy(stream), - parent_col_ids.begin(), - parent_col_ids.end(), - parent_col_ids.begin(), - list_parents_children_max_row_offsets.begin(), - max_row_offsets.begin(), - [column_categories = 
column_categories.begin()] __device__(size_type parent_col_id) { - return parent_col_id != parent_node_sentinel and - column_categories[parent_col_id] == node_t::NC_LIST; - }); - } - - // copy lists' max_row_offsets to children. - // all structs should have same size. - thrust::transform_if( - rmm::exec_policy(stream), - unique_col_ids.begin(), - unique_col_ids.end(), - max_row_offsets.begin(), - [column_categories = column_categories.begin(), - is_non_list_parent, - parent_col_ids = parent_col_ids.begin(), - max_row_offsets = max_row_offsets.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - while (is_non_list_parent(parent_col_id)) { - col_id = parent_col_id; - parent_col_id = parent_col_ids[parent_col_id]; - } - return max_row_offsets[col_id]; - }, - [column_categories = column_categories.begin(), - is_non_list_parent, - parent_col_ids = parent_col_ids.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - return is_non_list_parent(parent_col_id); - }); - - // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) - thrust::transform_if( - rmm::exec_policy(stream), - col_range_begin.begin(), - col_range_begin.end(), - column_categories.begin(), - col_range_end.begin(), - [] __device__(auto i) { return i + 1; }, - [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); - - return std::tuple{tree_meta_t{std::move(column_categories), - std::move(parent_col_ids), - std::move(column_levels), - std::move(col_range_begin), - std::move(col_range_end)}, - std::move(unique_col_ids), - std::move(max_row_offsets)}; -} - /** * @brief Get the column indices for the values column for array of arrays rows * diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index f4ecb3b97b4..8e7275e6083 100644 
--- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -344,23 +344,22 @@ get_array_children_indices(TreeDepthT row_array_children_level, device_span node_levels, device_span parent_node_ids, rmm::cuda_stream_view stream); + /** * @brief Reduce node tree into column tree by aggregating each property of column. * - * @param tree json node tree to reduce (modified in-place, but restored to original state) - * @param col_ids column ids of each node (modified in-place, but restored to original state) - * @param row_offsets row offsets of each node (modified in-place, but restored to original state) - * @param stream The CUDA stream to which kernels are dispatched - * @return A tuple containing the column tree, identifier for each column and the maximum row index - * in each column + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns */ std::tuple, rmm::device_uvector> -reduce_to_column_tree(tree_meta_t& tree, - device_span col_ids, - device_span row_offsets, - rmm::cuda_stream_view stream); - -std::tuple, rmm::device_uvector> reduce_to_column_tree(tree_meta_t& tree, device_span original_col_ids, device_span sorted_col_ids, diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index bb3f835177f..aefeaf9a39a 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -64,11 +64,14 @@ void print(std::string str, 
std::vector& vec) } bool check_equality(cuio_json::tree_meta_t& d_a, + rmm::device_uvector& d_a_max_row_offsets, cuio_json::experimental::csr& d_b_csr, cuio_json::experimental::column_tree_properties& d_b_ctp, rmm::cuda_stream_view stream) { // convert from tree_meta_t to column_tree_csr + stream.synchronize(); + h_tree_meta_t a{cudf::detail::make_std_vector_async(d_a.node_categories, stream), cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), @@ -79,6 +82,9 @@ bool check_equality(cuio_json::tree_meta_t& d_a, cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; + auto a_max_row_offsets = cudf::detail::make_std_vector_async(d_a_max_row_offsets, stream); + auto b_max_row_offsets = cudf::detail::make_std_vector_async(d_b_ctp.max_row_offsets, stream); + stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); @@ -99,6 +105,9 @@ bool check_equality(cuio_json::tree_meta_t& d_a, for (size_t u = 0; u < num_nodes; u++) { if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } + } return true; } @@ -173,7 +182,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines) cudf::io::json::experimental::detail::reduce_to_column_tree( gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); - auto iseq = check_equality(d_column_tree, d_column_tree_csr, d_column_tree_properties, stream); + auto iseq = check_equality(d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); // assert equality between csr and meta formats assert(iseq == true); } From 4339b0a1a8a9a5ceee0dc76ceeb11066a8524668 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 31 Jul 2024 05:37:15 +0000 Subject: [PATCH 17/46] formatting --- 
cpp/src/io/json/column_tree_construction.cu | 13 ++++++------- cpp/tests/io/json/json_tree_csr.cu | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 5ef21096ac9..89a9365a60b 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -318,8 +318,7 @@ std::tuple reduce_to_column_tree( auto num_levels = *dev_num_levels_ptr; list_ancestors[node] = node; for (int level = 0; level <= num_levels; level++) { - if (list_ancestors[node] > 0) - list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; + if (list_ancestors[node] > 0) list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; if (list_ancestors[node] == 0 || column_categories[list_ancestors[node]] == NC_LIST) break; } @@ -455,9 +454,9 @@ reduce_to_column_tree(tree_meta_t& tree, return !(parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST && (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - return (parent_col_id != parent_node_sentinel) && - (column_categories[parent_col_id] != NC_LIST) || - (is_array_of_arrays == true && parent_col_id == row_array_parent_col_id); + return (parent_col_id != parent_node_sentinel) && + (column_categories[parent_col_id] != NC_LIST) || + (is_array_of_arrays == true && parent_col_id == row_array_parent_col_id); }; // Mixed types in List children go to different columns, @@ -546,5 +545,5 @@ reduce_to_column_tree(tree_meta_t& tree, std::move(max_row_offsets)}; } -} // namespace detail -} // namespace cudf::io::json +} // namespace detail +} // namespace cudf::io::json diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index aefeaf9a39a..bc135e041d0 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -182,7 +182,8 @@ TEST_F(JsonColumnTreeTests, SimpleLines) 
cudf::io::json::experimental::detail::reduce_to_column_tree( gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); - auto iseq = check_equality(d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); + auto iseq = check_equality( + d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); // assert equality between csr and meta formats assert(iseq == true); } From 9b6b7ff0ac10458de9395c84043f19005e00e8e9 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 31 Jul 2024 17:43:12 +0000 Subject: [PATCH 18/46] struct docs --- cpp/src/io/json/nested_json.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 8e7275e6083..5976160016d 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -195,6 +195,10 @@ struct csr { rmm::device_uvector colidx; }; +/* + * @brief Auxiliary column tree properties that are required to construct the device json + * column subtree, but not required for the final cudf column construction. + */ struct column_tree_properties { rmm::device_uvector categories; rmm::device_uvector max_row_offsets; From 85608eb0b6a2aa3288aa2d460559d243047b68b8 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 31 Jul 2024 18:03:47 +0000 Subject: [PATCH 19/46] cudf exports! 
--- cpp/src/io/json/nested_json.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 45118002f04..47d3a85e62f 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -244,7 +244,7 @@ namespace detail { * @return A tuple containing the column tree, identifier for each column and the maximum row index * in each column */ - +CUDF_EXPORT std::tuple reduce_to_column_tree( tree_meta_t& tree, device_span original_col_ids, @@ -370,6 +370,7 @@ get_array_children_indices(TreeDepthT row_array_children_level, * @return A tuple of column tree representation of JSON string, column ids of columns, and * max row offsets of columns */ +CUDF_EXPORT std::tuple, rmm::device_uvector> reduce_to_column_tree(tree_meta_t& tree, device_span original_col_ids, From e29656d972ae011be18ea5f960a345ab9e18b12d Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 18:05:33 +0000 Subject: [PATCH 20/46] deduplicating code --- cpp/src/io/json/column_tree_construction.cu | 125 ++++++++------------ 1 file changed, 52 insertions(+), 73 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 89a9365a60b..ea14ac885e2 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -54,6 +54,8 @@ namespace cudf::io::json { +using row_offset_t = size_type; + template void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) { @@ -67,9 +69,44 @@ void print(device_span d_vec, std::string name, rmm::cuda_stream_view s std::cout << std::endl; } -namespace experimental::detail { +template +void max_row_offsets_col_categories(InputIterator1 keys_first, InputIterator1 keys_last, InputIterator2 values_first, OutputIterator1 keys_output, OutputIterator2 values_output, rmm::cuda_stream_view stream) { + thrust::reduce_by_key( + 
rmm::exec_policy_nosync(stream), + keys_first, + keys_last, + values_first, + keys_output, + values_output, + thrust::equal_to(), + [] __device__(auto a, auto b) { + auto row_offset_a = thrust::get<0>(a); + auto row_offset_b = thrust::get<0>(b); + auto type_a = thrust::get<1>(a); + auto type_b = thrust::get<1>(b); -using row_offset_t = size_type; + NodeT ctg; + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + ctg = type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); + } else if (is_b_leaf) { + ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); + } else + ctg = NC_ERR; + + thrust::maximum row_offset_op; + return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); + }); +} + +namespace experimental::detail { struct level_ordering { device_span node_levels; @@ -164,44 +201,14 @@ std::tuple reduce_to_column_tree( // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) rmm::device_uvector max_row_offsets(num_columns, stream); rmm::device_uvector column_categories(num_columns, stream); - auto ordered_row_offsets = - thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()); - auto ordered_node_categories = - thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin()); - thrust::reduce_by_key( - rmm::exec_policy_nosync(stream), - level_ordered_col_ids.begin(), - level_ordered_col_ids.end(), - thrust::make_zip_iterator(ordered_row_offsets, ordered_node_categories), + max_row_offsets_col_categories( + level_ordered_node_ids.begin(), + level_ordered_node_ids.end(), + thrust::make_zip_iterator(thrust::make_permutation_iterator(row_offsets.begin(), 
level_ordered_node_ids.begin()), thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin())), thrust::make_discard_iterator(), thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), - thrust::equal_to(), - [] __device__(auto a, auto b) { - auto row_offset_a = thrust::get<0>(a); - auto row_offset_b = thrust::get<0>(b); - auto type_a = thrust::get<1>(a); - auto type_b = thrust::get<1>(b); - - NodeT ctg; - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - ctg = type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); - } else if (is_b_leaf) { - ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); - } else - ctg = NC_ERR; - - thrust::maximum row_offset_op; - return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); - }); - + stream + ); // 4. construct parent_col_ids using permutation iterator rmm::device_uvector parent_col_ids(num_columns, stream); thrust::transform_output_iterator parent_col_ids_it( @@ -374,44 +381,16 @@ reduce_to_column_tree(tree_meta_t& tree, // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); rmm::device_uvector max_row_offsets(num_columns, stream); - auto ordered_row_offsets = - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); - thrust::reduce_by_key(rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_row_offsets, - unique_col_ids.begin(), - max_row_offsets.begin(), - thrust::equal_to(), - thrust::maximum()); - // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) rmm::device_uvector column_categories(num_columns, stream); - thrust::reduce_by_key( - rmm::exec_policy(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), + max_row_offsets_col_categories( + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_zip_iterator(thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()), thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin())), unique_col_ids.begin(), - column_categories.begin(), - thrust::equal_to(), - [] __device__(NodeT type_a, NodeT type_b) -> NodeT { - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - return type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); - } else if (is_b_leaf) { - return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); - } - // *+#=E - return NC_ERR; - }); + thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), + stream + ); // 4. 
unique_copy parent_node_ids, ranges rmm::device_uvector column_levels(0, stream); // not required From e6eda4111d3a6a13cd40bb04fe773884db1d24e4 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 18:06:51 +0000 Subject: [PATCH 21/46] formatting --- cpp/src/io/json/column_tree_construction.cu | 102 +++++++++++--------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index ea14ac885e2..07913787c3f 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -69,41 +69,50 @@ void print(device_span d_vec, std::string name, rmm::cuda_stream_view s std::cout << std::endl; } -template -void max_row_offsets_col_categories(InputIterator1 keys_first, InputIterator1 keys_last, InputIterator2 values_first, OutputIterator1 keys_output, OutputIterator2 values_output, rmm::cuda_stream_view stream) { - thrust::reduce_by_key( - rmm::exec_policy_nosync(stream), - keys_first, - keys_last, - values_first, - keys_output, - values_output, - thrust::equal_to(), - [] __device__(auto a, auto b) { - auto row_offset_a = thrust::get<0>(a); - auto row_offset_b = thrust::get<0>(b); - auto type_a = thrust::get<1>(a); - auto type_b = thrust::get<1>(b); - - NodeT ctg; - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - ctg = type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR - ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); - } else if (is_b_leaf) { - ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a)); - } else - ctg = NC_ERR; - - thrust::maximum row_offset_op; - return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); - }); +template +void max_row_offsets_col_categories(InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + rmm::cuda_stream_view stream) +{ + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + keys_first, + keys_last, + values_first, + keys_output, + values_output, + thrust::equal_to(), + [] __device__(auto a, auto b) { + auto row_offset_a = thrust::get<0>(a); + auto row_offset_b = thrust::get<0>(b); + auto type_a = thrust::get<1>(a); + auto type_b = thrust::get<1>(b); + + NodeT ctg; + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + ctg = type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + + // STR = STR + ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); + } else if (is_b_leaf) { + ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a)); + } else + ctg = NC_ERR; + + thrust::maximum row_offset_op; + return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); + }); } namespace experimental::detail { @@ -202,13 +211,15 @@ std::tuple reduce_to_column_tree( rmm::device_uvector max_row_offsets(num_columns, stream); rmm::device_uvector column_categories(num_columns, stream); max_row_offsets_col_categories( - level_ordered_node_ids.begin(), - level_ordered_node_ids.end(), - thrust::make_zip_iterator(thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()), thrust::make_permutation_iterator(tree.node_categories.begin(), level_ordered_node_ids.begin())), + level_ordered_node_ids.begin(), + level_ordered_node_ids.end(), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_categories.begin(), + level_ordered_node_ids.begin())), thrust::make_discard_iterator(), thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), - stream - ); + stream); // 4. construct parent_col_ids using permutation iterator rmm::device_uvector parent_col_ids(num_columns, stream); thrust::transform_output_iterator parent_col_ids_it( @@ -384,13 +395,14 @@ reduce_to_column_tree(tree_meta_t& tree, // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) rmm::device_uvector column_categories(num_columns, stream); max_row_offsets_col_categories( - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_zip_iterator(thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()), thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin())), + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin())), unique_col_ids.begin(), thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), - stream - ); + stream); // 4. unique_copy parent_node_ids, ranges rmm::device_uvector column_levels(0, stream); // not required From bf4f191104bfb029f0a0289aabcf4cb93c9db4b6 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 20:58:35 +0000 Subject: [PATCH 22/46] addressing reviews - 1 --- cpp/src/io/json/column_tree_construction.cu | 17 ++--------------- cpp/tests/io/json/json_tree_csr.cu | 2 +- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 07913787c3f..ec00066965d 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -56,19 +56,6 @@ namespace cudf::io::json { using row_offset_t = size_type; -template -void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) -{ - stream.synchronize(); - auto h_vec = cudf::detail::make_std_vector_async(d_vec, stream); - stream.synchronize(); - std::cout << name << " = "; - for (auto e : h_vec) { - std::cout << e << " "; - } - std::cout << std::endl; -} - template reduce_to_column_tree( rmm::device_uvector max_row_offsets(num_columns, stream); 
rmm::device_uvector column_categories(num_columns, stream); max_row_offsets_col_categories( - level_ordered_node_ids.begin(), - level_ordered_node_ids.end(), + level_ordered_col_ids.begin(), + level_ordered_col_ids.end(), thrust::make_zip_iterator( thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()), thrust::make_permutation_iterator(tree.node_categories.begin(), diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index bc135e041d0..7414181f05f 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -185,5 +185,5 @@ TEST_F(JsonColumnTreeTests, SimpleLines) auto iseq = check_equality( d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); // assert equality between csr and meta formats - assert(iseq == true); + ASSERT_TRUE(iseq); } From 55e943aa91e57fec93264293f1326c9c86752c68 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 21:25:46 +0000 Subject: [PATCH 23/46] addressing reviews - 2 --- cpp/src/io/json/column_tree_construction.cu | 16 ++++++++-------- cpp/src/io/json/nested_json.hpp | 12 ++++++------ cpp/tests/io/json/json_tree_csr.cu | 11 +---------- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index ec00066965d..036b8693c36 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -106,7 +106,7 @@ namespace experimental::detail { struct level_ordering { device_span node_levels; - device_span col_ids; + device_span col_ids; __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const { return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || @@ -116,7 +116,7 @@ struct level_ordering { }; struct parent_nodeids_to_colids { - device_span col_ids; + device_span col_ids; device_span rev_mapped_col_ids; __device__ auto 
operator()(NodeIndexT parent_node_id) -> NodeIndexT { @@ -141,8 +141,8 @@ struct parent_nodeids_to_colids { */ std::tuple reduce_to_column_tree( tree_meta_t& tree, - device_span col_ids, - device_span row_offsets, + device_span col_ids, + device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream) @@ -363,10 +363,10 @@ namespace detail { */ std::tuple, rmm::device_uvector> reduce_to_column_tree(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream) diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 47d3a85e62f..9f93a48ae66 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -247,8 +247,8 @@ namespace detail { CUDF_EXPORT std::tuple reduce_to_column_tree( tree_meta_t& tree, - device_span original_col_ids, - device_span row_offsets, + device_span original_col_ids, + device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); @@ -373,10 +373,10 @@ get_array_children_indices(TreeDepthT row_array_children_level, CUDF_EXPORT std::tuple, rmm::device_uvector> reduce_to_column_tree(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 7414181f05f..f15b392dc06 100644 --- 
a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -54,17 +54,8 @@ struct h_column_tree { std::vector column_ids; }; -template -void print(std::string str, std::vector& vec) -{ - std::cout << str << " = "; - for (size_t i = 0; i < vec.size(); i++) - std::cout << vec[i] << " "; - std::cout << std::endl; -} - bool check_equality(cuio_json::tree_meta_t& d_a, - rmm::device_uvector& d_a_max_row_offsets, + cudf::device_span d_a_max_row_offsets, cuio_json::experimental::csr& d_b_csr, cuio_json::experimental::column_tree_properties& d_b_ctp, rmm::cuda_stream_view stream) From 4e005265609b10cb5c472bd28e1ecab1fc071d62 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 22:54:39 +0000 Subject: [PATCH 24/46] tsk tsk should have run compute sanitizer sooner --- cpp/src/io/json/column_tree_construction.cu | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 036b8693c36..1f9744a4ae4 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -97,8 +97,8 @@ void max_row_offsets_col_categories(InputIterator1 keys_first, } else ctg = NC_ERR; - thrust::maximum row_offset_op; - return thrust::make_pair(row_offset_op(row_offset_a, row_offset_b), ctg); + return thrust::make_pair( + thrust::maximum{}(row_offset_a, row_offset_b), ctg); }); } @@ -149,10 +149,9 @@ std::tuple reduce_to_column_tree( { CUDF_FUNC_RANGE(); - rmm::device_uvector level_ordered_col_ids(col_ids.size(), stream); + auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( + col_ids, stream, rmm::mr::get_current_device_resource()); rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); - thrust::copy( - rmm::exec_policy_nosync(stream), col_ids.begin(), col_ids.end(), level_ordered_col_ids.begin()); thrust::sequence( rmm::exec_policy_nosync(stream), 
level_ordered_node_ids.begin(), level_ordered_node_ids.end()); @@ -172,7 +171,7 @@ std::tuple reduce_to_column_tree( rmm::device_uvector rev_mapped_col_ids(num_columns, stream); thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), - level_ordered_node_ids.end(), + level_ordered_col_ids.end(), level_ordered_node_ids.begin(), mapped_col_ids.begin(), level_ordered_unique_node_ids.begin()); From ca7a5f35daabf5ed9b900994b71908726628f000 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 23:12:04 +0000 Subject: [PATCH 25/46] addressing reviews - 3 --- cpp/src/io/json/column_tree_construction.cu | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 1f9744a4ae4..6eeab1c55fb 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -178,11 +178,8 @@ std::tuple reduce_to_column_tree( auto* dev_num_levels_ptr = thrust::max_element( rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); - rmm::device_uvector mapped_col_ids_copy(num_columns, stream); - thrust::copy(rmm::exec_policy_nosync(stream), - mapped_col_ids.begin(), - mapped_col_ids.end(), - mapped_col_ids_copy.begin()); + auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async( + mapped_col_ids, stream, rmm::mr::get_current_device_resource()); thrust::sequence( rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); thrust::sort_by_key(rmm::exec_policy_nosync(stream), @@ -225,8 +222,8 @@ std::tuple reduce_to_column_tree( iii. 
col idx[coln] = adj_coln U {parent_col_id[coln]} */ - rmm::device_uvector rowidx(num_columns + 1, stream); - thrust::fill(rmm::exec_policy_nosync(stream), rowidx.begin(), rowidx.end(), 0); + auto rowidx = cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns + 1), stream, rmm::mr::get_current_device_resource()); // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency auto num_non_leaf_columns = thrust::unique_count( From 14664dbed234a4f0cbe5d8b43ebe999ee73dcb1d Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 6 Sep 2024 23:21:30 +0000 Subject: [PATCH 26/46] addressing reviews - 4 --- cpp/src/io/json/nested_json.hpp | 3 --- cpp/tests/io/json/json_tree_csr.cu | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 9f93a48ae66..2d40cb3c0d5 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -23,11 +23,8 @@ #include #include -#include #include -#include - #include #include diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index f15b392dc06..a00b231ed56 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -30,6 +30,7 @@ #include #include +#include #include From 5f4aca61b6af4f6c321b0b0951ed0995fa43fc7c Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 13 Sep 2024 00:08:28 +0000 Subject: [PATCH 27/46] adding more tests; debugging on the way --- cpp/src/io/json/column_tree_construction.cu | 69 ++++++++++++++-- cpp/tests/io/json/json_tree_csr.cu | 89 +++++++++++++++++---- 2 files changed, 136 insertions(+), 22 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 6eeab1c55fb..0b65a01f621 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -25,12 +25,12 @@ #include #include 
#include +#include #include #include #include #include -#include #include #include @@ -102,16 +102,31 @@ void max_row_offsets_col_categories(InputIterator1 keys_first, }); } +template +void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) +{ + stream.synchronize(); + auto h_vec = cudf::detail::make_std_vector_sync(d_vec, stream); + std::cout << name << " = "; + for (auto e : h_vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} + namespace experimental::detail { struct level_ordering { device_span node_levels; device_span col_ids; + device_span parent_node_ids; __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const { return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || (node_levels[lhs_node_id] == node_levels[rhs_node_id] && - col_ids[lhs_node_id] < col_ids[rhs_node_id]); + col_ids[parent_node_ids[lhs_node_id]] < col_ids[parent_node_ids[rhs_node_id]]) || + (node_levels[lhs_node_id] == node_levels[rhs_node_id] && + col_ids[parent_node_ids[lhs_node_id]] == col_ids[parent_node_ids[rhs_node_id]] && col_ids[lhs_node_id] < col_ids[rhs_node_id]); } }; @@ -150,7 +165,7 @@ std::tuple reduce_to_column_tree( CUDF_FUNC_RANGE(); auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( - col_ids, stream, rmm::mr::get_current_device_resource()); + col_ids, stream, cudf::get_current_device_resource_ref()); rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); thrust::sequence( rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); @@ -160,7 +175,13 @@ std::tuple reduce_to_column_tree( level_ordered_node_ids.begin(), level_ordered_node_ids.end(), level_ordered_col_ids.begin(), - level_ordering{tree.node_levels, col_ids}); + level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}); + + /* + print(level_ordered_node_ids, "h_level_ordered_node_ids", stream); + print(col_ids, "h_col_ids", stream); + print(level_ordered_col_ids, 
"h_level_ordered_col_ids", stream); + */ // 1. get the number of columns in tree, mapping between node tree col ids and csr col ids, and // the node id of first row in each column @@ -179,7 +200,7 @@ std::tuple reduce_to_column_tree( rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async( - mapped_col_ids, stream, rmm::mr::get_current_device_resource()); + mapped_col_ids, stream, cudf::get_current_device_resource_ref()); thrust::sequence( rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); thrust::sort_by_key(rmm::exec_policy_nosync(stream), @@ -187,6 +208,10 @@ std::tuple reduce_to_column_tree( mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); + print(mapped_col_ids, "h_mapped_col_ids", stream); + print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); + print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); + // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. // 3. 
category for each column node by aggregating all nodes in node tree corresponding to same // column: @@ -223,18 +248,28 @@ std::tuple reduce_to_column_tree( */ auto rowidx = cudf::detail::make_zeroed_device_uvector_async( - static_cast(num_columns + 1), stream, rmm::mr::get_current_device_resource()); + static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency + + print(parent_col_ids, "h_parent_col_ids", stream); + auto num_non_leaf_columns = thrust::unique_count( rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); + rmm::device_uvector non_leaf_nodes(num_non_leaf_columns, stream); + rmm::device_uvector non_leaf_nodes_children(num_non_leaf_columns, stream); thrust::reduce_by_key(rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end(), thrust::make_constant_iterator(1), - thrust::make_discard_iterator(), - rowidx.begin() + 1, + non_leaf_nodes.begin(), + non_leaf_nodes_children.begin(), thrust::equal_to()); + + thrust::scatter(rmm::exec_policy_nosync(stream), non_leaf_nodes_children.begin(), non_leaf_nodes_children.end(), non_leaf_nodes.begin(), rowidx.begin() + 1); + + print(rowidx, "h_rowidx", stream); + thrust::transform_inclusive_scan( rmm::exec_policy_nosync(stream), thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), @@ -247,6 +282,8 @@ std::tuple reduce_to_column_tree( }), thrust::plus{}); + print(rowidx, "h_rowidx", stream); + rmm::device_uvector colidx((num_columns - 1) * 2, stream); // Skip the parent of root node thrust::scatter(rmm::exec_policy_nosync(stream), @@ -279,6 +316,8 @@ std::tuple reduce_to_column_tree( map.begin(), colidx.begin()); + print(max_row_offsets, "h_max_row_offsets", stream); + // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. 
// create list's children max_row_offsets array @@ -372,6 +411,12 @@ reduce_to_column_tree(tree_meta_t& tree, auto const num_columns = thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + /* + print(original_col_ids, "h_original_col_ids", stream); + print(sorted_col_ids, "h_sorted_col_ids", stream); + print(ordered_node_ids, "h_ordered_node_ids", stream); + */ + // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); rmm::device_uvector max_row_offsets(num_columns, stream); @@ -399,6 +444,12 @@ reduce_to_column_tree(tree_meta_t& tree, ordered_node_ids.begin(), thrust::make_discard_iterator(), unique_node_ids.begin()); + + /* + print(unique_col_ids, "h_unique_col_ids", stream); + print(unique_node_ids, "h_unique_node_ids", stream); + */ + thrust::copy_n( rmm::exec_policy(stream), thrust::make_zip_iterator( @@ -433,6 +484,8 @@ reduce_to_column_tree(tree_meta_t& tree, (is_array_of_arrays == true && parent_col_id == row_array_parent_col_id); }; + print(max_row_offsets, "old h_max_row_offsets", stream); + // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array. 
(initialize to zero) diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index a00b231ed56..6be8bb7bc04 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -80,38 +80,62 @@ bool check_equality(cuio_json::tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) { return false; } + if (b.rowidx.size() != num_nodes + 1) { std::printf("1\n"); return false; } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { std::printf("2\n"); return false; } } + + std::printf("rowidx = \n"); + for(size_t u = 0; u < num_nodes; u++) + std::printf("%d ", b.rowidx[u]); + std::printf("\n"); + std::printf("colidx = \n"); + for(size_t u = 0; u < num_nodes; u++) { + for(int pos = b.rowidx[u]; pos < b.rowidx[u+1]; pos++) + std::printf("%d ", b.colidx[pos]); + } + std::printf("\n"); + std::printf("a.parent_node_ids = \n"); + for(size_t u = 0; u < num_nodes; u++) + std::printf("%d ", a.parent_node_ids[u]); + std::printf("\nb.column_ids = \n"); + for(size_t u = 0; u < num_nodes; u++) + std::printf("%d ", b.column_ids[u]); + std::printf("\n"); + for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { std::printf("3\n"); return false; } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { + std::printf("u = %lu, adj_size = %d\n", u, b.rowidx[u+1] - b.rowidx[u]); + std::printf("4: b.column_ids[%lu] = %d, b.column_ids[%d] = %d, a.parent_node_ids[b.column_ids[%d]] = 
%d\n", u, b.column_ids[u], v, b.column_ids[v], v, a.parent_node_ids[b.column_ids[v]]); + return false; } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { std::printf("5\n"); return false; } } + + std::printf("permuted a_max_row_offsets = "); + for(size_t u = 0; u < num_nodes; u++) + std::printf("%d ", a_max_row_offsets[b.column_ids[u]]); + std::printf("\nb_max_row_offsets = "); + for(size_t u = 0; u < num_nodes; u++) + std::printf("%d ", b_max_row_offsets[u]); + std::printf("\n"); + for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { std::printf("6\n"); return false; } } return true; } -struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; - -TEST_F(JsonColumnTreeTests, SimpleLines) -{ +void run_test(std::string const &input) { auto const stream = cudf::get_default_stream(); - std::string const input = - R"( {} - { "a": { "y" : 6, "z": [] }} - { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; // Prepare input & output buffers cudf::string_scalar d_scalar(input, true, stream); auto d_input = cudf::device_span{d_scalar.data(), static_cast(d_scalar.size())}; @@ -179,3 +203,40 @@ TEST_F(JsonColumnTreeTests, SimpleLines) // assert equality between csr and meta formats ASSERT_TRUE(iseq); } + +struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; + +TEST_F(JsonColumnTreeTests, SimpleLines1) +{ + std::string const input = + R"( {} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; // Prepare input & output buffers + run_test(input); +} + +TEST_F(JsonColumnTreeTests, SimpleLines2) +{ + std::string const input = + R"( {} + {} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a": { "y" 
: 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; + run_test(input); +} + +TEST_F(JsonColumnTreeTests, SimpleLines3) +{ + std::string const input = R"( + { "Root": { "Key": [ { "EE": "A" } ] } } + { "Root": { "Key": { } } } + { "Root": { "Key": [{ "YY": 1}] } } + )"; + run_test(input); +} From e6a99418b7536b5308d8a69ecc2a73f73710e35c Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 13 Sep 2024 00:09:23 +0000 Subject: [PATCH 28/46] formatting --- cpp/src/io/json/column_tree_construction.cu | 11 +++- cpp/tests/io/json/json_tree_csr.cu | 65 ++++++++++++++------- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 0b65a01f621..ce1bc240bed 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -126,7 +126,8 @@ struct level_ordering { (node_levels[lhs_node_id] == node_levels[rhs_node_id] && col_ids[parent_node_ids[lhs_node_id]] < col_ids[parent_node_ids[rhs_node_id]]) || (node_levels[lhs_node_id] == node_levels[rhs_node_id] && - col_ids[parent_node_ids[lhs_node_id]] == col_ids[parent_node_ids[rhs_node_id]] && col_ids[lhs_node_id] < col_ids[rhs_node_id]); + col_ids[parent_node_ids[lhs_node_id]] == col_ids[parent_node_ids[rhs_node_id]] && + col_ids[lhs_node_id] < col_ids[rhs_node_id]); } }; @@ -251,7 +252,7 @@ std::tuple reduce_to_column_tree( static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency - + print(parent_col_ids, "h_parent_col_ids", stream); auto num_non_leaf_columns = thrust::unique_count( @@ -266,7 +267,11 @@ std::tuple reduce_to_column_tree( 
non_leaf_nodes_children.begin(), thrust::equal_to()); - thrust::scatter(rmm::exec_policy_nosync(stream), non_leaf_nodes_children.begin(), non_leaf_nodes_children.end(), non_leaf_nodes.begin(), rowidx.begin() + 1); + thrust::scatter(rmm::exec_policy_nosync(stream), + non_leaf_nodes_children.begin(), + non_leaf_nodes_children.end(), + non_leaf_nodes.begin(), + rowidx.begin() + 1); print(rowidx, "h_rowidx", stream); diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 6be8bb7bc04..de0ca89663a 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -80,61 +80,86 @@ bool check_equality(cuio_json::tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) { std::printf("1\n"); return false; } + if (b.rowidx.size() != num_nodes + 1) { + std::printf("1\n"); + return false; + } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { std::printf("2\n"); return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { + std::printf("2\n"); + return false; + } } std::printf("rowidx = \n"); - for(size_t u = 0; u < num_nodes; u++) + for (size_t u = 0; u < num_nodes; u++) std::printf("%d ", b.rowidx[u]); std::printf("\n"); std::printf("colidx = \n"); - for(size_t u = 0; u < num_nodes; u++) { - for(int pos = b.rowidx[u]; pos < b.rowidx[u+1]; pos++) + for (size_t u = 0; u < num_nodes; u++) { + for (int pos = b.rowidx[u]; pos < b.rowidx[u + 1]; pos++) std::printf("%d ", b.colidx[pos]); } std::printf("\n"); std::printf("a.parent_node_ids = \n"); - for(size_t u = 0; u < num_nodes; u++) + for (size_t u = 0; u < num_nodes; u++) std::printf("%d ", a.parent_node_ids[u]); std::printf("\nb.column_ids = \n"); - for(size_t u = 0; u < num_nodes; u++) + for (size_t u = 0; u < num_nodes; u++) std::printf("%d ", b.column_ids[u]); std::printf("\n"); for (size_t 
u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { std::printf("3\n"); return false; } + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { + std::printf("3\n"); + return false; + } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { - std::printf("u = %lu, adj_size = %d\n", u, b.rowidx[u+1] - b.rowidx[u]); - std::printf("4: b.column_ids[%lu] = %d, b.column_ids[%d] = %d, a.parent_node_ids[b.column_ids[%d]] = %d\n", u, b.column_ids[u], v, b.column_ids[v], v, a.parent_node_ids[b.column_ids[v]]); - return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { + std::printf("u = %lu, adj_size = %d\n", u, b.rowidx[u + 1] - b.rowidx[u]); + std::printf( + "4: b.column_ids[%lu] = %d, b.column_ids[%d] = %d, a.parent_node_ids[b.column_ids[%d]] = " + "%d\n", + u, + b.column_ids[u], + v, + b.column_ids[v], + v, + a.parent_node_ids[b.column_ids[v]]); + return false; + } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { std::printf("5\n"); return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { + std::printf("5\n"); + return false; + } } std::printf("permuted a_max_row_offsets = "); - for(size_t u = 0; u < num_nodes; u++) + for (size_t u = 0; u < num_nodes; u++) std::printf("%d ", a_max_row_offsets[b.column_ids[u]]); std::printf("\nb_max_row_offsets = "); - for(size_t u = 0; u < num_nodes; u++) + for (size_t u = 0; u < num_nodes; u++) std::printf("%d ", b_max_row_offsets[u]); std::printf("\n"); for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { std::printf("6\n"); return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { + std::printf("6\n"); + return false; + } } return true; } -void run_test(std::string const &input) { 
+void run_test(std::string const& input) +{ auto const stream = cudf::get_default_stream(); cudf::string_scalar d_scalar(input, true, stream); auto d_input = cudf::device_span{d_scalar.data(), @@ -221,11 +246,11 @@ TEST_F(JsonColumnTreeTests, SimpleLines2) R"( {} {} { "a": { "y" : 6, "z": [] }} - { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} { "a": { "y" : 6, "z": [] }} - { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} { "a": { "y" : 6, "z": [] }} - { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} { "a": { "y" : 6, "z": [] }} { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; run_test(input); From 82c9ebecc5fb52e64c5ecc149c32c6aadbdad36d Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 17 Sep 2024 18:46:22 +0000 Subject: [PATCH 29/46] added more tests; fixed bugs --- cpp/src/io/json/column_tree_construction.cu | 72 +++++++++-------- cpp/tests/io/json/json_tree_csr.cu | 90 +++++++++++---------- 2 files changed, 89 insertions(+), 73 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index ce1bc240bed..ae2406ae7f9 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -102,6 +102,7 @@ void max_row_offsets_col_categories(InputIterator1 keys_first, }); } +// debug printing template void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) { @@ -178,11 +179,11 @@ std::tuple reduce_to_column_tree( level_ordered_col_ids.begin(), level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}); - /* +#ifdef CSR_DEBUG_PRINT print(level_ordered_node_ids, "h_level_ordered_node_ids", stream); print(col_ids, "h_col_ids", stream); print(level_ordered_col_ids, "h_level_ordered_col_ids", stream); - */ +#endif // 1. 
get the number of columns in tree, mapping between node tree col ids and csr col ids, and // the node id of first row in each column @@ -209,9 +210,11 @@ std::tuple reduce_to_column_tree( mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); +#ifdef CSR_DEBUG_PRINT print(mapped_col_ids, "h_mapped_col_ids", stream); print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); +#endif // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. // 3. category for each column node by aggregating all nodes in node tree corresponding to same @@ -253,7 +256,9 @@ std::tuple reduce_to_column_tree( // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency +#ifdef CSR_DEBUG_PRINT print(parent_col_ids, "h_parent_col_ids", stream); +#endif auto num_non_leaf_columns = thrust::unique_count( rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); @@ -273,8 +278,6 @@ std::tuple reduce_to_column_tree( non_leaf_nodes.begin(), rowidx.begin() + 1); - print(rowidx, "h_rowidx", stream); - thrust::transform_inclusive_scan( rmm::exec_policy_nosync(stream), thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), @@ -287,15 +290,8 @@ std::tuple reduce_to_column_tree( }), thrust::plus{}); - print(rowidx, "h_rowidx", stream); - rmm::device_uvector colidx((num_columns - 1) * 2, stream); - // Skip the parent of root node - thrust::scatter(rmm::exec_policy_nosync(stream), - parent_col_ids.begin() + 1, - parent_col_ids.end(), - rowidx.begin() + 1, - colidx.begin()); + thrust::fill(rmm::exec_policy(stream), colidx.begin(), colidx.end(), -1); // excluding root node, construct scatter map rmm::device_uvector map(num_columns - 1, stream); thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), @@ -321,15 +317,23 @@ std::tuple reduce_to_column_tree( map.begin(), colidx.begin()); 
+#ifdef CSR_DEBUG_PRINT + print(colidx, "h_pre_colidx", stream); print(max_row_offsets, "h_max_row_offsets", stream); +#endif // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array // gather the max_row_offsets from children row offset array. { - auto max_row_offsets_it = - thrust::make_permutation_iterator(max_row_offsets.begin(), colidx.begin()); + auto max_row_offsets_it = thrust::make_transform_iterator(thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [colidx = colidx.begin(), + max_row_offsets = max_row_offsets.begin()] __device__ (size_t i) { + if(colidx[i] == -1) return -1; + else return max_row_offsets[colidx[i]]; + })); rmm::device_uvector max_children_max_row_offsets(num_columns, stream); size_t temp_storage_bytes = 0; cub::DeviceSegmentedReduce::Max(nullptr, @@ -350,6 +354,14 @@ std::tuple reduce_to_column_tree( rowidx.begin() + 1, stream.value()); + // Skip the parent of root node + thrust::scatter(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + rowidx.begin() + 1, + colidx.begin()); + + rmm::device_uvector list_ancestors(num_columns, stream); thrust::for_each_n( rmm::exec_policy_nosync(stream), @@ -361,21 +373,17 @@ std::tuple reduce_to_column_tree( dev_num_levels_ptr, list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { auto num_levels = *dev_num_levels_ptr; - list_ancestors[node] = node; - for (int level = 0; level <= num_levels; level++) { - if (list_ancestors[node] > 0) list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; - if (list_ancestors[node] == 0 || column_categories[list_ancestors[node]] == NC_LIST) - break; + list_ancestors[node] = colidx[rowidx[node]]; + for (int level = 0; level <= num_levels && list_ancestors[node] && column_categories[list_ancestors[node]] != NC_LIST; level++) { + list_ancestors[node] = 
colidx[rowidx[list_ancestors[node]]]; } }); - thrust::gather_if(rmm::exec_policy_nosync(stream), - list_ancestors.begin(), - list_ancestors.end(), - list_ancestors.begin(), - max_children_max_row_offsets.begin(), - max_row_offsets.begin(), - [] __device__(auto ancestor) { return ancestor != -1; }); + thrust::gather(rmm::exec_policy_nosync(stream), + list_ancestors.begin(), + list_ancestors.end(), + max_children_max_row_offsets.begin(), + max_row_offsets.begin()); } return std::tuple{ @@ -416,11 +424,11 @@ reduce_to_column_tree(tree_meta_t& tree, auto const num_columns = thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); - /* +#ifdef CSR_DEBUG_PRINT print(original_col_ids, "h_original_col_ids", stream); print(sorted_col_ids, "h_sorted_col_ids", stream); print(ordered_node_ids, "h_ordered_node_ids", stream); - */ +#endif // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); @@ -450,10 +458,10 @@ reduce_to_column_tree(tree_meta_t& tree, thrust::make_discard_iterator(), unique_node_ids.begin()); - /* +#ifdef CSR_DEBUG_PRINT print(unique_col_ids, "h_unique_col_ids", stream); print(unique_node_ids, "h_unique_node_ids", stream); - */ +#endif thrust::copy_n( rmm::exec_policy(stream), @@ -489,8 +497,6 @@ reduce_to_column_tree(tree_meta_t& tree, (is_array_of_arrays == true && parent_col_id == row_array_parent_col_id); }; - print(max_row_offsets, "old h_max_row_offsets", stream); - // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array. 
(initialize to zero) @@ -518,6 +524,7 @@ reduce_to_column_tree(tree_meta_t& tree, ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); } }); + thrust::gather_if( rmm::exec_policy(stream), parent_col_ids.begin(), @@ -529,6 +536,7 @@ reduce_to_column_tree(tree_meta_t& tree, return parent_col_id != parent_node_sentinel and column_categories[parent_col_id] == node_t::NC_LIST; }); + } // copy lists' max_row_offsets to children. diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index de0ca89663a..27ed0e00bbb 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -55,6 +55,17 @@ struct h_column_tree { std::vector column_ids; }; +// debug printing +template +void print(cudf::host_span vec, std::string name, rmm::cuda_stream_view stream) +{ + std::cout << name << " = "; + for (auto e : vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} + bool check_equality(cuio_json::tree_meta_t& d_a, cudf::device_span d_a_max_row_offsets, cuio_json::experimental::csr& d_b_csr, @@ -81,77 +92,36 @@ bool check_equality(cuio_json::tree_meta_t& d_a, auto num_nodes = a.parent_node_ids.size(); if (b.rowidx.size() != num_nodes + 1) { - std::printf("1\n"); return false; } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { - std::printf("2\n"); return false; } } - std::printf("rowidx = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", b.rowidx[u]); - std::printf("\n"); - std::printf("colidx = \n"); - for (size_t u = 0; u < num_nodes; u++) { - for (int pos = b.rowidx[u]; pos < b.rowidx[u + 1]; pos++) - std::printf("%d ", b.colidx[pos]); - } - std::printf("\n"); - std::printf("a.parent_node_ids = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", a.parent_node_ids[u]); - std::printf("\nb.column_ids = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", 
b.column_ids[u]); - std::printf("\n"); - for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { - std::printf("3\n"); return false; } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { - std::printf("u = %lu, adj_size = %d\n", u, b.rowidx[u + 1] - b.rowidx[u]); - std::printf( - "4: b.column_ids[%lu] = %d, b.column_ids[%d] = %d, a.parent_node_ids[b.column_ids[%d]] = " - "%d\n", - u, - b.column_ids[u], - v, - b.column_ids[v], - v, - a.parent_node_ids[b.column_ids[v]]); return false; } } } for (size_t u = 0; u < num_nodes; u++) { if (a.node_categories[b.column_ids[u]] != b.categories[u]) { - std::printf("5\n"); return false; } } - std::printf("permuted a_max_row_offsets = "); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", a_max_row_offsets[b.column_ids[u]]); - std::printf("\nb_max_row_offsets = "); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", b_max_row_offsets[u]); - std::printf("\n"); - for (size_t u = 0; u < num_nodes; u++) { if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { - std::printf("6\n"); return false; } } @@ -219,6 +189,7 @@ void run_test(std::string const& input) row_array_parent_col_id, stream); + std::printf("\n========================================================================================\n"); auto [d_column_tree_csr, d_column_tree_properties] = cudf::io::json::experimental::detail::reduce_to_column_tree( gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); @@ -265,3 +236,40 @@ TEST_F(JsonColumnTreeTests, SimpleLines3) )"; run_test(input); } + +TEST_F(JsonColumnTreeTests, SimpleLines4) +{ + std::string json_stringl = R"( + {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true} + {"a": 1, "b": {"0": "abc" }, "c": false} + {"a": 1, "b": {}} + {"a": 1, "c": null} + )"; + run_test(json_stringl); +} + 
+TEST_F(JsonColumnTreeTests, SimpleLines5) +{ + std::string json_stringl = R"( + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + )"; + run_test(json_stringl); +} + +TEST_F(JsonColumnTreeTests, SimpleLines6) +{ + std::string json_stringl = R"( + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + { "foo1": ["123","456"], "bar": 123 } + { "foo2": { "b": 5 }, "car": 456 } + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + )"; + run_test(json_stringl); +} From 8dd6877e1432c6cb85d4da1ada795f973a6c2037 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 17 Sep 2024 18:47:24 +0000 Subject: [PATCH 30/46] formatting --- cpp/src/io/json/column_tree_construction.cu | 55 +++++++++++---------- cpp/tests/io/json/json_tree_csr.cu | 27 +++------- 2 files changed, 36 insertions(+), 46 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index ae2406ae7f9..d9ff6353ad5 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -327,13 +327,15 @@ std::tuple reduce_to_column_tree( // create list's children max_row_offsets array // gather the max_row_offsets from children row offset array. 
{ - auto max_row_offsets_it = thrust::make_transform_iterator(thrust::make_counting_iterator(0), + auto max_row_offsets_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), cuda::proclaim_return_type( - [colidx = colidx.begin(), - max_row_offsets = max_row_offsets.begin()] __device__ (size_t i) { - if(colidx[i] == -1) return -1; - else return max_row_offsets[colidx[i]]; - })); + [colidx = colidx.begin(), max_row_offsets = max_row_offsets.begin()] __device__(size_t i) { + if (colidx[i] == -1) + return -1; + else + return max_row_offsets[colidx[i]]; + })); rmm::device_uvector max_children_max_row_offsets(num_columns, stream); size_t temp_storage_bytes = 0; cub::DeviceSegmentedReduce::Max(nullptr, @@ -361,29 +363,29 @@ std::tuple reduce_to_column_tree( rowidx.begin() + 1, colidx.begin()); - rmm::device_uvector list_ancestors(num_columns, stream); - thrust::for_each_n( - rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - num_columns, - [rowidx = rowidx.begin(), - colidx = colidx.begin(), - column_categories = column_categories.begin(), - dev_num_levels_ptr, - list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { - auto num_levels = *dev_num_levels_ptr; - list_ancestors[node] = colidx[rowidx[node]]; - for (int level = 0; level <= num_levels && list_ancestors[node] && column_categories[list_ancestors[node]] != NC_LIST; level++) { - list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; - } - }); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_columns, + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + column_categories = column_categories.begin(), + dev_num_levels_ptr, + list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { + auto num_levels = *dev_num_levels_ptr; + list_ancestors[node] = colidx[rowidx[node]]; + for (int level = 0; level <= num_levels && list_ancestors[node] && + column_categories[list_ancestors[node]] != NC_LIST; + 
level++) { + list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; + } + }); thrust::gather(rmm::exec_policy_nosync(stream), - list_ancestors.begin(), - list_ancestors.end(), - max_children_max_row_offsets.begin(), - max_row_offsets.begin()); + list_ancestors.begin(), + list_ancestors.end(), + max_children_max_row_offsets.begin(), + max_row_offsets.begin()); } return std::tuple{ @@ -536,7 +538,6 @@ reduce_to_column_tree(tree_meta_t& tree, return parent_col_id != parent_node_sentinel and column_categories[parent_col_id] == node_t::NC_LIST; }); - } // copy lists' max_row_offsets to children. diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 27ed0e00bbb..1d4b742ccc2 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -91,39 +91,27 @@ bool check_equality(cuio_json::tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) { - return false; - } + if (b.rowidx.size() != num_nodes + 1) { return false; } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { - return false; - } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } } for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { - return false; - } + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { - return false; - } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { - return false; - } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } 
for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { - return false; - } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } } return true; } @@ -189,7 +177,8 @@ void run_test(std::string const& input) row_array_parent_col_id, stream); - std::printf("\n========================================================================================\n"); + std::printf( + "\n========================================================================================\n"); auto [d_column_tree_csr, d_column_tree_properties] = cudf::io::json::experimental::detail::reduce_to_column_tree( gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); From 0c63f22e1384781166dfc2a9d46600e74d4f0039 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Wed, 18 Sep 2024 22:08:45 +0000 Subject: [PATCH 31/46] finally tests passing --- cpp/src/io/json/column_tree_construction.cu | 71 ++++++++------ cpp/src/io/json/json_column.cu | 76 +++++++++++++++ cpp/tests/io/json/json_tree_csr.cu | 103 +++++++++++++++++--- 3 files changed, 208 insertions(+), 42 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index d9ff6353ad5..6eede0f7020 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -123,11 +123,14 @@ struct level_ordering { device_span parent_node_ids; __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const { + auto lhs_parent_col_id = parent_node_ids[lhs_node_id] == -1 ? -1 : col_ids[parent_node_ids[lhs_node_id]]; + auto rhs_parent_col_id = parent_node_ids[rhs_node_id] == -1 ? 
-1 : col_ids[parent_node_ids[rhs_node_id]]; + return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || (node_levels[lhs_node_id] == node_levels[rhs_node_id] && - col_ids[parent_node_ids[lhs_node_id]] < col_ids[parent_node_ids[rhs_node_id]]) || + lhs_parent_col_id < rhs_parent_col_id) || (node_levels[lhs_node_id] == node_levels[rhs_node_id] && - col_ids[parent_node_ids[lhs_node_id]] == col_ids[parent_node_ids[rhs_node_id]] && + lhs_parent_col_id == rhs_parent_col_id && col_ids[lhs_node_id] < col_ids[rhs_node_id]); } }; @@ -147,8 +150,6 @@ struct parent_nodeids_to_colids { * * @param tree Node tree representation of JSON string * @param original_col_ids Column ids of nodes - * @param sorted_col_ids Sorted column ids of nodes - * @param ordered_node_ids Node ids of nodes sorted by column ids * @param row_offsets Row offsets of nodes * @param is_array_of_arrays Whether the tree is an array of arrays * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true @@ -173,16 +174,22 @@ std::tuple reduce_to_column_tree( rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); // Reorder nodes and column ids in level-wise fashion - thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + size_t temp_storage_bytes = 0; + cub::DeviceMergeSort::SortPairs(nullptr, temp_storage_bytes, level_ordered_node_ids.begin(), level_ordered_col_ids.begin(), col_ids.size(), level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, stream.value()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + cub::DeviceMergeSort::SortPairs(d_temp_storage.data(), temp_storage_bytes, level_ordered_node_ids.begin(), level_ordered_col_ids.begin(), col_ids.size(), level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, stream.value()); + /* + thrust::sort_by_key(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end(), level_ordered_col_ids.begin(), 
level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}); + */ -#ifdef CSR_DEBUG_PRINT print(level_ordered_node_ids, "h_level_ordered_node_ids", stream); print(col_ids, "h_col_ids", stream); print(level_ordered_col_ids, "h_level_ordered_col_ids", stream); +#ifdef CSR_DEBUG_PRINT #endif // 1. get the number of columns in tree, mapping between node tree col ids and csr col ids, and @@ -210,10 +217,10 @@ std::tuple reduce_to_column_tree( mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); -#ifdef CSR_DEBUG_PRINT print(mapped_col_ids, "h_mapped_col_ids", stream); print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); +#ifdef CSR_DEBUG_PRINT #endif // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. @@ -256,8 +263,8 @@ std::tuple reduce_to_column_tree( // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency -#ifdef CSR_DEBUG_PRINT print(parent_col_ids, "h_parent_col_ids", stream); +#ifdef CSR_DEBUG_PRINT #endif auto num_non_leaf_columns = thrust::unique_count( @@ -317,10 +324,10 @@ std::tuple reduce_to_column_tree( map.begin(), colidx.begin()); -#ifdef CSR_DEBUG_PRINT print(colidx, "h_pre_colidx", stream); - print(max_row_offsets, "h_max_row_offsets", stream); +#ifdef CSR_DEBUG_PRINT #endif + print(max_row_offsets, "h_max_row_offsets", stream); // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. 
@@ -355,6 +362,9 @@ std::tuple reduce_to_column_tree( rowidx.begin(), rowidx.begin() + 1, stream.value()); + CUDF_CUDA_TRY(cudaMemcpyAsync(max_children_max_row_offsets.data(), max_row_offsets.data(), sizeof(row_offset_t), cudaMemcpyDeviceToDevice, stream.value())); + + print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); // Skip the parent of root node thrust::scatter(rmm::exec_policy_nosync(stream), @@ -363,29 +373,42 @@ std::tuple reduce_to_column_tree( rowidx.begin() + 1, colidx.begin()); - rmm::device_uvector list_ancestors(num_columns, stream); + // Vector to store the latest ancestor of LIST type. If no such ancestor is found, + // store the root node of tree. Note that a node cannot be an ancestor of itself + auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); + auto root_node = column_categories.element(0, stream) == NC_LIST ? 1 : 0; + if(root_node) list_ancestors.set_element_async(root_node, root_node, stream); thrust::for_each_n(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - num_columns, + thrust::make_counting_iterator(root_node + 1), + num_columns - root_node - 1, [rowidx = rowidx.begin(), colidx = colidx.begin(), column_categories = column_categories.begin(), dev_num_levels_ptr, + is_array_of_arrays, + row_array_parent_col_id, + root_node, list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { auto num_levels = *dev_num_levels_ptr; list_ancestors[node] = colidx[rowidx[node]]; - for (int level = 0; level <= num_levels && list_ancestors[node] && + for (int level = 0; level <= num_levels && list_ancestors[node] != root_node && column_categories[list_ancestors[node]] != NC_LIST; level++) { list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; } }); + print(list_ancestors, "h_list_ancestors", stream); + + // exclude root node thrust::gather(rmm::exec_policy_nosync(stream), 
list_ancestors.begin(), list_ancestors.end(), max_children_max_row_offsets.begin(), max_row_offsets.begin()); + + print(max_row_offsets, "h_max_row_offsets", stream); } return std::tuple{ @@ -426,12 +449,6 @@ reduce_to_column_tree(tree_meta_t& tree, auto const num_columns = thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); -#ifdef CSR_DEBUG_PRINT - print(original_col_ids, "h_original_col_ids", stream); - print(sorted_col_ids, "h_sorted_col_ids", stream); - print(ordered_node_ids, "h_ordered_node_ids", stream); -#endif - // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); rmm::device_uvector max_row_offsets(num_columns, stream); @@ -460,11 +477,6 @@ reduce_to_column_tree(tree_meta_t& tree, thrust::make_discard_iterator(), unique_node_ids.begin()); -#ifdef CSR_DEBUG_PRINT - print(unique_col_ids, "h_unique_col_ids", stream); - print(unique_node_ids, "h_unique_node_ids", stream); -#endif - thrust::copy_n( rmm::exec_policy(stream), thrust::make_zip_iterator( @@ -494,11 +506,10 @@ reduce_to_column_tree(tree_meta_t& tree, return !(parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST && (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - return (parent_col_id != parent_node_sentinel) && - (column_categories[parent_col_id] != NC_LIST) || - (is_array_of_arrays == true && parent_col_id == row_array_parent_col_id); }; + print(max_row_offsets, "h_max_row_offsets", stream); + // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array. (initialize to zero) @@ -540,6 +551,8 @@ reduce_to_column_tree(tree_meta_t& tree, }); } + print(max_row_offsets, "h_max_row_offsets", stream); + // copy lists' max_row_offsets to children. // all structs should have same size. 
thrust::transform_if( @@ -567,6 +580,8 @@ reduce_to_column_tree(tree_meta_t& tree, return is_non_list_parent(parent_col_id); }); + print(max_row_offsets, "h_max_row_offsets", stream); + // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) thrust::transform_if( rmm::exec_policy(stream), diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 6c2faae4e11..15fe8aabcce 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -257,6 +257,73 @@ struct json_column_data { bitmask_type* validity; }; +struct h_tree_meta_t { + std::vector node_categories; + std::vector parent_node_ids; + std::vector node_range_begin; + std::vector node_range_end; +}; + +struct h_column_tree { + // position of nnzs + std::vector rowidx; + std::vector colidx; + // node properties + std::vector categories; + std::vector column_ids; +}; + + +bool check_equality(tree_meta_t& d_a, + cudf::device_span d_a_max_row_offsets, + experimental::csr& d_b_csr, + experimental::column_tree_properties& d_b_ctp, + rmm::cuda_stream_view stream) +{ + // convert from tree_meta_t to column_tree_csr + stream.synchronize(); + + h_tree_meta_t a{cudf::detail::make_std_vector_async(d_a.node_categories, stream), + cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), + cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), + cudf::detail::make_std_vector_async(d_a.node_range_end, stream)}; + + h_column_tree b{cudf::detail::make_std_vector_async(d_b_csr.rowidx, stream), + cudf::detail::make_std_vector_async(d_b_csr.colidx, stream), + cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), + cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; + + auto a_max_row_offsets = cudf::detail::make_std_vector_async(d_a_max_row_offsets, stream); + auto b_max_row_offsets = cudf::detail::make_std_vector_async(d_b_ctp.max_row_offsets, stream); + + stream.synchronize(); + + auto num_nodes = 
a.parent_node_ids.size(); + if (b.rowidx.size() != num_nodes + 1) { return false; } + + for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { + auto v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {printf("1\n"); return false; } + } + for (size_t u = 1; u < num_nodes; u++) { + auto v = b.colidx[b.rowidx[u]]; + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {printf("2\n"); return false; } + + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { + v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {printf("3\n"); return false; } + } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + } + + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + } + return true; +} + /** * @brief Constructs `d_json_column` from node tree representation * Newly constructed columns are insert into `root`'s children. 
@@ -324,6 +391,15 @@ void make_device_json_column(device_span input, is_array_of_arrays, row_array_parent_col_id, stream); + auto [d_column_tree_csr, d_column_tree_properties] = + cudf::io::json::experimental::detail::reduce_to_column_tree( + tree, col_ids, row_offsets, is_array_of_arrays, row_array_parent_col_id, stream); + + auto iseq = check_equality( + d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); + // assert equality between csr and meta formats + CUDF_EXPECTS(iseq, "OH NO!"); + auto num_columns = d_unique_col_ids.size(); auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); auto column_categories = diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 1d4b742ccc2..317f6672c64 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -95,28 +95,66 @@ bool check_equality(cuio_json::tree_meta_t& d_a, for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {printf("1\n"); return false; } } - for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {printf("2\n"); return false; } + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {printf("3\n"); return false; } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + } + + std::printf("rowidx = \n"); + for (size_t u = 0; u < 
num_nodes; u++) + std::printf("%d ", b.rowidx[u]); + std::printf("\n"); + std::printf("colidx = \n"); + for (size_t u = 0; u < num_nodes; u++) { + for (int pos = b.rowidx[u]; pos < b.rowidx[u + 1]; pos++) + std::printf("%d ", b.colidx[pos]); } + std::printf("\n"); + std::printf("a.parent_node_ids = \n"); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", a.parent_node_ids[u]); + std::printf("\nb.column_ids = \n"); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", b.column_ids[u]); + std::printf("\n"); + + std::printf("a.node_categories = \n"); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", a.node_categories[b.column_ids[u]]); + std::printf("\nb.categories = \n"); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", b.categories[u]); + std::printf("\n"); + + std::printf("a_max_row_offsets = "); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", a_max_row_offsets[u]); + std::printf("\n"); + std::printf("permuted a_max_row_offsets = "); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", a_max_row_offsets[b.column_ids[u]]); + std::printf("\nb_max_row_offsets = "); + for (size_t u = 0; u < num_nodes; u++) + std::printf("%d ", b_max_row_offsets[u]); + std::printf("\n"); for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } } return true; } -void run_test(std::string const& input) +void run_test(std::string const& input, bool enable_lines = true) { auto const stream = cudf::get_default_stream(); cudf::string_scalar d_scalar(input, true, stream); @@ -124,20 +162,34 @@ void run_test(std::string const& input) static_cast(d_scalar.size())}; cudf::io::json_reader_options options{}; - options.enable_lines(true); + options.enable_lines(enable_lines); + options.enable_mixed_types_as_string(true); // Parse the JSON and get the token stream auto 
const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( - d_input, options, stream, rmm::mr::get_current_device_resource()); + d_input, options, stream, cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + tokens_gpu, token_indices_gpu, options.is_enabled_mixed_types_as_string(), stream, cudf::get_current_device_resource_ref()); + + bool const is_array_of_arrays = [&]() { + std::array h_node_categories = {cuio_json::NC_ERR, cuio_json::NC_ERR}; + auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_node_categories.data(), + gpu_tree.node_categories.data(), + sizeof(cuio_json::node_t) * size_to_copy, + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + if (options.is_enabled_lines()) return h_node_categories[0] == cuio_json::NC_LIST; + return h_node_categories[0] == cuio_json::NC_LIST and h_node_categories[1] == cuio_json::NC_LIST; + }(); auto tup = cuio_json::detail::records_orient_tree_traversal(d_input, gpu_tree, - false, + is_array_of_arrays, options.is_enabled_lines(), stream, rmm::mr::get_current_device_resource()); @@ -156,7 +208,7 @@ void run_test(std::string const& input) rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); cudf::size_type const row_array_parent_col_id = [&]() { - cudf::size_type value = cudf::io::json::parent_node_sentinel; + cudf::size_type value = cuio_json::parent_node_sentinel; auto const list_node_index = options.is_enabled_lines() ? 
0 : 1; CUDF_CUDA_TRY(cudaMemcpyAsync(&value, gpu_col_id.data() + list_node_index, @@ -173,7 +225,7 @@ void run_test(std::string const& input) sorted_col_ids, node_ids, gpu_row_offsets, - false, + is_array_of_arrays, row_array_parent_col_id, stream); @@ -181,7 +233,7 @@ void run_test(std::string const& input) "\n========================================================================================\n"); auto [d_column_tree_csr, d_column_tree_properties] = cudf::io::json::experimental::detail::reduce_to_column_tree( - gpu_tree, gpu_col_id, gpu_row_offsets, false, row_array_parent_col_id, stream); + gpu_tree, gpu_col_id, gpu_row_offsets, is_array_of_arrays, row_array_parent_col_id, stream); auto iseq = check_equality( d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); @@ -262,3 +314,26 @@ TEST_F(JsonColumnTreeTests, SimpleLines6) )"; run_test(json_stringl); } + +TEST_F(JsonColumnTreeTests, JSON1) +{ + std::string json_string = R"([ + {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true}, + {"a": 1, "b": {"0": "abc" }, "c": false}, + {"a": 1, "b": {}}, + {"a": 1, "c": null} + ])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, JSON2) +{ + std::string json_string = + R"([ + {}, + { "a": { "y" : 6, "z": [] }}, + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + ])"; // Prepare input & output buffers + run_test(json_string, false); +} + From 2d4861e65a383a90260be4cc1d8b601298af7496 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Thu, 19 Sep 2024 22:43:40 +0000 Subject: [PATCH 32/46] fixed all bugs hopefully --- cpp/src/io/json/column_tree_construction.cu | 123 +++++++++++++--- cpp/src/io/json/json_column.cu | 58 +++++--- cpp/src/io/json/nested_json.hpp | 2 +- cpp/tests/io/json/json_tree_csr.cu | 152 +++++++++++--------- 4 files changed, 233 insertions(+), 102 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 6eede0f7020..339d0be0989 
100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -103,6 +103,11 @@ void max_row_offsets_col_categories(InputIterator1 keys_first, } // debug printing +#ifndef CSR_DEBUG_PRINT +//#define CSR_DEBUG_PRINT +#endif + +#ifdef CSR_DEBUG_PRINT template void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) { @@ -114,6 +119,7 @@ void print(device_span d_vec, std::string name, rmm::cuda_stream_view s } std::cout << std::endl; } +#endif namespace experimental::detail { @@ -162,11 +168,23 @@ std::tuple reduce_to_column_tree( device_span col_ids, device_span row_offsets, bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, + NodeIndexT row_array_parent_col_id, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); + if(col_ids.empty()) { + rmm::device_uvector empty_rowidx(0, stream); + rmm::device_uvector empty_colidx(0, stream); + rmm::device_uvector empty_column_categories(0, stream); + rmm::device_uvector empty_max_row_offsets(0, stream); + rmm::device_uvector empty_mapped_col_ids(0, stream); + return std::tuple{ + csr{std::move(empty_rowidx), std::move(empty_colidx)}, + column_tree_properties{ + std::move(empty_column_categories), std::move(empty_max_row_offsets), std::move(empty_mapped_col_ids)}}; + } + auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( col_ids, stream, cudf::get_current_device_resource_ref()); rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); @@ -186,10 +204,10 @@ std::tuple reduce_to_column_tree( level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}); */ +#ifdef CSR_DEBUG_PRINT print(level_ordered_node_ids, "h_level_ordered_node_ids", stream); print(col_ids, "h_col_ids", stream); print(level_ordered_col_ids, "h_level_ordered_col_ids", stream); -#ifdef CSR_DEBUG_PRINT #endif // 1. 
get the number of columns in tree, mapping between node tree col ids and csr col ids, and @@ -217,10 +235,10 @@ std::tuple reduce_to_column_tree( mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); +#ifdef CSR_DEBUG_PRINT print(mapped_col_ids, "h_mapped_col_ids", stream); print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); -#ifdef CSR_DEBUG_PRINT #endif // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. @@ -263,8 +281,8 @@ std::tuple reduce_to_column_tree( // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency - print(parent_col_ids, "h_parent_col_ids", stream); #ifdef CSR_DEBUG_PRINT + print(parent_col_ids, "h_parent_col_ids", stream); #endif auto num_non_leaf_columns = thrust::unique_count( @@ -285,17 +303,28 @@ std::tuple reduce_to_column_tree( non_leaf_nodes.begin(), rowidx.begin() + 1); - thrust::transform_inclusive_scan( - rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), - thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), - rowidx.begin() + 1, - cuda::proclaim_return_type([] __device__(auto a) { - auto n = thrust::get<0>(a); - auto idx = thrust::get<1>(a); - return n == 1 ? idx : idx + 1; - }), - thrust::plus{}); + if(num_columns > 1) { + thrust::transform_inclusive_scan( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), + thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), + rowidx.begin() + 1, + cuda::proclaim_return_type([] __device__(auto a) { + auto n = thrust::get<0>(a); + auto idx = thrust::get<1>(a); + return n == 1 ? 
idx : idx + 1; + return idx + 1; + }), + thrust::plus{}); + } + else { + auto single_node = 1; + rowidx.set_element_async(1, single_node, stream); + } + +#ifdef CSR_DEBUG_PRINT + print(rowidx, "h_rowidx", stream); +#endif rmm::device_uvector colidx((num_columns - 1) * 2, stream); thrust::fill(rmm::exec_policy(stream), colidx.begin(), colidx.end(), -1); @@ -324,15 +353,16 @@ std::tuple reduce_to_column_tree( map.begin(), colidx.begin()); - print(colidx, "h_pre_colidx", stream); #ifdef CSR_DEBUG_PRINT -#endif + print(colidx, "h_pre_colidx", stream); print(max_row_offsets, "h_max_row_offsets", stream); +#endif // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array // gather the max_row_offsets from children row offset array. + if(num_columns > 1) { auto max_row_offsets_it = thrust::make_transform_iterator( thrust::make_counting_iterator(0), @@ -364,7 +394,26 @@ std::tuple reduce_to_column_tree( stream.value()); CUDF_CUDA_TRY(cudaMemcpyAsync(max_children_max_row_offsets.data(), max_row_offsets.data(), sizeof(row_offset_t), cudaMemcpyDeviceToDevice, stream.value())); +#ifdef CSR_DEBUG_PRINT + print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); +#endif + + thrust::transform_if(rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), max_children_max_row_offsets.begin()), + thrust::make_zip_iterator(thrust::make_counting_iterator(0) + num_columns, max_children_max_row_offsets.end()), + max_children_max_row_offsets.begin(), + [max_row_offsets = max_row_offsets.begin()] __device__ (auto tup) { + auto n = thrust::get<0>(tup); + return max_row_offsets[n]; + }, + [] __device__ (auto tup) { + auto e = thrust::get<1>(tup); + return e == -1; + }); + +#ifdef CSR_DEBUG_PRINT print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); +#endif // Skip the parent of root node 
thrust::scatter(rmm::exec_policy_nosync(stream), @@ -373,12 +422,33 @@ std::tuple reduce_to_column_tree( rowidx.begin() + 1, colidx.begin()); +#ifdef CSR_DEBUG_PRINT + print(colidx, "h_colidx", stream); +#endif + + // condition is true if parent is not a list, or sentinel/root + // Special case to return true if parent is a list and is_array_of_arrays is true + auto is_non_list_parent = [column_categories = column_categories.begin(), + is_array_of_arrays, + row_array_parent_col_id] __device__(auto parent_col_id) -> bool { + return (parent_col_id != parent_node_sentinel && + column_categories[parent_col_id] != NC_LIST || + (is_array_of_arrays && parent_col_id == row_array_parent_col_id)); + }; + // Vector to store the latest ancestor of LIST type. If no such ancestor is found, // store the root node of tree. Note that a node cannot be an ancestor of itself auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); - auto root_node = column_categories.element(0, stream) == NC_LIST ? 1 : 0; + row_array_parent_col_id = rev_mapped_col_ids.element(row_array_parent_col_id, stream); + auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || (is_array_of_arrays && row_array_parent_col_id) ? 1 : 0; + //root_node = (is_array_of_arrays && row_array_parent_col_id && num_columns == 2) ? 
0 : root_node; if(root_node) list_ancestors.set_element_async(root_node, root_node, stream); + /* + std::cout << "root_node = " << root_node << std::endl; + std::cout << "row_array_parent_col_id = " << row_array_parent_col_id << std::endl; + std::cout << "is_array_of_arrays = " << is_array_of_arrays << std::endl; + */ thrust::for_each_n(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(root_node + 1), num_columns - root_node - 1, @@ -393,13 +463,15 @@ std::tuple reduce_to_column_tree( auto num_levels = *dev_num_levels_ptr; list_ancestors[node] = colidx[rowidx[node]]; for (int level = 0; level <= num_levels && list_ancestors[node] != root_node && - column_categories[list_ancestors[node]] != NC_LIST; + column_categories[list_ancestors[node]] != NC_LIST; level++) { list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; } }); +#ifdef CSR_DEBUG_PRINT print(list_ancestors, "h_list_ancestors", stream); +#endif // exclude root node thrust::gather(rmm::exec_policy_nosync(stream), @@ -408,7 +480,9 @@ std::tuple reduce_to_column_tree( max_children_max_row_offsets.begin(), max_row_offsets.begin()); +#ifdef CSR_DEBUG_PRINT print(max_row_offsets, "h_max_row_offsets", stream); +#endif } return std::tuple{ @@ -498,6 +572,11 @@ reduce_to_column_tree(tree_meta_t& tree, : col_ids[parent_node_id]; }); +#ifdef CSR_DEBUG_PRINT + print(unique_col_ids, "h_unique_col_ids", stream); + print(parent_col_ids, "h_parent_col_ids", stream); +#endif + // condition is true if parent is not a list, or sentinel/root // Special case to return true if parent is a list and is_array_of_arrays is true auto is_non_list_parent = [column_categories = column_categories.begin(), @@ -508,7 +587,9 @@ reduce_to_column_tree(tree_meta_t& tree, (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); }; +#ifdef CSR_DEBUG_PRINT print(max_row_offsets, "h_max_row_offsets", stream); +#endif // Mixed types in List children go to different columns, // so all immediate children of list column 
should have same max_row_offsets. @@ -551,7 +632,9 @@ reduce_to_column_tree(tree_meta_t& tree, }); } +#ifdef CSR_DEBUG_PRINT print(max_row_offsets, "h_max_row_offsets", stream); +#endif // copy lists' max_row_offsets to children. // all structs should have same size. @@ -580,7 +663,9 @@ reduce_to_column_tree(tree_meta_t& tree, return is_non_list_parent(parent_col_id); }); +#ifdef CSR_DEBUG_PRINT print(max_row_offsets, "h_max_row_offsets", stream); +#endif // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) thrust::transform_if( diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 15fe8aabcce..f743e65e493 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -56,6 +56,10 @@ namespace cudf::io::json::detail { // DEBUG prints +#ifndef CSR_DEBUG_EQ +#define CSR_DEBUG_EQ +#endif + auto to_cat = [](auto v) -> std::string { switch (v) { case NC_STRUCT: return " S"; @@ -273,7 +277,7 @@ struct h_column_tree { std::vector column_ids; }; - +#ifdef CSR_DEBUG_EQ bool check_equality(tree_meta_t& d_a, cudf::device_span d_a_max_row_offsets, experimental::csr& d_b_csr, @@ -299,30 +303,46 @@ bool check_equality(tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) { return false; } + if(num_nodes > 1) { + if (b.rowidx.size() != num_nodes + 1) { return false; } - for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { - auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {printf("1\n"); return false; } - } - for (size_t u = 1; u < num_nodes; u++) { - auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {printf("2\n"); return false; } - - for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { - v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {printf("3\n"); return false; } + for (auto pos = b.rowidx[0]; 
pos < b.rowidx[1]; pos++) { + auto v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {return false; } + } + for (size_t u = 1; u < num_nodes; u++) { + auto v = b.colidx[b.rowidx[u]]; + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {return false; } + + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { + v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {return false; } + } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) {return false; } + } + + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {return false; } } } - for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } - } + else if (num_nodes == 1) { + if (b.rowidx.size() != num_nodes + 1) { return false; } + + if(b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; + if(!b.colidx.empty()) return false; + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + } - for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + } } return true; } +#endif /** * @brief Constructs `d_json_column` from node tree representation @@ -391,6 +411,7 @@ void make_device_json_column(device_span input, is_array_of_arrays, row_array_parent_col_id, stream); +#ifdef CSR_DEBUG_EQ auto [d_column_tree_csr, d_column_tree_properties] = cudf::io::json::experimental::detail::reduce_to_column_tree( tree, col_ids, row_offsets, is_array_of_arrays, row_array_parent_col_id, stream); @@ -399,6 +420,7 @@ void make_device_json_column(device_span input, d_column_tree, 
d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); // assert equality between csr and meta formats CUDF_EXPECTS(iseq, "OH NO!"); +#endif auto num_columns = d_unique_col_ids.size(); auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 54eaa8a7c27..0c331915a5c 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -246,7 +246,7 @@ std::tuple reduce_to_column_tree( device_span original_col_ids, device_span row_offsets, bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, + NodeIndexT row_array_parent_col_id, rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 317f6672c64..61db02b6b65 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -57,7 +57,7 @@ struct h_column_tree { // debug printing template -void print(cudf::host_span vec, std::string name, rmm::cuda_stream_view stream) +void print(cudf::host_span vec, std::string name) { std::cout << name << " = "; for (auto e : vec) { @@ -91,65 +91,41 @@ bool check_equality(cuio_json::tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if (b.rowidx.size() != num_nodes + 1) { return false; } + if(num_nodes > 1) { + if (b.rowidx.size() != num_nodes + 1) { return false; } - for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { - auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {printf("1\n"); return false; } - } - for (size_t u = 1; u < num_nodes; u++) { - auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {printf("2\n"); return false; } - - for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { - v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {printf("3\n"); 
return false; } + for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { + auto v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {printf("1\n"); return false; } + } + for (size_t u = 1; u < num_nodes; u++) { + auto v = b.colidx[b.rowidx[u]]; + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {printf("2\n"); return false; } + + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { + v = b.colidx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {printf("3\n"); return false; } + } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } } } - for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } - } + else if (num_nodes == 1) { + if (b.rowidx.size() != num_nodes + 1) { return false; } - std::printf("rowidx = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", b.rowidx[u]); - std::printf("\n"); - std::printf("colidx = \n"); - for (size_t u = 0; u < num_nodes; u++) { - for (int pos = b.rowidx[u]; pos < b.rowidx[u + 1]; pos++) - std::printf("%d ", b.colidx[pos]); - } - std::printf("\n"); - std::printf("a.parent_node_ids = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", a.parent_node_ids[u]); - std::printf("\nb.column_ids = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", b.column_ids[u]); - std::printf("\n"); - - std::printf("a.node_categories = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", a.node_categories[b.column_ids[u]]); - std::printf("\nb.categories = \n"); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", b.categories[u]); - std::printf("\n"); - - std::printf("a_max_row_offsets = "); - for (size_t u = 
0; u < num_nodes; u++) - std::printf("%d ", a_max_row_offsets[u]); - std::printf("\n"); - std::printf("permuted a_max_row_offsets = "); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", a_max_row_offsets[b.column_ids[u]]); - std::printf("\nb_max_row_offsets = "); - for (size_t u = 0; u < num_nodes; u++) - std::printf("%d ", b_max_row_offsets[u]); - std::printf("\n"); - - for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + if(b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; + if(!b.colidx.empty()) return false; + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + } + + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + } } return true; } @@ -229,8 +205,6 @@ void run_test(std::string const& input, bool enable_lines = true) row_array_parent_col_id, stream); - std::printf( - "\n========================================================================================\n"); auto [d_column_tree_csr, d_column_tree_properties] = cudf::io::json::experimental::detail::reduce_to_column_tree( gpu_tree, gpu_col_id, gpu_row_offsets, is_array_of_arrays, row_array_parent_col_id, stream); @@ -243,7 +217,7 @@ void run_test(std::string const& input, bool enable_lines = true) struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; -TEST_F(JsonColumnTreeTests, SimpleLines1) +TEST_F(JsonColumnTreeTests, JSONL1) { std::string const input = R"( {} @@ -252,7 +226,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines1) run_test(input); } -TEST_F(JsonColumnTreeTests, SimpleLines2) +TEST_F(JsonColumnTreeTests, JSONL2) { std::string const input = R"( {} @@ -268,7 +242,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines2) run_test(input); } -TEST_F(JsonColumnTreeTests, SimpleLines3) +TEST_F(JsonColumnTreeTests, JSONL3) { 
std::string const input = R"( { "Root": { "Key": [ { "EE": "A" } ] } } @@ -278,7 +252,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines3) run_test(input); } -TEST_F(JsonColumnTreeTests, SimpleLines4) +TEST_F(JsonColumnTreeTests, JSONL4) { std::string json_stringl = R"( {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true} @@ -289,7 +263,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines4) run_test(json_stringl); } -TEST_F(JsonColumnTreeTests, SimpleLines5) +TEST_F(JsonColumnTreeTests, JSONL5) { std::string json_stringl = R"( { "foo1": [1,2,3], "bar": 123 } @@ -302,7 +276,7 @@ TEST_F(JsonColumnTreeTests, SimpleLines5) run_test(json_stringl); } -TEST_F(JsonColumnTreeTests, SimpleLines6) +TEST_F(JsonColumnTreeTests, JSONL6) { std::string json_stringl = R"( { "foo1": [1,2,3], "bar": 123 } @@ -337,3 +311,53 @@ TEST_F(JsonColumnTreeTests, JSON2) run_test(json_string, false); } +TEST_F(JsonColumnTreeTests, JSONLA1) +{ + std::string json_string = + R"([123, [1,2,3]] + [456, null, { "a": 1 }])"; + run_test(json_string); +} + +TEST_F(JsonColumnTreeTests, JSONA1) +{ + std::string json_string = R"([[[1,2,3], null, 123], + [null, { "a": 1 }, 456 ]])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, CornerCase1) +{ + std::string json_string = R"([])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, CornerCase2) +{ + std::string json_string = R"([123])"; + run_test(json_string, true); +} + +TEST_F(JsonColumnTreeTests, CornerCase3) +{ + std::string json_string = R"([[[]]])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, CornerCase4) +{ + std::string json_string = R"([[], [], []])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, CornerCase5) +{ + std::string json_string = R"([[1, 2, 3], [4, 5, null], []])"; + run_test(json_string, true); +} + +TEST_F(JsonColumnTreeTests, CornerCase6) +{ + std::string json_string = R"([[]])"; + run_test(json_string, true); +} From 7759a911c421abda1734013e6ffcfaa0d9cb3fe2 Mon Sep 17 
00:00:00 2001 From: Shruti Shivakumar Date: Thu, 19 Sep 2024 22:45:49 +0000 Subject: [PATCH 33/46] formatting --- cpp/src/io/json/column_tree_construction.cu | 101 ++++++++++++-------- cpp/src/io/json/json_column.cu | 31 +++--- cpp/tests/io/json/json_tree_csr.cu | 63 ++++++++---- 3 files changed, 124 insertions(+), 71 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 339d0be0989..bdce5bac44f 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -104,7 +104,7 @@ void max_row_offsets_col_categories(InputIterator1 keys_first, // debug printing #ifndef CSR_DEBUG_PRINT -//#define CSR_DEBUG_PRINT +// #define CSR_DEBUG_PRINT #endif #ifdef CSR_DEBUG_PRINT @@ -129,15 +129,16 @@ struct level_ordering { device_span parent_node_ids; __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const { - auto lhs_parent_col_id = parent_node_ids[lhs_node_id] == -1 ? -1 : col_ids[parent_node_ids[lhs_node_id]]; - auto rhs_parent_col_id = parent_node_ids[rhs_node_id] == -1 ? -1 : col_ids[parent_node_ids[rhs_node_id]]; + auto lhs_parent_col_id = + parent_node_ids[lhs_node_id] == -1 ? -1 : col_ids[parent_node_ids[lhs_node_id]]; + auto rhs_parent_col_id = + parent_node_ids[rhs_node_id] == -1 ? 
-1 : col_ids[parent_node_ids[rhs_node_id]]; return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || (node_levels[lhs_node_id] == node_levels[rhs_node_id] && lhs_parent_col_id < rhs_parent_col_id) || (node_levels[lhs_node_id] == node_levels[rhs_node_id] && - lhs_parent_col_id == rhs_parent_col_id && - col_ids[lhs_node_id] < col_ids[rhs_node_id]); + lhs_parent_col_id == rhs_parent_col_id && col_ids[lhs_node_id] < col_ids[rhs_node_id]); } }; @@ -173,16 +174,16 @@ std::tuple reduce_to_column_tree( { CUDF_FUNC_RANGE(); - if(col_ids.empty()) { + if (col_ids.empty()) { rmm::device_uvector empty_rowidx(0, stream); rmm::device_uvector empty_colidx(0, stream); rmm::device_uvector empty_column_categories(0, stream); rmm::device_uvector empty_max_row_offsets(0, stream); rmm::device_uvector empty_mapped_col_ids(0, stream); - return std::tuple{ - csr{std::move(empty_rowidx), std::move(empty_colidx)}, - column_tree_properties{ - std::move(empty_column_categories), std::move(empty_max_row_offsets), std::move(empty_mapped_col_ids)}}; + return std::tuple{csr{std::move(empty_rowidx), std::move(empty_colidx)}, + column_tree_properties{std::move(empty_column_categories), + std::move(empty_max_row_offsets), + std::move(empty_mapped_col_ids)}}; } auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( @@ -193,9 +194,21 @@ std::tuple reduce_to_column_tree( // Reorder nodes and column ids in level-wise fashion size_t temp_storage_bytes = 0; - cub::DeviceMergeSort::SortPairs(nullptr, temp_storage_bytes, level_ordered_node_ids.begin(), level_ordered_col_ids.begin(), col_ids.size(), level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, stream.value()); + cub::DeviceMergeSort::SortPairs(nullptr, + temp_storage_bytes, + level_ordered_node_ids.begin(), + level_ordered_col_ids.begin(), + col_ids.size(), + level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, + stream.value()); rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - 
cub::DeviceMergeSort::SortPairs(d_temp_storage.data(), temp_storage_bytes, level_ordered_node_ids.begin(), level_ordered_col_ids.begin(), col_ids.size(), level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, stream.value()); + cub::DeviceMergeSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + level_ordered_node_ids.begin(), + level_ordered_col_ids.begin(), + col_ids.size(), + level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, + stream.value()); /* thrust::sort_by_key(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), @@ -303,7 +316,7 @@ std::tuple reduce_to_column_tree( non_leaf_nodes.begin(), rowidx.begin() + 1); - if(num_columns > 1) { + if (num_columns > 1) { thrust::transform_inclusive_scan( rmm::exec_policy_nosync(stream), thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), @@ -312,12 +325,11 @@ std::tuple reduce_to_column_tree( cuda::proclaim_return_type([] __device__(auto a) { auto n = thrust::get<0>(a); auto idx = thrust::get<1>(a); - return n == 1 ? idx : idx + 1; + return n == 1 ? idx : idx + 1; return idx + 1; }), thrust::plus{}); - } - else { + } else { auto single_node = 1; rowidx.set_element_async(1, single_node, stream); } @@ -362,8 +374,7 @@ std::tuple reduce_to_column_tree( // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array // gather the max_row_offsets from children row offset array. 
- if(num_columns > 1) - { + if (num_columns > 1) { auto max_row_offsets_it = thrust::make_transform_iterator( thrust::make_counting_iterator(0), cuda::proclaim_return_type( @@ -392,24 +403,31 @@ std::tuple reduce_to_column_tree( rowidx.begin(), rowidx.begin() + 1, stream.value()); - CUDF_CUDA_TRY(cudaMemcpyAsync(max_children_max_row_offsets.data(), max_row_offsets.data(), sizeof(row_offset_t), cudaMemcpyDeviceToDevice, stream.value())); + CUDF_CUDA_TRY(cudaMemcpyAsync(max_children_max_row_offsets.data(), + max_row_offsets.data(), + sizeof(row_offset_t), + cudaMemcpyDeviceToDevice, + stream.value())); #ifdef CSR_DEBUG_PRINT print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); #endif - thrust::transform_if(rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), max_children_max_row_offsets.begin()), - thrust::make_zip_iterator(thrust::make_counting_iterator(0) + num_columns, max_children_max_row_offsets.end()), - max_children_max_row_offsets.begin(), - [max_row_offsets = max_row_offsets.begin()] __device__ (auto tup) { - auto n = thrust::get<0>(tup); - return max_row_offsets[n]; - }, - [] __device__ (auto tup) { - auto e = thrust::get<1>(tup); - return e == -1; - }); + thrust::transform_if( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), + max_children_max_row_offsets.begin()), + thrust::make_zip_iterator(thrust::make_counting_iterator(0) + num_columns, + max_children_max_row_offsets.end()), + max_children_max_row_offsets.begin(), + [max_row_offsets = max_row_offsets.begin()] __device__(auto tup) { + auto n = thrust::get<0>(tup); + return max_row_offsets[n]; + }, + [] __device__(auto tup) { + auto e = thrust::get<1>(tup); + return e == -1; + }); #ifdef CSR_DEBUG_PRINT print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); @@ -432,8 +450,8 @@ std::tuple reduce_to_column_tree( is_array_of_arrays, row_array_parent_col_id] 
__device__(auto parent_col_id) -> bool { return (parent_col_id != parent_node_sentinel && - column_categories[parent_col_id] != NC_LIST || - (is_array_of_arrays && parent_col_id == row_array_parent_col_id)); + column_categories[parent_col_id] != NC_LIST || + (is_array_of_arrays && parent_col_id == row_array_parent_col_id)); }; // Vector to store the latest ancestor of LIST type. If no such ancestor is found, @@ -441,16 +459,20 @@ std::tuple reduce_to_column_tree( auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); row_array_parent_col_id = rev_mapped_col_ids.element(row_array_parent_col_id, stream); - auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || (is_array_of_arrays && row_array_parent_col_id) ? 1 : 0; - //root_node = (is_array_of_arrays && row_array_parent_col_id && num_columns == 2) ? 0 : root_node; - if(root_node) list_ancestors.set_element_async(root_node, root_node, stream); + auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || + (is_array_of_arrays && row_array_parent_col_id) + ? 1 + : 0; + // root_node = (is_array_of_arrays && row_array_parent_col_id && num_columns == 2) ? 
0 : + // root_node; + if (root_node) list_ancestors.set_element_async(root_node, root_node, stream); /* std::cout << "root_node = " << root_node << std::endl; std::cout << "row_array_parent_col_id = " << row_array_parent_col_id << std::endl; std::cout << "is_array_of_arrays = " << is_array_of_arrays << std::endl; */ thrust::for_each_n(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(root_node + 1), + thrust::make_counting_iterator(root_node + 1), num_columns - root_node - 1, [rowidx = rowidx.begin(), colidx = colidx.begin(), @@ -462,8 +484,9 @@ std::tuple reduce_to_column_tree( list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { auto num_levels = *dev_num_levels_ptr; list_ancestors[node] = colidx[rowidx[node]]; - for (int level = 0; level <= num_levels && list_ancestors[node] != root_node && - column_categories[list_ancestors[node]] != NC_LIST; + for (int level = 0; + level <= num_levels && list_ancestors[node] != root_node && + column_categories[list_ancestors[node]] != NC_LIST; level++) { list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; } diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index f743e65e493..a80555383a4 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -303,41 +303,46 @@ bool check_equality(tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if(num_nodes > 1) { + if (num_nodes > 1) { if (b.rowidx.size() != num_nodes + 1) { return false; } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } } for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {return false; } - + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } + for 
(auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) {return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } } - } - else if (num_nodes == 1) { + } else if (num_nodes == 1) { if (b.rowidx.size() != num_nodes + 1) { return false; } - if(b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; - if(!b.colidx.empty()) return false; + if (b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; + if (!b.colidx.empty()) return false; for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { + printf("4\n"); + return false; + } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { + printf("5\n"); + return false; + } } } return true; diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 61db02b6b65..76e509a13d1 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -91,40 +91,60 @@ bool check_equality(cuio_json::tree_meta_t& d_a, stream.synchronize(); auto num_nodes = a.parent_node_ids.size(); - if(num_nodes > 1) { + if (num_nodes > 1) { if (b.rowidx.size() != num_nodes + 1) { return false; } for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if 
(a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) {printf("1\n"); return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { + printf("1\n"); + return false; + } } for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) {printf("2\n"); return false; } - + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { + printf("2\n"); + return false; + } + for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) {printf("3\n"); return false; } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { + printf("3\n"); + return false; + } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { + printf("4\n"); + return false; + } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { + printf("5\n"); + return false; + } } - } - else if (num_nodes == 1) { + } else if (num_nodes == 1) { if (b.rowidx.size() != num_nodes + 1) { return false; } - if(b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; - if(!b.colidx.empty()) return false; + if (b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; + if (!b.colidx.empty()) return false; for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) {printf("4\n"); return false; } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { + printf("4\n"); + return false; + } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) {printf("5\n"); return false; } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { + printf("5\n"); + 
return false; + } } } return true; @@ -146,12 +166,16 @@ void run_test(std::string const& input, bool enable_lines = true) d_input, options, stream, cudf::get_current_device_resource_ref()); // Get the JSON's tree representation - auto gpu_tree = cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, options.is_enabled_mixed_types_as_string(), stream, cudf::get_current_device_resource_ref()); + auto gpu_tree = + cuio_json::detail::get_tree_representation(tokens_gpu, + token_indices_gpu, + options.is_enabled_mixed_types_as_string(), + stream, + cudf::get_current_device_resource_ref()); bool const is_array_of_arrays = [&]() { std::array h_node_categories = {cuio_json::NC_ERR, cuio_json::NC_ERR}; - auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); + auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); CUDF_CUDA_TRY(cudaMemcpyAsync(h_node_categories.data(), gpu_tree.node_categories.data(), sizeof(cuio_json::node_t) * size_to_copy, @@ -159,7 +183,8 @@ void run_test(std::string const& input, bool enable_lines = true) stream.value())); stream.synchronize(); if (options.is_enabled_lines()) return h_node_categories[0] == cuio_json::NC_LIST; - return h_node_categories[0] == cuio_json::NC_LIST and h_node_categories[1] == cuio_json::NC_LIST; + return h_node_categories[0] == cuio_json::NC_LIST and + h_node_categories[1] == cuio_json::NC_LIST; }(); auto tup = @@ -302,7 +327,7 @@ TEST_F(JsonColumnTreeTests, JSON1) TEST_F(JsonColumnTreeTests, JSON2) { - std::string json_string = + std::string json_string = R"([ {}, { "a": { "y" : 6, "z": [] }}, @@ -313,7 +338,7 @@ TEST_F(JsonColumnTreeTests, JSON2) TEST_F(JsonColumnTreeTests, JSONLA1) { - std::string json_string = + std::string json_string = R"([123, [1,2,3]] [456, null, { "a": 1 }])"; run_test(json_string); From e5d4a35423ef23736e937dd0c47f0cd392f133dd Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 16:31:37 +0000 Subject: 
[PATCH 34/46] pr reviews --- cpp/src/io/json/json_utils.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/json/json_utils.hpp b/cpp/src/io/json/json_utils.hpp index 8864bde84d8..995f5d0405f 100644 --- a/cpp/src/io/json/json_utils.hpp +++ b/cpp/src/io/json/json_utils.hpp @@ -61,8 +61,8 @@ std::pair, rmm::device_uvector> stable_s nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin()); - thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end()); + thrust::copy(rmm::exec_policy_nosync(stream), keys.begin(), keys.end(), keys_buffer1.begin()); + thrust::sequence(rmm::exec_policy_nosync(stream), order_buffer1.begin(), order_buffer1.end()); cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), temp_storage_bytes, From 3cdc2118affbbf4c50a6ec9611484c2621b44a8a Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 17:08:12 +0000 Subject: [PATCH 35/46] exec policy sync -> nosync --- cpp/src/io/json/column_tree_construction.cu | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index bdce5bac44f..3a26254ac04 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -339,7 +339,7 @@ std::tuple reduce_to_column_tree( #endif rmm::device_uvector colidx((num_columns - 1) * 2, stream); - thrust::fill(rmm::exec_policy(stream), colidx.begin(), colidx.end(), -1); + thrust::fill(rmm::exec_policy_nosync(stream), colidx.begin(), colidx.end(), -1); // excluding root node, construct scatter map rmm::device_uvector map(num_columns - 1, stream); thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), @@ -543,8 +543,8 @@ reduce_to_column_tree(tree_meta_t& tree, { 
CUDF_FUNC_RANGE(); // 1. column count for allocation - auto const num_columns = - thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + auto const num_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end()); // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); @@ -567,7 +567,7 @@ reduce_to_column_tree(tree_meta_t& tree, rmm::device_uvector col_range_begin(num_columns, stream); // Field names rmm::device_uvector col_range_end(num_columns, stream); rmm::device_uvector unique_node_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy(stream), + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end(), ordered_node_ids.begin(), @@ -575,7 +575,7 @@ reduce_to_column_tree(tree_meta_t& tree, unique_node_ids.begin()); thrust::copy_n( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_zip_iterator( thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), @@ -586,7 +586,7 @@ reduce_to_column_tree(tree_meta_t& tree, // convert parent_node_ids to parent_col_ids thrust::transform( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), parent_col_ids.begin(), parent_col_ids.end(), parent_col_ids.begin(), @@ -621,11 +621,11 @@ reduce_to_column_tree(tree_meta_t& tree, // gather the max_row_offsets from children row offset array. 
{ rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy(stream), + thrust::fill(rmm::exec_policy_nosync(stream), list_parents_children_max_row_offsets.begin(), list_parents_children_max_row_offsets.end(), 0); - thrust::for_each(rmm::exec_policy(stream), + thrust::for_each(rmm::exec_policy_nosync(stream), unique_col_ids.begin(), unique_col_ids.end(), [column_categories = column_categories.begin(), @@ -643,7 +643,7 @@ reduce_to_column_tree(tree_meta_t& tree, }); thrust::gather_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), parent_col_ids.begin(), parent_col_ids.end(), parent_col_ids.begin(), @@ -662,7 +662,7 @@ reduce_to_column_tree(tree_meta_t& tree, // copy lists' max_row_offsets to children. // all structs should have same size. thrust::transform_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), unique_col_ids.begin(), unique_col_ids.end(), max_row_offsets.begin(), @@ -692,7 +692,7 @@ reduce_to_column_tree(tree_meta_t& tree, // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) thrust::transform_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), col_range_begin.begin(), col_range_begin.end(), column_categories.begin(), From 9ca7b5e03683d4b0bc2b1b6267700efd949c7262 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 19:27:32 +0000 Subject: [PATCH 36/46] pr reviews --- cpp/src/io/json/column_tree_construction.cu | 327 ++++++++++---------- cpp/src/io/json/json_column.cu | 14 +- cpp/src/io/json/nested_json.hpp | 12 +- cpp/tests/io/json/json_tree_csr.cu | 2 +- 4 files changed, 165 insertions(+), 190 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 3a26254ac04..a7865321300 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -94,8 +94,9 @@ void 
max_row_offsets_col_categories(InputIterator1 keys_first, ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); } else if (is_b_leaf) { ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); - } else + } else { ctg = NC_ERR; + } return thrust::make_pair( thrust::maximum{}(row_offset_a, row_offset_b), ctg); @@ -124,7 +125,7 @@ void print(device_span d_vec, std::string name, rmm::cuda_stream_view s namespace experimental::detail { struct level_ordering { - device_span node_levels; + device_span node_levels; device_span col_ids; device_span parent_node_ids; __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const @@ -144,7 +145,7 @@ struct level_ordering { struct parent_nodeids_to_colids { device_span col_ids; - device_span rev_mapped_col_ids; + device_span rev_mapped_col_ids; __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT { return parent_node_id == parent_node_sentinel ? parent_node_sentinel @@ -164,7 +165,7 @@ struct parent_nodeids_to_colids { * @return A tuple of column tree representation of JSON string, column ids of columns, and * max row offsets of columns */ -std::tuple reduce_to_column_tree( +std::tuple reduce_to_column_tree( tree_meta_t& tree, device_span col_ids, device_span row_offsets, @@ -180,105 +181,94 @@ std::tuple reduce_to_column_tree( rmm::device_uvector empty_column_categories(0, stream); rmm::device_uvector empty_max_row_offsets(0, stream); rmm::device_uvector empty_mapped_col_ids(0, stream); - return std::tuple{csr{std::move(empty_rowidx), std::move(empty_colidx)}, + return std::tuple{compressed_sparse_row{std::move(empty_rowidx), std::move(empty_colidx)}, column_tree_properties{std::move(empty_column_categories), std::move(empty_max_row_offsets), std::move(empty_mapped_col_ids)}}; } + NodeIndexT num_columns; auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( col_ids, stream, cudf::get_current_device_resource_ref()); rmm::device_uvector 
level_ordered_node_ids(col_ids.size(), stream); - thrust::sequence( - rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); - - // Reorder nodes and column ids in level-wise fashion - size_t temp_storage_bytes = 0; - cub::DeviceMergeSort::SortPairs(nullptr, - temp_storage_bytes, - level_ordered_node_ids.begin(), - level_ordered_col_ids.begin(), - col_ids.size(), - level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, - stream.value()); - rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - cub::DeviceMergeSort::SortPairs(d_temp_storage.data(), - temp_storage_bytes, - level_ordered_node_ids.begin(), - level_ordered_col_ids.begin(), - col_ids.size(), - level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}, - stream.value()); - /* - thrust::sort_by_key(rmm::exec_policy_nosync(stream), - level_ordered_node_ids.begin(), - level_ordered_node_ids.end(), - level_ordered_col_ids.begin(), - level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}); - */ + + { + thrust::sequence(rmm::exec_policy_nosync(stream), + level_ordered_node_ids.begin(), + level_ordered_node_ids.end()); + // Reorder nodes and column ids in level-wise fashion + thrust::sort_by_key(rmm::exec_policy_nosync(stream), + level_ordered_node_ids.begin(), + level_ordered_node_ids.end(), + level_ordered_col_ids.begin(), + level_ordering{tree.node_levels, col_ids, tree.parent_node_ids}); #ifdef CSR_DEBUG_PRINT - print(level_ordered_node_ids, "h_level_ordered_node_ids", stream); - print(col_ids, "h_col_ids", stream); - print(level_ordered_col_ids, "h_level_ordered_col_ids", stream); + print(level_ordered_node_ids, "h_level_ordered_node_ids", stream); + print(col_ids, "h_col_ids", stream); + print(level_ordered_col_ids, "h_level_ordered_col_ids", stream); #endif - // 1. 
get the number of columns in tree, mapping between node tree col ids and csr col ids, and - // the node id of first row in each column - auto const num_columns = thrust::unique_count( - rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); - rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); + // 1. get the number of columns in tree, mapping between node tree col ids and csr col ids, and + // the node id of first row in each column + num_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); + } + rmm::device_uvector mapped_col_ids(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector column_categories(num_columns, stream); + rmm::device_uvector parent_col_ids(num_columns, stream); rmm::device_uvector rev_mapped_col_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), - level_ordered_col_ids.begin(), - level_ordered_col_ids.end(), - level_ordered_node_ids.begin(), - mapped_col_ids.begin(), - level_ordered_unique_node_ids.begin()); - auto* dev_num_levels_ptr = thrust::max_element( - rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); - - auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async( - mapped_col_ids, stream, cudf::get_current_device_resource_ref()); - thrust::sequence( - rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); - thrust::sort_by_key(rmm::exec_policy_nosync(stream), - mapped_col_ids_copy.begin(), - mapped_col_ids_copy.end(), - rev_mapped_col_ids.begin()); + + { + rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), + level_ordered_col_ids.begin(), + level_ordered_col_ids.end(), + level_ordered_node_ids.begin(), + mapped_col_ids.begin(), + 
level_ordered_unique_node_ids.begin()); + + auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async( + mapped_col_ids, stream, cudf::get_current_device_resource_ref()); + thrust::sequence( + rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); + thrust::sort_by_key(rmm::exec_policy_nosync(stream), + mapped_col_ids_copy.begin(), + mapped_col_ids_copy.end(), + rev_mapped_col_ids.begin()); #ifdef CSR_DEBUG_PRINT - print(mapped_col_ids, "h_mapped_col_ids", stream); - print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); - print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); + print(mapped_col_ids, "h_mapped_col_ids", stream); + print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); + print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); #endif - // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, max. - // 3. category for each column node by aggregating all nodes in node tree corresponding to same - // column: - // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector max_row_offsets(num_columns, stream); - rmm::device_uvector column_categories(num_columns, stream); - max_row_offsets_col_categories( - level_ordered_col_ids.begin(), - level_ordered_col_ids.end(), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_categories.begin(), - level_ordered_node_ids.begin())), - thrust::make_discard_iterator(), - thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), - stream); - // 4. 
construct parent_col_ids using permutation iterator - rmm::device_uvector parent_col_ids(num_columns, stream); - thrust::transform_output_iterator parent_col_ids_it( - parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); - thrust::copy_n(rmm::exec_policy_nosync(stream), - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), - level_ordered_unique_node_ids.begin()), - num_columns, - parent_col_ids_it); + // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, + // max. + // 3. category for each column node by aggregating all nodes in node tree corresponding to same + // column: + // reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + max_row_offsets_col_categories( + level_ordered_col_ids.begin(), + level_ordered_col_ids.end(), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(row_offsets.begin(), level_ordered_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_categories.begin(), + level_ordered_node_ids.begin())), + thrust::make_discard_iterator(), + thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), + stream); + // 4. construct parent_col_ids using permutation iterator + thrust::transform_output_iterator parent_col_ids_it( + parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); + thrust::copy_n(rmm::exec_policy_nosync(stream), + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), + level_ordered_unique_node_ids.begin()), + num_columns, + parent_col_ids_it); + } /* 5. CSR construction: @@ -288,87 +278,90 @@ std::tuple reduce_to_column_tree( ii. row idx[coln] = size of adj_coln + 1 iii. 
col idx[coln] = adj_coln U {parent_col_id[coln]} */ - auto rowidx = cudf::detail::make_zeroed_device_uvector_async( static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); - // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) - // children adjacency + { + // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) + // children adjacency #ifdef CSR_DEBUG_PRINT - print(parent_col_ids, "h_parent_col_ids", stream); + print(parent_col_ids, "h_parent_col_ids", stream); #endif - auto num_non_leaf_columns = thrust::unique_count( - rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); - rmm::device_uvector non_leaf_nodes(num_non_leaf_columns, stream); - rmm::device_uvector non_leaf_nodes_children(num_non_leaf_columns, stream); - thrust::reduce_by_key(rmm::exec_policy_nosync(stream), - parent_col_ids.begin() + 1, - parent_col_ids.end(), - thrust::make_constant_iterator(1), - non_leaf_nodes.begin(), - non_leaf_nodes_children.begin(), - thrust::equal_to()); - - thrust::scatter(rmm::exec_policy_nosync(stream), - non_leaf_nodes_children.begin(), - non_leaf_nodes_children.end(), - non_leaf_nodes.begin(), - rowidx.begin() + 1); + auto num_non_leaf_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); + rmm::device_uvector non_leaf_nodes(num_non_leaf_columns, stream); + rmm::device_uvector non_leaf_nodes_children(num_non_leaf_columns, stream); + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), + non_leaf_nodes.begin(), + non_leaf_nodes_children.begin(), + thrust::equal_to()); - if (num_columns > 1) { - thrust::transform_inclusive_scan( - rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), - thrust::make_zip_iterator(thrust::make_counting_iterator(1) + 
num_columns, rowidx.end()), - rowidx.begin() + 1, - cuda::proclaim_return_type([] __device__(auto a) { - auto n = thrust::get<0>(a); - auto idx = thrust::get<1>(a); - return n == 1 ? idx : idx + 1; - return idx + 1; - }), - thrust::plus{}); - } else { - auto single_node = 1; - rowidx.set_element_async(1, single_node, stream); - } + thrust::scatter(rmm::exec_policy_nosync(stream), + non_leaf_nodes_children.begin(), + non_leaf_nodes_children.end(), + non_leaf_nodes.begin(), + rowidx.begin() + 1); + + if (num_columns > 1) { + thrust::transform_inclusive_scan( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(1), rowidx.begin() + 1), + thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, rowidx.end()), + rowidx.begin() + 1, + cuda::proclaim_return_type([] __device__(auto a) { + auto n = thrust::get<0>(a); + auto idx = thrust::get<1>(a); + return n == 1 ? idx : idx + 1; + return idx + 1; + }), + thrust::plus{}); + } else { + auto single_node = 1; + rowidx.set_element_async(1, single_node, stream); + } #ifdef CSR_DEBUG_PRINT - print(rowidx, "h_rowidx", stream); + print(rowidx, "h_rowidx", stream); #endif + } rmm::device_uvector colidx((num_columns - 1) * 2, stream); - thrust::fill(rmm::exec_policy_nosync(stream), colidx.begin(), colidx.end(), -1); - // excluding root node, construct scatter map - rmm::device_uvector map(num_columns - 1, stream); - thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), - parent_col_ids.begin() + 1, - parent_col_ids.end(), - thrust::make_constant_iterator(1), - map.begin()); - thrust::for_each_n(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(1), - num_columns - 1, - [rowidx = rowidx.begin(), - map = map.begin(), - parent_col_ids = parent_col_ids.begin()] __device__(auto i) { - auto parent_col_id = parent_col_ids[i]; - if (parent_col_id == 0) - map[i - 1]--; - else - map[i - 1] += rowidx[parent_col_id]; - }); - 
thrust::scatter(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(1), - thrust::make_counting_iterator(1) + num_columns - 1, - map.begin(), - colidx.begin()); + { + thrust::fill(rmm::exec_policy_nosync(stream), colidx.begin(), colidx.end(), -1); + // excluding root node, construct scatter map + rmm::device_uvector map(num_columns - 1, stream); + thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), + map.begin()); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(1), + num_columns - 1, + [rowidx = rowidx.begin(), + map = map.begin(), + parent_col_ids = parent_col_ids.begin()] __device__(auto i) { + auto parent_col_id = parent_col_ids[i]; + if (parent_col_id == 0) + --map[i - 1]; + else + map[i - 1] += rowidx[parent_col_id]; + }); + thrust::scatter(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + map.begin(), + colidx.begin()); #ifdef CSR_DEBUG_PRINT - print(colidx, "h_pre_colidx", stream); - print(max_row_offsets, "h_max_row_offsets", stream); + print(colidx, "h_pre_colidx", stream); + print(max_row_offsets, "h_max_row_offsets", stream); #endif + } // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. 
@@ -403,11 +396,11 @@ std::tuple reduce_to_column_tree( rowidx.begin(), rowidx.begin() + 1, stream.value()); - CUDF_CUDA_TRY(cudaMemcpyAsync(max_children_max_row_offsets.data(), - max_row_offsets.data(), - sizeof(row_offset_t), - cudaMemcpyDeviceToDevice, - stream.value())); + cudf::detail::cuda_memcpy_async(max_children_max_row_offsets.data(), + max_row_offsets.data(), + sizeof(row_offset_t), + cudf::detail::host_memory_kind::PAGEABLE, + stream); #ifdef CSR_DEBUG_PRINT print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); @@ -444,20 +437,12 @@ std::tuple reduce_to_column_tree( print(colidx, "h_colidx", stream); #endif - // condition is true if parent is not a list, or sentinel/root - // Special case to return true if parent is a list and is_array_of_arrays is true - auto is_non_list_parent = [column_categories = column_categories.begin(), - is_array_of_arrays, - row_array_parent_col_id] __device__(auto parent_col_id) -> bool { - return (parent_col_id != parent_node_sentinel && - column_categories[parent_col_id] != NC_LIST || - (is_array_of_arrays && parent_col_id == row_array_parent_col_id)); - }; - // Vector to store the latest ancestor of LIST type. If no such ancestor is found, // store the root node of tree. 
Note that a node cannot be an ancestor of itself auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); + auto* dev_num_levels_ptr = thrust::max_element( + rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); row_array_parent_col_id = rev_mapped_col_ids.element(row_array_parent_col_id, stream); auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || (is_array_of_arrays && row_array_parent_col_id) @@ -509,7 +494,7 @@ std::tuple reduce_to_column_tree( } return std::tuple{ - csr{std::move(rowidx), std::move(colidx)}, + compressed_sparse_row{std::move(rowidx), std::move(colidx)}, column_tree_properties{ std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}}; } @@ -620,11 +605,9 @@ reduce_to_column_tree(tree_meta_t& tree, // atomicMax on children max_row_offsets array. // gather the max_row_offsets from children row offset array. 
{ - rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy_nosync(stream), - list_parents_children_max_row_offsets.begin(), - list_parents_children_max_row_offsets.end(), - 0); + auto list_parents_children_max_row_offsets = + cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); thrust::for_each(rmm::exec_policy_nosync(stream), unique_col_ids.begin(), unique_col_ids.end(), diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index a80555383a4..63d196b19e8 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -269,7 +269,7 @@ struct h_tree_meta_t { }; struct h_column_tree { - // position of nnzs + // concatenated adjacency list std::vector rowidx; std::vector colidx; // node properties @@ -280,7 +280,7 @@ struct h_column_tree { #ifdef CSR_DEBUG_EQ bool check_equality(tree_meta_t& d_a, cudf::device_span d_a_max_row_offsets, - experimental::csr& d_b_csr, + experimental::compressed_sparse_row& d_b_csr, experimental::column_tree_properties& d_b_ctp, rmm::cuda_stream_view stream) { @@ -332,17 +332,11 @@ bool check_equality(tree_meta_t& d_a, if (b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; if (!b.colidx.empty()) return false; for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { - printf("4\n"); - return false; - } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { - printf("5\n"); - return false; - } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } } } return true; diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 0c331915a5c..d7e23565079 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -189,7 
+189,7 @@ namespace experimental { /* * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format. */ -struct csr { +struct compressed_sparse_row { rmm::device_uvector rowidx; rmm::device_uvector colidx; }; @@ -205,16 +205,14 @@ struct column_tree_properties { }; /* - * @brief Unvalidated column tree stored in Compressed Sparse Row (CSR) format. The device json + * @brief Unverified column tree stored in Compressed Sparse Row (CSR) format. The device json * column subtree - the subgraph that conforms to column tree properties - is extracted and further * processed according to the JSON reader options passed. Only the final processed subgraph is * annotated with information required to construct cuDF columns. */ struct column_tree { - // position of nnzs - csr adjacency; - rmm::device_uvector rowidx; - rmm::device_uvector colidx; + // concatenated adjacency list + compressed_sparse_row adjacency; // device_json_column properties using row_offset_t = size_type; // Indicator array for the device column subtree @@ -241,7 +239,7 @@ namespace detail { * in each column */ CUDF_EXPORT -std::tuple reduce_to_column_tree( +std::tuple reduce_to_column_tree( tree_meta_t& tree, device_span original_col_ids, device_span row_offsets, diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index 76e509a13d1..d677c4678a7 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -68,7 +68,7 @@ void print(cudf::host_span vec, std::string name) bool check_equality(cuio_json::tree_meta_t& d_a, cudf::device_span d_a_max_row_offsets, - cuio_json::experimental::csr& d_b_csr, + cuio_json::experimental::compressed_sparse_row& d_b_csr, cuio_json::experimental::column_tree_properties& d_b_ctp, rmm::cuda_stream_view stream) { From 29be4300240d8a677b66b7ef409dc2d17342adb0 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 19:32:41 +0000 Subject: [PATCH 37/46] cleanup --- 
cpp/tests/io/json/json_tree_csr.cu | 35 ++++++------------------------ 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index d677c4678a7..71cd8bdf032 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -96,37 +96,22 @@ bool check_equality(cuio_json::tree_meta_t& d_a, for (auto pos = b.rowidx[0]; pos < b.rowidx[1]; pos++) { auto v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { - printf("1\n"); - return false; - } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } } for (size_t u = 1; u < num_nodes; u++) { auto v = b.colidx[b.rowidx[u]]; - if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { - printf("2\n"); - return false; - } + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } for (auto pos = b.rowidx[u] + 1; pos < b.rowidx[u + 1]; pos++) { v = b.colidx[pos]; - if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { - printf("3\n"); - return false; - } + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } } } for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { - printf("4\n"); - return false; - } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { - printf("5\n"); - return false; - } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } } } else if (num_nodes == 1) { if (b.rowidx.size() != num_nodes + 1) { return false; } @@ -134,17 +119,11 @@ bool check_equality(cuio_json::tree_meta_t& d_a, if (b.rowidx[0] != 0 || b.rowidx[1] != 1) return false; if (!b.colidx.empty()) return false; for (size_t u = 0; u < num_nodes; u++) { - if (a.node_categories[b.column_ids[u]] != b.categories[u]) { - printf("4\n"); 
- return false; - } + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } } for (size_t u = 0; u < num_nodes; u++) { - if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { - printf("5\n"); - return false; - } + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } } } return true; From 023a4a8abd3300d49d9f38286a12952a29e5e023 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 20:49:55 +0000 Subject: [PATCH 38/46] moving steps to lambdas to handle intermediate vectors --- cpp/src/io/json/column_tree_construction.cu | 95 ++++++++++++--------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index a7865321300..88d53ee3160 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -187,12 +187,12 @@ std::tuple reduce_to_column_tree( std::move(empty_mapped_col_ids)}}; } - NodeIndexT num_columns; - auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( - col_ids, stream, cudf::get_current_device_resource_ref()); - rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); + auto level_orderings = [&tree, &col_ids, &stream]() { + NodeIndexT num_columns; + auto level_ordered_col_ids = cudf::detail::make_device_uvector_async( + col_ids, stream, cudf::get_current_device_resource_ref()); + rmm::device_uvector level_ordered_node_ids(col_ids.size(), stream); - { thrust::sequence(rmm::exec_policy_nosync(stream), level_ordered_node_ids.begin(), level_ordered_node_ids.end()); @@ -213,15 +213,15 @@ std::tuple reduce_to_column_tree( // the node id of first row in each column num_columns = thrust::unique_count( rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); - } - rmm::device_uvector mapped_col_ids(num_columns, stream); - rmm::device_uvector max_row_offsets(num_columns, 
stream); - rmm::device_uvector column_categories(num_columns, stream); - rmm::device_uvector parent_col_ids(num_columns, stream); - rmm::device_uvector rev_mapped_col_ids(num_columns, stream); + return std::tuple{num_columns, std::move(level_ordered_node_ids), std::move(level_ordered_col_ids)}; + }; + + auto col_tree_adjs = [&tree, &col_ids, &stream](NodeIndexT num_columns, device_span level_ordered_col_ids, device_span level_ordered_node_ids) { + rmm::device_uvector mapped_col_ids(num_columns, stream); + rmm::device_uvector parent_col_ids(num_columns, stream); + rmm::device_uvector rev_mapped_col_ids(num_columns, stream); - { rmm::device_uvector level_ordered_unique_node_ids(num_columns, stream); thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), @@ -229,7 +229,6 @@ std::tuple reduce_to_column_tree( level_ordered_node_ids.begin(), mapped_col_ids.begin(), level_ordered_unique_node_ids.begin()); - auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async( mapped_col_ids, stream, cudf::get_current_device_resource_ref()); thrust::sequence( @@ -238,13 +237,28 @@ std::tuple reduce_to_column_tree( mapped_col_ids_copy.begin(), mapped_col_ids_copy.end(), rev_mapped_col_ids.begin()); - #ifdef CSR_DEBUG_PRINT print(mapped_col_ids, "h_mapped_col_ids", stream); print(level_ordered_unique_node_ids, "h_level_ordered_unique_node_ids", stream); print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); #endif + // 4. 
construct parent_col_ids using permutation iterator + thrust::transform_output_iterator parent_col_ids_it( + parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); + thrust::copy_n(rmm::exec_policy_nosync(stream), + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), + level_ordered_unique_node_ids.begin()), + num_columns, + parent_col_ids_it); + + return std::tuple{std::move(mapped_col_ids), std::move(parent_col_ids), std::move(rev_mapped_col_ids)}; + }; + + auto col_tree_props = [&tree, &row_offsets, &stream](NodeIndexT num_columns, device_span level_ordered_col_ids, device_span level_ordered_node_ids) { + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector column_categories(num_columns, stream); + // 2. maximum number of rows per column: computed with reduce_by_key {col_id}, {row_offset}, // max. // 3. category for each column node by aggregating all nodes in node tree corresponding to same @@ -260,27 +274,13 @@ std::tuple reduce_to_column_tree( thrust::make_discard_iterator(), thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), stream); - // 4. construct parent_col_ids using permutation iterator - thrust::transform_output_iterator parent_col_ids_it( - parent_col_ids.begin(), parent_nodeids_to_colids{col_ids, rev_mapped_col_ids}); - thrust::copy_n(rmm::exec_policy_nosync(stream), - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), - level_ordered_unique_node_ids.begin()), - num_columns, - parent_col_ids_it); - } - /* - 5. CSR construction: - a. Sort column levels and get their ordering - b. For each column node coln iterated according to sorted_column_levels; do - i. Find nodes that have coln as the parent node -> set adj_coln - ii. row idx[coln] = size of adj_coln + 1 - iii. 
col idx[coln] = adj_coln U {parent_col_id[coln]} - */ - auto rowidx = cudf::detail::make_zeroed_device_uvector_async( - static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); - { + return std::tuple{std::move(max_row_offsets), std::move(column_categories)}; + }; + + auto construct_rowidx = [&stream](NodeIndexT num_columns, device_span parent_col_ids) { + auto rowidx = cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) // children adjacency @@ -327,10 +327,11 @@ std::tuple reduce_to_column_tree( #ifdef CSR_DEBUG_PRINT print(rowidx, "h_rowidx", stream); #endif - } + return rowidx; + }; - rmm::device_uvector colidx((num_columns - 1) * 2, stream); - { + auto partially_construct_colidx = [&stream](NodeIndexT num_columns, device_span parent_col_ids, device_span rowidx) { + rmm::device_uvector colidx((num_columns - 1) * 2, stream); thrust::fill(rmm::exec_policy_nosync(stream), colidx.begin(), colidx.end(), -1); // excluding root node, construct scatter map rmm::device_uvector map(num_columns - 1, stream); @@ -361,7 +362,25 @@ std::tuple reduce_to_column_tree( print(colidx, "h_pre_colidx", stream); print(max_row_offsets, "h_max_row_offsets", stream); #endif - } + + return colidx; + }; + + auto [num_columns, level_ordered_node_ids, level_ordered_col_ids] = level_orderings(); + auto [mapped_col_ids, parent_col_ids, rev_mapped_col_ids] = col_tree_adjs(num_columns, level_ordered_col_ids, level_ordered_node_ids); + auto [max_row_offsets, column_categories] = col_tree_props(num_columns, level_ordered_col_ids, level_ordered_node_ids); + + /* + 5. CSR construction: + a. Sort column levels and get their ordering + b. For each column node coln iterated according to sorted_column_levels; do + i. Find nodes that have coln as the parent node -> set adj_coln + ii. 
row idx[coln] = size of adj_coln + 1 + iii. col idx[coln] = adj_coln U {parent_col_id[coln]} + */ + auto rowidx = construct_rowidx(num_columns, parent_col_ids); + auto colidx = partially_construct_colidx(num_columns, parent_col_ids, rowidx); + // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. From ded2c5ec422a0a865a5601f7d3f5d44087fb2647 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 20:51:01 +0000 Subject: [PATCH 39/46] formatting --- cpp/src/io/json/column_tree_construction.cu | 30 ++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 88d53ee3160..f3054a210cb 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -214,10 +214,14 @@ std::tuple reduce_to_column_tree( num_columns = thrust::unique_count( rmm::exec_policy_nosync(stream), level_ordered_col_ids.begin(), level_ordered_col_ids.end()); - return std::tuple{num_columns, std::move(level_ordered_node_ids), std::move(level_ordered_col_ids)}; + return std::tuple{ + num_columns, std::move(level_ordered_node_ids), std::move(level_ordered_col_ids)}; }; - auto col_tree_adjs = [&tree, &col_ids, &stream](NodeIndexT num_columns, device_span level_ordered_col_ids, device_span level_ordered_node_ids) { + auto col_tree_adjs = [&tree, &col_ids, &stream]( + NodeIndexT num_columns, + device_span level_ordered_col_ids, + device_span level_ordered_node_ids) { rmm::device_uvector mapped_col_ids(num_columns, stream); rmm::device_uvector parent_col_ids(num_columns, stream); rmm::device_uvector rev_mapped_col_ids(num_columns, stream); @@ -252,10 +256,14 @@ std::tuple reduce_to_column_tree( num_columns, parent_col_ids_it); - return std::tuple{std::move(mapped_col_ids), std::move(parent_col_ids), std::move(rev_mapped_col_ids)}; + return 
std::tuple{ + std::move(mapped_col_ids), std::move(parent_col_ids), std::move(rev_mapped_col_ids)}; }; - auto col_tree_props = [&tree, &row_offsets, &stream](NodeIndexT num_columns, device_span level_ordered_col_ids, device_span level_ordered_node_ids) { + auto col_tree_props = [&tree, &row_offsets, &stream]( + NodeIndexT num_columns, + device_span level_ordered_col_ids, + device_span level_ordered_node_ids) { rmm::device_uvector max_row_offsets(num_columns, stream); rmm::device_uvector column_categories(num_columns, stream); @@ -278,7 +286,8 @@ std::tuple reduce_to_column_tree( return std::tuple{std::move(max_row_offsets), std::move(column_categories)}; }; - auto construct_rowidx = [&stream](NodeIndexT num_columns, device_span parent_col_ids) { + auto construct_rowidx = [&stream](NodeIndexT num_columns, + device_span parent_col_ids) { auto rowidx = cudf::detail::make_zeroed_device_uvector_async( static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) @@ -330,7 +339,9 @@ std::tuple reduce_to_column_tree( return rowidx; }; - auto partially_construct_colidx = [&stream](NodeIndexT num_columns, device_span parent_col_ids, device_span rowidx) { + auto partially_construct_colidx = [&stream](NodeIndexT num_columns, + device_span parent_col_ids, + device_span rowidx) { rmm::device_uvector colidx((num_columns - 1) * 2, stream); thrust::fill(rmm::exec_policy_nosync(stream), colidx.begin(), colidx.end(), -1); // excluding root node, construct scatter map @@ -367,8 +378,10 @@ std::tuple reduce_to_column_tree( }; auto [num_columns, level_ordered_node_ids, level_ordered_col_ids] = level_orderings(); - auto [mapped_col_ids, parent_col_ids, rev_mapped_col_ids] = col_tree_adjs(num_columns, level_ordered_col_ids, level_ordered_node_ids); - auto [max_row_offsets, column_categories] = col_tree_props(num_columns, level_ordered_col_ids, level_ordered_node_ids); + auto 
[mapped_col_ids, parent_col_ids, rev_mapped_col_ids] = + col_tree_adjs(num_columns, level_ordered_col_ids, level_ordered_node_ids); + auto [max_row_offsets, column_categories] = + col_tree_props(num_columns, level_ordered_col_ids, level_ordered_node_ids); /* 5. CSR construction: @@ -381,7 +394,6 @@ std::tuple reduce_to_column_tree( auto rowidx = construct_rowidx(num_columns, parent_col_ids); auto colidx = partially_construct_colidx(num_columns, parent_col_ids, rowidx); - // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array From b2d11dd93f41de159ec7c794f6b7596c30a369f8 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 21:27:57 +0000 Subject: [PATCH 40/46] more lambdas --- cpp/src/io/json/column_tree_construction.cu | 37 +++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index f3054a210cb..819ec82222f 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -394,11 +394,7 @@ std::tuple reduce_to_column_tree( auto rowidx = construct_rowidx(num_columns, parent_col_ids); auto colidx = partially_construct_colidx(num_columns, parent_col_ids, rowidx); - // Mixed types in List children go to different columns, - // so all immediate children of list column should have same max_row_offsets. - // create list's children max_row_offsets array - // gather the max_row_offsets from children row offset array. 
- if (num_columns > 1) { + auto max_children_max_row_offsets_colidx_update = [&colidx, &stream](NodeIndexT num_columns, device_span rowidx, device_span parent_col_ids, device_span max_row_offsets) { auto max_row_offsets_it = thrust::make_transform_iterator( thrust::make_counting_iterator(0), cuda::proclaim_return_type( @@ -468,25 +464,23 @@ std::tuple reduce_to_column_tree( print(colidx, "h_colidx", stream); #endif + return max_children_max_row_offsets; + }; + + row_array_parent_col_id = rev_mapped_col_ids.element(row_array_parent_col_id, stream); + auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || + (is_array_of_arrays && row_array_parent_col_id) + ? 1 + : 0; + + auto update_max_row_offsets = [&tree, row_array_parent_col_id, is_array_of_arrays, root_node, &stream](NodeIndexT num_columns, device_span rowidx, device_span colidx, device_span column_categories, device_span max_row_offsets, device_span max_children_max_row_offsets) { // Vector to store the latest ancestor of LIST type. If no such ancestor is found, // store the root node of tree. Note that a node cannot be an ancestor of itself auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); auto* dev_num_levels_ptr = thrust::max_element( rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); - row_array_parent_col_id = rev_mapped_col_ids.element(row_array_parent_col_id, stream); - auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || - (is_array_of_arrays && row_array_parent_col_id) - ? 1 - : 0; - // root_node = (is_array_of_arrays && row_array_parent_col_id && num_columns == 2) ? 
0 : - // root_node; if (root_node) list_ancestors.set_element_async(root_node, root_node, stream); - /* - std::cout << "root_node = " << root_node << std::endl; - std::cout << "row_array_parent_col_id = " << row_array_parent_col_id << std::endl; - std::cout << "is_array_of_arrays = " << is_array_of_arrays << std::endl; - */ thrust::for_each_n(rmm::exec_policy_nosync(stream), thrust::make_counting_iterator(root_node + 1), num_columns - root_node - 1, @@ -522,6 +516,15 @@ std::tuple reduce_to_column_tree( #ifdef CSR_DEBUG_PRINT print(max_row_offsets, "h_max_row_offsets", stream); #endif + }; + + // Mixed types in List children go to different columns, + // so all immediate children of list column should have same max_row_offsets. + // create list's children max_row_offsets array + // gather the max_row_offsets from children row offset array. + if (num_columns > 1) { + auto max_children_max_row_offsets = max_children_max_row_offsets_colidx_update(num_columns, rowidx, parent_col_ids, max_row_offsets); + update_max_row_offsets(num_columns, rowidx, colidx, column_categories, max_row_offsets, max_children_max_row_offsets); } return std::tuple{ From 7260ae60b73b8903a3ebebf130a421300c11c5a5 Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Fri, 20 Sep 2024 21:29:07 +0000 Subject: [PATCH 41/46] formatting --- cpp/src/io/json/column_tree_construction.cu | 216 +++++++++++--------- 1 file changed, 117 insertions(+), 99 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 819ec82222f..aa7b6a91ced 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -394,78 +394,83 @@ std::tuple reduce_to_column_tree( auto rowidx = construct_rowidx(num_columns, parent_col_ids); auto colidx = partially_construct_colidx(num_columns, parent_col_ids, rowidx); - auto max_children_max_row_offsets_colidx_update = [&colidx, &stream](NodeIndexT num_columns, device_span rowidx, 
device_span parent_col_ids, device_span max_row_offsets) { - auto max_row_offsets_it = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - cuda::proclaim_return_type( - [colidx = colidx.begin(), max_row_offsets = max_row_offsets.begin()] __device__(size_t i) { - if (colidx[i] == -1) - return -1; - else - return max_row_offsets[colidx[i]]; - })); - rmm::device_uvector max_children_max_row_offsets(num_columns, stream); - size_t temp_storage_bytes = 0; - cub::DeviceSegmentedReduce::Max(nullptr, - temp_storage_bytes, - max_row_offsets_it, - max_children_max_row_offsets.begin(), - num_columns, - rowidx.begin(), - rowidx.begin() + 1, - stream.value()); - rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - cub::DeviceSegmentedReduce::Max(d_temp_storage.data(), - temp_storage_bytes, - max_row_offsets_it, - max_children_max_row_offsets.begin(), - num_columns, - rowidx.begin(), - rowidx.begin() + 1, - stream.value()); - cudf::detail::cuda_memcpy_async(max_children_max_row_offsets.data(), - max_row_offsets.data(), - sizeof(row_offset_t), - cudf::detail::host_memory_kind::PAGEABLE, - stream); + auto max_children_max_row_offsets_colidx_update = + [&colidx, &stream](NodeIndexT num_columns, + device_span rowidx, + device_span parent_col_ids, + device_span max_row_offsets) { + auto max_row_offsets_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [colidx = colidx.begin(), + max_row_offsets = max_row_offsets.begin()] __device__(size_t i) { + if (colidx[i] == -1) + return -1; + else + return max_row_offsets[colidx[i]]; + })); + rmm::device_uvector max_children_max_row_offsets(num_columns, stream); + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Max(nullptr, + temp_storage_bytes, + max_row_offsets_it, + max_children_max_row_offsets.begin(), + num_columns, + rowidx.begin(), + rowidx.begin() + 1, + stream.value()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + 
cub::DeviceSegmentedReduce::Max(d_temp_storage.data(), + temp_storage_bytes, + max_row_offsets_it, + max_children_max_row_offsets.begin(), + num_columns, + rowidx.begin(), + rowidx.begin() + 1, + stream.value()); + cudf::detail::cuda_memcpy_async(max_children_max_row_offsets.data(), + max_row_offsets.data(), + sizeof(row_offset_t), + cudf::detail::host_memory_kind::PAGEABLE, + stream); #ifdef CSR_DEBUG_PRINT - print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); + print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); #endif - thrust::transform_if( - rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), - max_children_max_row_offsets.begin()), - thrust::make_zip_iterator(thrust::make_counting_iterator(0) + num_columns, - max_children_max_row_offsets.end()), - max_children_max_row_offsets.begin(), - [max_row_offsets = max_row_offsets.begin()] __device__(auto tup) { - auto n = thrust::get<0>(tup); - return max_row_offsets[n]; - }, - [] __device__(auto tup) { - auto e = thrust::get<1>(tup); - return e == -1; - }); + thrust::transform_if( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), + max_children_max_row_offsets.begin()), + thrust::make_zip_iterator(thrust::make_counting_iterator(0) + num_columns, + max_children_max_row_offsets.end()), + max_children_max_row_offsets.begin(), + [max_row_offsets = max_row_offsets.begin()] __device__(auto tup) { + auto n = thrust::get<0>(tup); + return max_row_offsets[n]; + }, + [] __device__(auto tup) { + auto e = thrust::get<1>(tup); + return e == -1; + }); #ifdef CSR_DEBUG_PRINT - print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); + print(max_children_max_row_offsets, "h_max_children_max_row_offsets", stream); #endif - // Skip the parent of root node - thrust::scatter(rmm::exec_policy_nosync(stream), - parent_col_ids.begin() + 1, - parent_col_ids.end(), - 
rowidx.begin() + 1, - colidx.begin()); + // Skip the parent of root node + thrust::scatter(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + rowidx.begin() + 1, + colidx.begin()); #ifdef CSR_DEBUG_PRINT - print(colidx, "h_colidx", stream); + print(colidx, "h_colidx", stream); #endif - return max_children_max_row_offsets; - }; + return max_children_max_row_offsets; + }; row_array_parent_col_id = rev_mapped_col_ids.element(row_array_parent_col_id, stream); auto root_node = (column_categories.element(0, stream) == NC_LIST && !is_array_of_arrays) || @@ -473,58 +478,71 @@ std::tuple reduce_to_column_tree( ? 1 : 0; - auto update_max_row_offsets = [&tree, row_array_parent_col_id, is_array_of_arrays, root_node, &stream](NodeIndexT num_columns, device_span rowidx, device_span colidx, device_span column_categories, device_span max_row_offsets, device_span max_children_max_row_offsets) { - // Vector to store the latest ancestor of LIST type. If no such ancestor is found, - // store the root node of tree. 
Note that a node cannot be an ancestor of itself - auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( - static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); - auto* dev_num_levels_ptr = thrust::max_element( - rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); - if (root_node) list_ancestors.set_element_async(root_node, root_node, stream); - thrust::for_each_n(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(root_node + 1), - num_columns - root_node - 1, - [rowidx = rowidx.begin(), - colidx = colidx.begin(), - column_categories = column_categories.begin(), - dev_num_levels_ptr, - is_array_of_arrays, - row_array_parent_col_id, - root_node, - list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { - auto num_levels = *dev_num_levels_ptr; - list_ancestors[node] = colidx[rowidx[node]]; - for (int level = 0; - level <= num_levels && list_ancestors[node] != root_node && - column_categories[list_ancestors[node]] != NC_LIST; - level++) { - list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; - } - }); + auto update_max_row_offsets = + [&tree, row_array_parent_col_id, is_array_of_arrays, root_node, &stream]( + NodeIndexT num_columns, + device_span rowidx, + device_span colidx, + device_span column_categories, + device_span max_row_offsets, + device_span max_children_max_row_offsets) { + // Vector to store the latest ancestor of LIST type. If no such ancestor is found, + // store the root node of tree. 
Note that a node cannot be an ancestor of itself + auto list_ancestors = cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); + auto* dev_num_levels_ptr = thrust::max_element( + rmm::exec_policy_nosync(stream), tree.node_levels.begin(), tree.node_levels.end()); + if (root_node) list_ancestors.set_element_async(root_node, root_node, stream); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(root_node + 1), + num_columns - root_node - 1, + [rowidx = rowidx.begin(), + colidx = colidx.begin(), + column_categories = column_categories.begin(), + dev_num_levels_ptr, + is_array_of_arrays, + row_array_parent_col_id, + root_node, + list_ancestors = list_ancestors.begin()] __device__(NodeIndexT node) { + auto num_levels = *dev_num_levels_ptr; + list_ancestors[node] = colidx[rowidx[node]]; + for (int level = 0; + level <= num_levels && list_ancestors[node] != root_node && + column_categories[list_ancestors[node]] != NC_LIST; + level++) { + list_ancestors[node] = colidx[rowidx[list_ancestors[node]]]; + } + }); #ifdef CSR_DEBUG_PRINT - print(list_ancestors, "h_list_ancestors", stream); + print(list_ancestors, "h_list_ancestors", stream); #endif - // exclude root node - thrust::gather(rmm::exec_policy_nosync(stream), - list_ancestors.begin(), - list_ancestors.end(), - max_children_max_row_offsets.begin(), - max_row_offsets.begin()); + // exclude root node + thrust::gather(rmm::exec_policy_nosync(stream), + list_ancestors.begin(), + list_ancestors.end(), + max_children_max_row_offsets.begin(), + max_row_offsets.begin()); #ifdef CSR_DEBUG_PRINT - print(max_row_offsets, "h_max_row_offsets", stream); + print(max_row_offsets, "h_max_row_offsets", stream); #endif - }; + }; // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. 
// create list's children max_row_offsets array // gather the max_row_offsets from children row offset array. if (num_columns > 1) { - auto max_children_max_row_offsets = max_children_max_row_offsets_colidx_update(num_columns, rowidx, parent_col_ids, max_row_offsets); - update_max_row_offsets(num_columns, rowidx, colidx, column_categories, max_row_offsets, max_children_max_row_offsets); + auto max_children_max_row_offsets = max_children_max_row_offsets_colidx_update( + num_columns, rowidx, parent_col_ids, max_row_offsets); + update_max_row_offsets(num_columns, + rowidx, + colidx, + column_categories, + max_row_offsets, + max_children_max_row_offsets); } return std::tuple{ From 14ba59eeccaa49bcf49e73cbf4bbc0ceb38eb6a2 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Sat, 21 Sep 2024 03:13:33 +0000 Subject: [PATCH 42/46] move back reduce_to_column_tree --- cpp/src/io/json/column_tree_construction.cu | 193 ----------------- cpp/src/io/json/json_column.cu | 217 ++++++++++++++++++++ 2 files changed, 217 insertions(+), 193 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index aa7b6a91ced..9bf300393a8 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -552,197 +552,4 @@ std::tuple reduce_to_column_tree( } } // namespace experimental::detail - -namespace detail { -/** - * @brief Reduces node tree representation to column tree representation. 
- * - * @param tree Node tree representation of JSON string - * @param original_col_ids Column ids of nodes - * @param sorted_col_ids Sorted column ids of nodes - * @param ordered_node_ids Node ids of nodes sorted by column ids - * @param row_offsets Row offsets of nodes - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true - * @param stream CUDA stream used for device memory operations and kernel launches - * @return A tuple of column tree representation of JSON string, column ids of columns, and - * max row offsets of columns - */ -std::tuple, rmm::device_uvector> -reduce_to_column_tree(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, - bool is_array_of_arrays, - NodeIndexT const row_array_parent_col_id, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - // 1. column count for allocation - auto const num_columns = thrust::unique_count( - rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end()); - - // 2. reduce_by_key {col_id}, {row_offset}, max. - rmm::device_uvector unique_col_ids(num_columns, stream); - rmm::device_uvector max_row_offsets(num_columns, stream); - // 3. reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) - rmm::device_uvector column_categories(num_columns, stream); - max_row_offsets_col_categories( - sorted_col_ids.begin(), - sorted_col_ids.end(), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin())), - unique_col_ids.begin(), - thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), - stream); - - // 4. 
unique_copy parent_node_ids, ranges - rmm::device_uvector column_levels(0, stream); // not required - rmm::device_uvector parent_col_ids(num_columns, stream); - rmm::device_uvector col_range_begin(num_columns, stream); // Field names - rmm::device_uvector col_range_end(num_columns, stream); - rmm::device_uvector unique_node_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), - sorted_col_ids.begin(), - sorted_col_ids.end(), - ordered_node_ids.begin(), - thrust::make_discard_iterator(), - unique_node_ids.begin()); - - thrust::copy_n( - rmm::exec_policy_nosync(stream), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), - unique_node_ids.size(), - thrust::make_zip_iterator( - parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); - - // convert parent_node_ids to parent_col_ids - thrust::transform( - rmm::exec_policy_nosync(stream), - parent_col_ids.begin(), - parent_col_ids.end(), - parent_col_ids.begin(), - [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { - return parent_node_id == parent_node_sentinel ? 
parent_node_sentinel - : col_ids[parent_node_id]; - }); - -#ifdef CSR_DEBUG_PRINT - print(unique_col_ids, "h_unique_col_ids", stream); - print(parent_col_ids, "h_parent_col_ids", stream); -#endif - - // condition is true if parent is not a list, or sentinel/root - // Special case to return true if parent is a list and is_array_of_arrays is true - auto is_non_list_parent = [column_categories = column_categories.begin(), - is_array_of_arrays, - row_array_parent_col_id] __device__(auto parent_col_id) -> bool { - return !(parent_col_id == parent_node_sentinel || - column_categories[parent_col_id] == NC_LIST && - (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); - }; - -#ifdef CSR_DEBUG_PRINT - print(max_row_offsets, "h_max_row_offsets", stream); -#endif - - // Mixed types in List children go to different columns, - // so all immediate children of list column should have same max_row_offsets. - // create list's children max_row_offsets array. (initialize to zero) - // atomicMax on children max_row_offsets array. - // gather the max_row_offsets from children row offset array. 
- { - auto list_parents_children_max_row_offsets = - cudf::detail::make_zeroed_device_uvector_async( - static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); - thrust::for_each(rmm::exec_policy_nosync(stream), - unique_col_ids.begin(), - unique_col_ids.end(), - [column_categories = column_categories.begin(), - parent_col_ids = parent_col_ids.begin(), - max_row_offsets = max_row_offsets.begin(), - list_parents_children_max_row_offsets = - list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { - auto parent_col_id = parent_col_ids[col_id]; - if (parent_col_id != parent_node_sentinel and - column_categories[parent_col_id] == node_t::NC_LIST) { - cuda::atomic_ref ref{ - *(list_parents_children_max_row_offsets + parent_col_id)}; - ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); - } - }); - - thrust::gather_if( - rmm::exec_policy_nosync(stream), - parent_col_ids.begin(), - parent_col_ids.end(), - parent_col_ids.begin(), - list_parents_children_max_row_offsets.begin(), - max_row_offsets.begin(), - [column_categories = column_categories.begin()] __device__(size_type parent_col_id) { - return parent_col_id != parent_node_sentinel and - column_categories[parent_col_id] == node_t::NC_LIST; - }); - } - -#ifdef CSR_DEBUG_PRINT - print(max_row_offsets, "h_max_row_offsets", stream); -#endif - - // copy lists' max_row_offsets to children. - // all structs should have same size. 
- thrust::transform_if( - rmm::exec_policy_nosync(stream), - unique_col_ids.begin(), - unique_col_ids.end(), - max_row_offsets.begin(), - [column_categories = column_categories.begin(), - is_non_list_parent, - parent_col_ids = parent_col_ids.begin(), - max_row_offsets = max_row_offsets.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - while (is_non_list_parent(parent_col_id)) { - col_id = parent_col_id; - parent_col_id = parent_col_ids[parent_col_id]; - } - return max_row_offsets[col_id]; - }, - [column_categories = column_categories.begin(), - is_non_list_parent, - parent_col_ids = parent_col_ids.begin()] __device__(size_type col_id) { - auto parent_col_id = parent_col_ids[col_id]; - // condition is true if parent is not a list, or sentinel/root - return is_non_list_parent(parent_col_id); - }); - -#ifdef CSR_DEBUG_PRINT - print(max_row_offsets, "h_max_row_offsets", stream); -#endif - - // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) - thrust::transform_if( - rmm::exec_policy_nosync(stream), - col_range_begin.begin(), - col_range_begin.end(), - column_categories.begin(), - col_range_end.begin(), - [] __device__(auto i) { return i + 1; }, - [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); - - return std::tuple{tree_meta_t{std::move(column_categories), - std::move(parent_col_ids), - std::move(column_levels), - std::move(col_range_begin), - std::move(col_range_end)}, - std::move(unique_col_ids), - std::move(max_row_offsets)}; -} - -} // namespace detail } // namespace cudf::io::json diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 63d196b19e8..d1921622225 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -102,6 +102,223 @@ void print_tree(host_span input, printf(" (JSON)\n"); } +/** + * @brief Reduces node tree representation 
to column tree representation. + * + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple, rmm::device_uvector> +reduce_to_column_tree(tree_meta_t& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // 1. column count for allocation + auto const num_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + + // 2. reduce_by_key {col_id}, {row_offset}, max. + rmm::device_uvector unique_col_ids(num_columns, stream); + rmm::device_uvector max_row_offsets(num_columns, stream); + // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) + rmm::device_uvector column_categories(num_columns, stream); + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin())), + unique_col_ids.begin(), + thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), + thrust::equal_to(), + [] __device__(auto a, auto b) { + auto row_offset_a = thrust::get<0>(a); + auto row_offset_b = thrust::get<0>(b); + auto type_a = thrust::get<1>(a); + auto type_b = thrust::get<1>(b); + + NodeT ctg; + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + ctg = type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + + // STR = STR + ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); + } else if (is_b_leaf) { + ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a)); + } else { + ctg = NC_ERR; + } + + return thrust::make_pair( + thrust::maximum{}(row_offset_a, row_offset_b), ctg); + }); + + // 4. 
unique_copy parent_node_ids, ranges + rmm::device_uvector column_levels(0, stream); // not required + rmm::device_uvector parent_col_ids(num_columns, stream); + rmm::device_uvector col_range_begin(num_columns, stream); // Field names + rmm::device_uvector col_range_end(num_columns, stream); + rmm::device_uvector unique_node_ids(num_columns, stream); + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_node_ids.begin(), + thrust::make_discard_iterator(), + unique_node_ids.begin()); + + thrust::copy_n( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator( + thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), + unique_node_ids.size(), + thrust::make_zip_iterator( + parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); + + // convert parent_node_ids to parent_col_ids + thrust::transform( + rmm::exec_policy_nosync(stream), + parent_col_ids.begin(), + parent_col_ids.end(), + parent_col_ids.begin(), + [col_ids = original_col_ids.begin()] __device__(auto parent_node_id) -> size_type { + return parent_node_id == parent_node_sentinel ? 
parent_node_sentinel + : col_ids[parent_node_id]; + }); + +#ifdef CSR_DEBUG_PRINT + print(unique_col_ids, "h_unique_col_ids", stream); + print(parent_col_ids, "h_parent_col_ids", stream); +#endif + + // condition is true if parent is not a list, or sentinel/root + // Special case to return true if parent is a list and is_array_of_arrays is true + auto is_non_list_parent = [column_categories = column_categories.begin(), + is_array_of_arrays, + row_array_parent_col_id] __device__(auto parent_col_id) -> bool { + return !(parent_col_id == parent_node_sentinel || + column_categories[parent_col_id] == NC_LIST && + (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); + }; + +#ifdef CSR_DEBUG_PRINT + print(max_row_offsets, "h_max_row_offsets", stream); +#endif + + // Mixed types in List children go to different columns, + // so all immediate children of list column should have same max_row_offsets. + // create list's children max_row_offsets array. (initialize to zero) + // atomicMax on children max_row_offsets array. + // gather the max_row_offsets from children row offset array. 
+ { + auto list_parents_children_max_row_offsets = + cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); + thrust::for_each(rmm::exec_policy_nosync(stream), + unique_col_ids.begin(), + unique_col_ids.end(), + [column_categories = column_categories.begin(), + parent_col_ids = parent_col_ids.begin(), + max_row_offsets = max_row_offsets.begin(), + list_parents_children_max_row_offsets = + list_parents_children_max_row_offsets.begin()] __device__(auto col_id) { + auto parent_col_id = parent_col_ids[col_id]; + if (parent_col_id != parent_node_sentinel and + column_categories[parent_col_id] == node_t::NC_LIST) { + cuda::atomic_ref ref{ + *(list_parents_children_max_row_offsets + parent_col_id)}; + ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); + } + }); + + thrust::gather_if( + rmm::exec_policy_nosync(stream), + parent_col_ids.begin(), + parent_col_ids.end(), + parent_col_ids.begin(), + list_parents_children_max_row_offsets.begin(), + max_row_offsets.begin(), + [column_categories = column_categories.begin()] __device__(size_type parent_col_id) { + return parent_col_id != parent_node_sentinel and + column_categories[parent_col_id] == node_t::NC_LIST; + }); + } + +#ifdef CSR_DEBUG_PRINT + print(max_row_offsets, "h_max_row_offsets", stream); +#endif + + // copy lists' max_row_offsets to children. + // all structs should have same size. 
+ thrust::transform_if( + rmm::exec_policy_nosync(stream), + unique_col_ids.begin(), + unique_col_ids.end(), + max_row_offsets.begin(), + [column_categories = column_categories.begin(), + is_non_list_parent, + parent_col_ids = parent_col_ids.begin(), + max_row_offsets = max_row_offsets.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + while (is_non_list_parent(parent_col_id)) { + col_id = parent_col_id; + parent_col_id = parent_col_ids[parent_col_id]; + } + return max_row_offsets[col_id]; + }, + [column_categories = column_categories.begin(), + is_non_list_parent, + parent_col_ids = parent_col_ids.begin()] __device__(size_type col_id) { + auto parent_col_id = parent_col_ids[col_id]; + // condition is true if parent is not a list, or sentinel/root + return is_non_list_parent(parent_col_id); + }); + +#ifdef CSR_DEBUG_PRINT + print(max_row_offsets, "h_max_row_offsets", stream); +#endif + + // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) + thrust::transform_if( + rmm::exec_policy_nosync(stream), + col_range_begin.begin(), + col_range_begin.end(), + column_categories.begin(), + col_range_end.begin(), + [] __device__(auto i) { return i + 1; }, + [] __device__(NodeT type) { return type == NC_STRUCT || type == NC_LIST; }); + + return std::tuple{tree_meta_t{std::move(column_categories), + std::move(parent_col_ids), + std::move(column_levels), + std::move(col_range_begin), + std::move(col_range_end)}, + std::move(unique_col_ids), + std::move(max_row_offsets)}; +} + /** * @brief Get the column indices for the values column for array of arrays rows * From 9e582d5d8038f2f1851edf35199754af375e8d2d Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Sat, 21 Sep 2024 03:24:40 +0000 Subject: [PATCH 43/46] cleanup: remove unused header --- cpp/src/io/json/json_tree.cu | 51 ++++++++++++++++++++- cpp/src/io/json/json_utils.hpp | 
82 ---------------------------------- 2 files changed, 50 insertions(+), 83 deletions(-) delete mode 100644 cpp/src/io/json/json_utils.hpp diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index 70e69e1aa91..3bfa3c42243 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -15,7 +15,6 @@ */ #include "io/utilities/hostdevice_vector.hpp" -#include "json_utils.hpp" #include "nested_json.hpp" #include @@ -34,6 +33,7 @@ #include #include +#include #include #include #include @@ -139,6 +139,55 @@ struct is_nested_end { } }; +/** + * @brief Returns the stably sorted keys and their sorted order + * + * Uses cub stable radix sort. The order is generated internally, which saves a copy and memory. + * Since both the keys and the order are returned, using double buffers avoids an extra copy to a + * user-provided output iterator. + * + * @tparam IndexType sorted order type + * @tparam KeyType key type + * @param keys keys to sort + * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @return Sorted keys and indices producing that sorted order + */ +template +std::pair, rmm::device_uvector> stable_sorted_key_order( + cudf::device_span keys, rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + // Determine temporary device storage requirements + rmm::device_uvector keys_buffer1(keys.size(), stream); + rmm::device_uvector keys_buffer2(keys.size(), stream); + rmm::device_uvector order_buffer1(keys.size(), stream); + rmm::device_uvector order_buffer2(keys.size(), stream); + cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); + cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs( + nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); + rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); + + thrust::copy(rmm::exec_policy_nosync(stream), keys.begin(), keys.end(), keys_buffer1.begin()); + thrust::sequence(rmm::exec_policy_nosync(stream), order_buffer1.begin(), order_buffer1.end()); + + cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), + temp_storage_bytes, + keys_buffer, + order_buffer, + keys.size(), + 0, + sizeof(KeyType) * 8, + stream.value()); + + return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) + : std::move(keys_buffer2), + order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1) + : std::move(order_buffer2)}; +} + /** * @brief Propagate parent node from first sibling to other siblings. * diff --git a/cpp/src/io/json/json_utils.hpp b/cpp/src/io/json/json_utils.hpp deleted file mode 100644 index 995f5d0405f..00000000000 --- a/cpp/src/io/json/json_utils.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -namespace cudf::io::json::detail { -/** - * @brief Returns stable sorted keys and its sorted order - * - * Uses cub stable radix sort. The order is internally generated, hence it saves a copy and memory. - * Since the key and order is returned, using double buffer helps to avoid extra copy to user - * provided output iterator. - * - * @tparam IndexType sorted order type - * @tparam KeyType key type - * @param keys keys to sort - * @param stream CUDA stream used for device memory operations and kernel launches. 
- * @return Sorted keys and indices producing that sorted order - */ -template -std::pair, rmm::device_uvector> stable_sorted_key_order( - cudf::device_span keys, rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - - // Determine temporary device storage requirements - rmm::device_uvector keys_buffer1(keys.size(), stream); - rmm::device_uvector keys_buffer2(keys.size(), stream); - rmm::device_uvector order_buffer1(keys.size(), stream); - rmm::device_uvector order_buffer2(keys.size(), stream); - cub::DoubleBuffer order_buffer(order_buffer1.data(), order_buffer2.data()); - cub::DoubleBuffer keys_buffer(keys_buffer1.data(), keys_buffer2.data()); - size_t temp_storage_bytes = 0; - cub::DeviceRadixSort::SortPairs( - nullptr, temp_storage_bytes, keys_buffer, order_buffer, keys.size()); - rmm::device_buffer d_temp_storage(temp_storage_bytes, stream); - - thrust::copy(rmm::exec_policy_nosync(stream), keys.begin(), keys.end(), keys_buffer1.begin()); - thrust::sequence(rmm::exec_policy_nosync(stream), order_buffer1.begin(), order_buffer1.end()); - - cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), - temp_storage_bytes, - keys_buffer, - order_buffer, - keys.size(), - 0, - sizeof(KeyType) * 8, - stream.value()); - - return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1) - : std::move(keys_buffer2), - order_buffer.Current() == order_buffer1.data() ? 
std::move(order_buffer1) - : std::move(order_buffer2)}; -} - -} // namespace cudf::io::json::detail From cd69fd159b24417ee50f478df526b6d60f3b2edb Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Sat, 21 Sep 2024 03:36:22 +0000 Subject: [PATCH 44/46] undo merging of reduce_by_key --- cpp/src/io/json/column_tree_construction.cu | 1 - cpp/src/io/json/json_column.cu | 68 ++++++++++----------- 2 files changed, 33 insertions(+), 36 deletions(-) diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu index 9bf300393a8..63a1ca3f98e 100644 --- a/cpp/src/io/json/column_tree_construction.cu +++ b/cpp/src/io/json/column_tree_construction.cu @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "json_utils.hpp" #include "nested_json.hpp" #include diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index d1921622225..7191ed3f2f0 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -16,7 +16,6 @@ #include "io/utilities/parsing_utils.cuh" #include "io/utilities/string_parsing.hpp" -#include "json_utils.hpp" #include "nested_json.hpp" #include @@ -134,45 +133,44 @@ reduce_to_column_tree(tree_meta_t& tree, // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); rmm::device_uvector max_row_offsets(num_columns, stream); + auto ordered_row_offsets = + thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()); + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + ordered_row_offsets, + unique_col_ids.begin(), + max_row_offsets.begin(), + thrust::equal_to(), + thrust::maximum()); + // 3. 
reduce_by_key {col_id}, {node_categories} - custom opp (*+v=*, v+v=v, *+#=E) rmm::device_uvector column_categories(num_columns, stream); - thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + thrust::reduce_by_key( + rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end(), - thrust::make_zip_iterator( - thrust::make_permutation_iterator(row_offsets.begin(), ordered_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin())), + thrust::make_permutation_iterator(tree.node_categories.begin(), ordered_node_ids.begin()), unique_col_ids.begin(), - thrust::make_zip_iterator(max_row_offsets.begin(), column_categories.begin()), - thrust::equal_to(), - [] __device__(auto a, auto b) { - auto row_offset_a = thrust::get<0>(a); - auto row_offset_b = thrust::get<0>(b); - auto type_a = thrust::get<1>(a); - auto type_b = thrust::get<1>(b); - - NodeT ctg; - auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); - auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); - // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) - // *+*=*, v+v=v - if (type_a == type_b) { - ctg = type_a; - } else if (is_a_leaf) { - // *+v=*, N+V=N - // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + - // STR = STR - ctg = (type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b)); - } else if (is_b_leaf) { - ctg = (type_a == NC_FN ? NC_ERR : (is_a_leaf ? 
NC_STR : type_a)); - } else { - ctg = NC_ERR; - } - - return thrust::make_pair( - thrust::maximum{}(row_offset_a, row_offset_b), ctg); - }); - + column_categories.begin(), + thrust::equal_to(), + [] __device__(NodeT type_a, NodeT type_b) -> NodeT { + auto is_a_leaf = (type_a == NC_VAL || type_a == NC_STR); + auto is_b_leaf = (type_b == NC_VAL || type_b == NC_STR); + // (v+v=v, *+*=*, *+v=*, *+#=E, NESTED+VAL=NESTED) + // *+*=*, v+v=v + if (type_a == type_b) { + return type_a; + } else if (is_a_leaf) { + // *+v=*, N+V=N + // STRUCT/LIST + STR/VAL = STRUCT/LIST, STR/VAL + FN = ERR, STR/VAL + STR = STR + return type_b == NC_FN ? NC_ERR : (is_b_leaf ? NC_STR : type_b); + } else if (is_b_leaf) { + return type_a == NC_FN ? NC_ERR : (is_a_leaf ? NC_STR : type_a); + } + // *+#=E + return NC_ERR; + }); // 4. unique_copy parent_node_ids, ranges rmm::device_uvector column_levels(0, stream); // not required rmm::device_uvector parent_col_ids(num_columns, stream); From 4d4ce13c46fbc902ef7b496100793e653f10ac6c Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Sat, 21 Sep 2024 04:04:22 +0000 Subject: [PATCH 45/46] move debug flags --- cpp/src/io/json/host_tree_algorithms.cu | 3 +++ cpp/src/io/json/json_column.cu | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu index a0415b24fc3..547d3a74a58 100644 --- a/cpp/src/io/json/host_tree_algorithms.cu +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -44,6 +44,9 @@ #include +#ifndef CSR_DEBUG_EQ +#define CSR_DEBUG_EQ +#endif namespace cudf::io::json::detail { /** diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index b3837cdb7b3..acc4f731186 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -48,9 +48,6 @@ namespace cudf::io::json::detail { // DEBUG prints -#ifndef CSR_DEBUG_EQ -#define CSR_DEBUG_EQ -#endif auto to_cat = [](auto v) -> std::string { switch (v) { From 
7de2ce31bc588609cfdc970982bb89b0392b4310 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Sat, 21 Sep 2024 04:27:43 +0000 Subject: [PATCH 46/46] use result from reduce_to_column_tree for csr --- cpp/src/io/json/host_tree_algorithms.cu | 2 +- cpp/src/io/json/json_column.cu | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu index 547d3a74a58..a0e2399702b 100644 --- a/cpp/src/io/json/host_tree_algorithms.cu +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -390,7 +390,7 @@ void make_device_json_column(device_span input, #ifdef CSR_DEBUG_EQ auto [d_column_tree_csr, d_column_tree_properties] = cudf::io::json::experimental::detail::reduce_to_column_tree( - tree, col_ids, row_offsets, is_array_of_arrays, row_array_parent_col_id, stream); + d_column_tree, d_unique_col_ids, d_max_row_offsets, is_array_of_arrays, row_array_parent_col_id, stream); auto iseq = check_equality( d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index acc4f731186..04794f21748 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -162,7 +162,7 @@ reduce_to_column_tree(tree_meta_t& tree, return NC_ERR; }); // 4. 
unique_copy parent_node_ids, ranges - rmm::device_uvector column_levels(0, stream); // not required + rmm::device_uvector column_levels(num_columns, stream); // required for CSR rmm::device_uvector parent_col_ids(num_columns, stream); rmm::device_uvector col_range_begin(num_columns, stream); // Field names rmm::device_uvector col_range_end(num_columns, stream); @@ -179,10 +179,11 @@ reduce_to_column_tree(tree_meta_t& tree, thrust::make_zip_iterator( thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), - thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), + thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin()), + thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin())), unique_node_ids.size(), thrust::make_zip_iterator( - parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); + parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin(), column_levels.begin())); // convert parent_node_ids to parent_col_ids thrust::transform(