diff --git a/cpp/include/cudf_test/print_utilities.cuh b/cpp/include/cudf_test/print_utilities.cuh new file mode 100644 index 00000000000..37ffcd401fc --- /dev/null +++ b/cpp/include/cudf_test/print_utilities.cuh @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +#include + +#include + +namespace cudf::test::print { + +constexpr int32_t hex_tag = 0; + +template +struct TaggedType { + T v; +}; + +template +using hex_t = TaggedType; + +/** + * @brief Function object to transform a built-in type to a tagged type (e.g., in order to print + * values from an iterator returning uint32_t as hex values) + * + * @tparam TaggedTypeT A TaggedType template specialisation + */ +template +struct ToTaggedType { + template + CUDF_HOST_DEVICE TaggedTypeT operator()(T const& v) const + { + return TaggedTypeT{v}; + } +}; + +/** + * @brief Returns an iterator that causes the values from \p it to be printed as hex values. + * + * @tparam InItT A random-access input iterator type + * @param it A random-access input iterator t + * @return + */ +template +auto hex(InItT it) +{ + using value_t = typename std::iterator_traits::value_type; + using tagged_t = hex_t; + return thrust::make_transform_iterator(it, ToTaggedType{}); +} + +template && std::is_signed_v)> +CUDF_HOST_DEVICE void print_value(int32_t width, T arg) +{ + printf("%*d", width, arg); +} + +template && std::is_unsigned_v)> +CUDF_HOST_DEVICE void print_value(int32_t width, T arg) +{ + printf("%*d", width, arg); +} + +CUDF_HOST_DEVICE void print_value(int32_t width, char arg) { printf("%*c", width, arg); } + +template +CUDF_HOST_DEVICE void print_value(int32_t width, hex_t arg) +{ + printf("%*X", width, arg.v); +} + +namespace detail { +template +CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg) +{ + print_value(width, arg); +} + +template +CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg, Ts... args) +{ + print_value(width, arg); + if (delimiter) printf("%c", delimiter); + print_values(width, delimiter, args...); +} + +template +__global__ void print_array_kernel(std::size_t count, int32_t width, char delimiter, Ts... args) +{ + if (threadIdx.x == 0 && blockIdx.x == 0) { + for (std::size_t i = 0; i < count; i++) { + printf("%6lu: ", i); + print_values(width, delimiter, args[i]...); + printf("\n"); + } + } +} +} // namespace detail + +/** + * @brief Prints \p count elements from each of the given device-accessible iterators. + * + * @param count The number of items to print from each device-accessible iterator + * @param stream The cuda stream to which the printing kernel shall be dispatched + * @param args List of iterators to be printed + */ +template +void print_array(std::size_t count, rmm::cuda_stream_view stream, Ts... 
args) +{ + // The width to pad printed numbers to + constexpr int32_t width = 6; + + // Delimiter used for separating values from subsequent iterators + constexpr char delimiter = ','; + + // TODO we want this to compile to nothing dependnig on compiler flag, rather than runtime + if (std::getenv("CUDA_DBG_DUMP") != nullptr) { + detail::print_array_kernel<<<1, 1, 0, stream.value()>>>(count, width, delimiter, args...); + } +} + +} // namespace cudf::test::print diff --git a/cpp/src/io/fst/logical_stack.cuh b/cpp/src/io/fst/logical_stack.cuh new file mode 100644 index 00000000000..9502922a379 --- /dev/null +++ b/cpp/src/io/fst/logical_stack.cuh @@ -0,0 +1,464 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace cudf::io::fst { + +/** + * @brief Describes the kind of stack operation. + */ +enum class stack_op_type : int8_t { + READ = 0, ///< Operation reading what is currently on top of the stack + PUSH = 1, ///< Operation pushing a new item on top of the stack + POP = 2 ///< Operation popping the item currently on top of the stack +}; + +namespace detail { + +/** + * @brief A convenience struct that represents a stack operation as a pair, where the stack_level + * represents the stack's level and the value represents the stack symbol. + * + * @tparam StackLevelT The stack level type sufficient to cover all stack levels. Must be signed + * type as any subsequence of stack operations must be able to be covered. E.g., consider the first + * 10 operations are all push and the last 10 operations are all pop operations, we need to be able + * to represent a partial aggregate of the first ten items, which is '+10', just as well as a + * partial aggregate of the last ten items, which is '-10'. + * @tparam ValueT The value type that corresponds to the stack symbols (i.e., covers the stack + * alphabet). + */ +template +struct StackOp { + // Must be signed type as any subsequence of stack operations must be able to be covered. + static_assert(std::is_signed_v, "StackLevelT has to be a signed type"); + + StackLevelT stack_level; + ValueT value; +}; + +/** + * @brief Helper class to assist with radix sorting StackOp instances by stack level. + * + * @tparam BYTE_SIZE The size of the StackOp. + */ +template +struct StackOpToUnsigned { + using UnsignedT = void; +}; + +template <> +struct StackOpToUnsigned<2U> { + using UnsignedT = uint16_t; +}; + +template <> +struct StackOpToUnsigned<4U> { + using UnsignedT = uint32_t; +}; + +template <> +struct StackOpToUnsigned<8U> { + using UnsignedT = uint64_t; +}; + +/** + * @brief Alias template to retrieve an unsigned bit-representation that can be used for radix + * sorting the stack level of a StackOp. 
+ * + * @tparam StackOpT The StackOp class template instance for which to get an unsigned + * bit-representation + */ +template +using UnsignedStackOpType = typename StackOpToUnsigned::UnsignedT; + +/** + * @brief Function object class template used for converting a stack symbol to a stack + * operation that has a stack level to which an operation applies. + * + * @tparam StackOpT + * @tparam StackSymbolToStackOpTypeT + */ +template +struct StackSymbolToStackOp { + template + constexpr CUDF_HOST_DEVICE StackOpT operator()(StackSymbolT const& stack_symbol) const + { + stack_op_type stack_op = symbol_to_stack_op_type(stack_symbol); + // PUSH => +1, POP => -1, READ => 0 + int32_t level_delta = stack_op == stack_op_type::PUSH ? 1 + : stack_op == stack_op_type::POP ? -1 + : 0; + return StackOpT{static_cast(level_delta), stack_symbol}; + } + + /// Function object returning a stack operation type for a given stack symbol + StackSymbolToStackOpTypeT symbol_to_stack_op_type; +}; + +/** + * @brief Binary reduction operator to compute the absolute stack level from relative stack levels + * (i.e., +1 for a PUSH, -1 for a POP operation). + */ +struct AddStackLevelFromStackOp { + template + constexpr CUDF_HOST_DEVICE StackOp operator()( + StackOp const& lhs, StackOp const& rhs) const + { + StackLevelT new_level = lhs.stack_level + rhs.stack_level; + return StackOp{new_level, rhs.value}; + } +}; + +/** + * @brief Binary reduction operator that propagates a write operation for a specific stack level to + * all reads of that same stack level. That is, if the stack level of LHS compares equal to the + * stack level of the RHS and if the RHS is a read and the LHS is a write operation type, then we + * return LHS, otherwise we return the RHS. + */ +template +struct PopulatePopWithPush { + template + constexpr CUDF_HOST_DEVICE StackOp operator()( + StackOp const& lhs, StackOp const& rhs) const + { + // If RHS is a read, then we need to figure out whether we can propagate the value from the LHS + bool is_rhs_read = symbol_to_stack_op_type(rhs.value) != stack_op_type::PUSH; + + // Whether LHS is a matching write (i.e., the push operation that is on top of the stack for the + // RHS's read) + bool is_lhs_matching_write = (lhs.stack_level == rhs.stack_level) && + symbol_to_stack_op_type(lhs.value) == stack_op_type::PUSH; + + return (is_rhs_read && is_lhs_matching_write) ? lhs : rhs; + } + + /// Function object returning a stack operation type for a given stack symbol + StackSymbolToStackOpTypeT symbol_to_stack_op_type; +}; + +/** + * @brief Binary reduction operator that is used to replace each read_symbol occurrence with the + * last non-read_symbol that precedes such read_symbol. + */ +template +struct PropagateLastWrite { + constexpr CUDF_HOST_DEVICE StackSymbolT operator()(StackSymbolT const& lhs, + StackSymbolT const& rhs) const + { + // If RHS is a yet-to-be-propagated, then we need to check whether we can use the LHS to fill + bool is_rhs_read = (rhs == read_symbol); + + // We propagate the write from the LHS if it's a write + bool is_lhs_write = (lhs != read_symbol); + + return (is_rhs_read && is_lhs_write) ? lhs : rhs; + } + + /// The read_symbol that is supposed to be replaced + StackSymbolT read_symbol; +}; + +/** + * @brief Helper function object class to convert a StackOp to the stack symbol of that + * StackOp. 
+ */ +struct StackOpToStackSymbol { + template + constexpr CUDF_HOST_DEVICE ValueT operator()(StackOp const& kv_op) const + { + return kv_op.value; + } +}; + +/** + * @brief Replaces all operations that apply to stack level '0' with the empty stack symbol + */ +template +struct RemapEmptyStack { + constexpr CUDF_HOST_DEVICE StackOpT operator()(StackOpT const& kv_op) const + { + return kv_op.stack_level == 0 ? empty_stack_symbol : kv_op; + } + StackOpT empty_stack_symbol; +}; + +} // namespace detail + +/** + * @brief Takes a sparse representation of a sequence of stack operations that either push something + * onto the stack or pop something from the stack and resolves the symbol that is on top of the + * stack. + * + * @tparam StackLevelT Signed integer type that must be sufficient to cover [-max_stack_level, + * max_stack_level] for the given sequence of stack operations. Must be signed as it needs to cover + * the stack level of any arbitrary subsequence of stack operations. + * @tparam StackSymbolItT An input iterator type that provides the sequence of symbols that + * represent stack operations + * @tparam SymbolPositionT The index that this stack operation is supposed to apply to + * @tparam StackSymbolToStackOpTypeT Function object class to transform items from StackSymbolItT to + * stack_op_type + * @tparam TopOfStackOutItT Output iterator type to which StackSymbolT are being assigned + * @tparam StackSymbolT The internal type being used (usually corresponding to StackSymbolItT's + * value_type) + * @tparam OffsetT Signed or unsigned integer type large enough to index into both the sparse input + * sequence and the top-of-stack output sequence + * + * @param[in] d_symbols Sequence of symbols that represent stack operations. Memory may alias with + * \p d_top_of_stack + * @param[in,out] d_symbol_positions Sequence of symbol positions (for a sparse representation), + * sequence must be ordered in ascending order. Note, the memory of this array is repurposed for + * double-buffering. 
+ * @param[in] symbol_to_stack_op Function object that returns a stack operation type (push, pop, or + * read) for a given symbol from \p d_symbols + * @param[out] d_top_of_stack A random access output iterator that will be populated with + * what-is-on-top-of-the-stack for the given sequence of stack operations \p d_symbols + * @param[in] empty_stack_symbol The symbol that will be written to top_of_stack whenever the stack + * was empty + * @param[in] read_symbol A symbol that may not be confused for a symbol that would push to the + * stack + * @param[in] num_symbols_out The number of symbols that are supposed to be filled with + * what-is-on-top-of-the-stack + * @param[in] stream The cuda stream to which to dispatch the work + */ +template +void sparse_stack_op_to_top_of_stack(StackSymbolItT d_symbols, + device_span d_symbol_positions, + StackSymbolToStackOpTypeT symbol_to_stack_op, + TopOfStackOutItT d_top_of_stack, + StackSymbolT const empty_stack_symbol, + StackSymbolT const read_symbol, + std::size_t const num_symbols_out, + rmm::cuda_stream_view stream = cudf::default_stream_value) +{ + rmm::device_buffer temp_storage{}; + + // Type used to hold pairs of (stack_level, value) pairs + using StackOpT = detail::StackOp; + + // The unsigned integer type that we use for radix sorting items of type StackOpT + using StackOpUnsignedT = detail::UnsignedStackOpType; + static_assert(!std::is_void(), "unsupported StackOpT size"); + + // Transforming sequence of stack symbols to stack operations + using StackSymbolToStackOpT = detail::StackSymbolToStackOp; + + // TransformInputIterator converting stack symbols to stack operations + using TransformInputItT = + cub::TransformInputIterator; + + auto const num_symbols_in = d_symbol_positions.size(); + + // Converting a stack symbol that may either push or pop to a stack operation: + // stack_symbol -> ([+1,0,-1], stack_symbol) + StackSymbolToStackOpT stack_sym_to_kv_op{symbol_to_stack_op}; + TransformInputItT stack_symbols_in(d_symbols, stack_sym_to_kv_op); + + // Double-buffer for sorting along the given sequence of symbol positions (the sparse + // representation) + cub::DoubleBuffer d_symbol_positions_db{nullptr, nullptr}; + + // Double-buffer for sorting the stack operations by the stack level to which such operation + // applies + cub::DoubleBuffer d_kv_operations{nullptr, nullptr}; + + // A double-buffer that aliases memory from d_kv_operations with unsigned types in order to + // be able to perform a radix sort + cub::DoubleBuffer d_kv_operations_unsigned{nullptr, nullptr}; + + constexpr std::size_t bits_per_byte = 8; + constexpr std::size_t begin_bit = offsetof(StackOpT, stack_level) * bits_per_byte; + constexpr std::size_t end_bit = begin_bit + (sizeof(StackOpT::stack_level) * bits_per_byte); + + // The stack operation that makes sure that reads for stack level '0' will be populated + // with the empty_stack_symbol + StackOpT const empty_stack{0, empty_stack_symbol}; + + cub::TransformInputIterator, StackOpT*> + kv_ops_scan_in(nullptr, detail::RemapEmptyStack{empty_stack}); + StackOpT* kv_ops_scan_out = nullptr; + + std::size_t stack_level_scan_bytes = 0; + std::size_t stack_level_sort_bytes = 0; + std::size_t match_level_scan_bytes = 0; + std::size_t propagate_writes_scan_bytes = 0; + + // Getting temporary storage requirements for the prefix sum of the stack level after each + // operation + CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan(nullptr, + stack_level_scan_bytes, + stack_symbols_in, + d_kv_operations.Current(), + 
detail::AddStackLevelFromStackOp{}, + num_symbols_in, + stream)); + + // Getting temporary storage requirements for the stable radix sort (sorting by stack level of the + // operations) + CUDF_CUDA_TRY(cub::DeviceRadixSort::SortPairs(nullptr, + stack_level_sort_bytes, + d_kv_operations_unsigned, + d_symbol_positions_db, + num_symbols_in, + begin_bit, + end_bit, + stream)); + + // Getting temporary storage requirements for the scan to match pop operations with the latest + // push of the same level + CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan( + nullptr, + match_level_scan_bytes, + kv_ops_scan_in, + kv_ops_scan_out, + detail::PopulatePopWithPush{symbol_to_stack_op}, + num_symbols_in, + stream)); + + // Getting temporary storage requirements for the scan to propagate top-of-stack for spots that + // didn't push or pop + CUDF_CUDA_TRY( + cub::DeviceScan::ExclusiveScan(nullptr, + propagate_writes_scan_bytes, + d_top_of_stack, + d_top_of_stack, + detail::PropagateLastWrite{read_symbol}, + empty_stack_symbol, + num_symbols_out, + stream)); + + // Scratch memory required by the algorithms + auto total_temp_storage_bytes = std::max({stack_level_scan_bytes, + stack_level_sort_bytes, + match_level_scan_bytes, + propagate_writes_scan_bytes}); + + if (temp_storage.size() < total_temp_storage_bytes) { + temp_storage.resize(total_temp_storage_bytes, stream); + } + // Actual device buffer size, as we need to pass in an lvalue-ref to cub algorithms as + // temp_storage_bytes + total_temp_storage_bytes = temp_storage.size(); + + rmm::device_uvector d_symbol_position_alt{num_symbols_in, stream}; + rmm::device_uvector d_kv_ops_current{num_symbols_in, stream}; + rmm::device_uvector d_kv_ops_alt{num_symbols_in, stream}; + + //------------------------------------------------------------------------------ + // ALGORITHM + //------------------------------------------------------------------------------ + // Initialize double-buffer for sorting the indexes of the sequence of sparse stack operations + d_symbol_positions_db = + cub::DoubleBuffer{d_symbol_positions.data(), d_symbol_position_alt.data()}; + + // Initialize double-buffer for sorting the indexes of the sequence of sparse stack operations + d_kv_operations = cub::DoubleBuffer{d_kv_ops_current.data(), d_kv_ops_alt.data()}; + + // Compute prefix sum of the stack level after each operation + CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan(temp_storage.data(), + total_temp_storage_bytes, + stack_symbols_in, + d_kv_operations.Current(), + detail::AddStackLevelFromStackOp{}, + num_symbols_in, + stream)); + + // Stable radix sort, sorting by stack level of the operations + d_kv_operations_unsigned = cub::DoubleBuffer{ + reinterpret_cast(d_kv_operations.Current()), + reinterpret_cast(d_kv_operations.Alternate())}; + CUDF_CUDA_TRY(cub::DeviceRadixSort::SortPairs(temp_storage.data(), + total_temp_storage_bytes, + d_kv_operations_unsigned, + d_symbol_positions_db, + num_symbols_in, + begin_bit, + end_bit, + stream)); + + // TransformInputIterator that remaps all operations on stack level 0 to the empty stack symbol + kv_ops_scan_in = {reinterpret_cast(d_kv_operations_unsigned.Current()), + detail::RemapEmptyStack{empty_stack}}; + kv_ops_scan_out = reinterpret_cast(d_kv_operations_unsigned.Alternate()); + + // Inclusive scan to match pop operations with the latest push operation of that level + CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan( + temp_storage.data(), + total_temp_storage_bytes, + kv_ops_scan_in, + kv_ops_scan_out, + 
detail::PopulatePopWithPush{symbol_to_stack_op}, + num_symbols_in, + stream)); + + // Fill the output tape with read-symbol + thrust::fill(rmm::exec_policy(stream), + thrust::device_ptr{d_top_of_stack}, + thrust::device_ptr{d_top_of_stack + num_symbols_out}, + read_symbol); + + // Transform the stack operations to the stack symbol they represent + cub::TransformInputIterator + kv_op_to_stack_sym_it(kv_ops_scan_out, detail::StackOpToStackSymbol{}); + + // Scatter the stack symbols to the output tape (spots that are not scattered to have been + // pre-filled with the read-symbol) + thrust::scatter(rmm::exec_policy(stream), + kv_op_to_stack_sym_it, + kv_op_to_stack_sym_it + num_symbols_in, + d_symbol_positions_db.Current(), + d_top_of_stack); + + // We perform an exclusive scan in order to fill the items at the very left that may + // be reading the empty stack before there's the first push occurrence in the sequence. + // Also, we're interested in the top-of-the-stack symbol before the operation was applied. + CUDF_CUDA_TRY( + cub::DeviceScan::ExclusiveScan(temp_storage.data(), + total_temp_storage_bytes, + d_top_of_stack, + d_top_of_stack, + detail::PropagateLastWrite{read_symbol}, + empty_stack_symbol, + num_symbols_out, + stream)); +} + +} // namespace cudf::io::fst diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 8d8fc3210bb..e1970af6212 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -224,6 +224,7 @@ ConfigureTest(PARQUET_TEST io/parquet_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) +ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) if(CUDF_ENABLE_ARROW_S3) target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") endif() diff --git a/cpp/tests/io/fst/logical_stack_test.cu b/cpp/tests/io/fst/logical_stack_test.cu new file mode 100644 index 00000000000..3c2cdd7fb5c --- /dev/null +++ b/cpp/tests/io/fst/logical_stack_test.cu @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace { +namespace fst = cudf::io::fst; + +/** + * @brief Generates the sparse representation of stack operations to feed into the logical + * stack + * + * @param begin Forward input iterator to the first item of symbols that are checked for whether + * they push or pop + * @param end Forward input iterator to one one past the last item of symbols that are checked for + * whether they push or pop + * @param to_stack_op A function object that takes an instance of InputItT's value type and + * returns the kind of stack operation such item represents (i.e., of type stack_op_type) + * @param stack_symbol_out Forward output iterator to which symbols that either push or pop are + * assigned + * @param stack_op_index_out Forward output iterator to which the indexes of symbols that either + * push or pop are assigned + * @return Pair of iterators to one past the last item of the items written to \p stack_symbol_out + * and \p stack_op_index_out, respectively + */ +template +std::pair to_sparse_stack_symbols( + InputItT begin, + InputItT end, + ToStackOpTypeT to_stack_op, + StackSymbolOutItT stack_symbol_out, + StackOpIndexOutItT stack_op_index_out) +{ + std::size_t index = 0; + for (auto it = begin; it < end; it++) { + fst::stack_op_type op_type = to_stack_op(*it); + if (op_type == fst::stack_op_type::PUSH || op_type == fst::stack_op_type::POP) { + *stack_symbol_out = *it; + *stack_op_index_out = index; + stack_symbol_out++; + stack_op_index_out++; + } + index++; + } + return std::make_pair(stack_symbol_out, stack_op_index_out); +} + +/** + * @brief Reads in a sequence of items that represent stack operations, applies these operations to + * a stack, and, for every operation being read in, outputs what was the symbol on top of the stack + * before the operations was applied. In case the stack is empty before any operation, + * \p empty_stack will be output instead. 
+ * + * @tparam InputItT Forward input iterator type to items representing stack operations + * @tparam ToStackOpTypeT A transform function object class that maps an item representing a stack + * operation to the stack_op_type of such item + * @tparam StackSymbolT Type representing items being pushed onto the stack + * @tparam TopOfStackOutItT A forward output iterator type being assigned items of StackSymbolT + * @param[in] begin Forward iterator to the beginning of the items representing stack operations + * @param[in] end Iterator to one past the last item representing the stack operation + * @param[in] to_stack_op A function object that takes an instance of InputItT's value type and + * returns the kind of stack operation such item represents (i.e., of type stack_op_type) + * @param[in] empty_stack A symbol that will be written to top_of_stack_out_it whenever the stack + * was empty + * @param[out] top_of_stack The output iterator to which the item will be written to + * @return TopOfStackOutItT Iterators to one past the last element that was written + */ +template +TopOfStackOutItT to_top_of_stack(InputItT begin, + InputItT end, + ToStackOpTypeT to_stack_op, + StackSymbolT empty_stack, + TopOfStackOutItT top_of_stack_out_it) +{ + // This is the data structure that keeps track of the full stack state for each input symbol + std::stack stack_state; + + for (auto it = begin; it < end; it++) { + // Write what is currently on top of the stack when reading in the current symbol + *top_of_stack_out_it = stack_state.empty() ? empty_stack : stack_state.top(); + top_of_stack_out_it++; + + auto const& current = *it; + fst::stack_op_type op_type = to_stack_op(current); + + // Check whether this symbol corresponds to a push or pop operation and modify the stack + // accordingly + if (op_type == fst::stack_op_type::PUSH) { + stack_state.push(current); + } else if (op_type == fst::stack_op_type::POP) { + stack_state.pop(); + } + } + return top_of_stack_out_it; +} + +/** + * @brief Function object used to filter for brackets and braces that represent push and pop + * operations + * + */ +struct JSONToStackOp { + template + constexpr CUDF_HOST_DEVICE fst::stack_op_type operator()(StackSymbolT const& stack_symbol) const + { + return (stack_symbol == '{' || stack_symbol == '[') ? fst::stack_op_type::PUSH + : (stack_symbol == '}' || stack_symbol == ']') ? 
fst::stack_op_type::POP + : fst::stack_op_type::READ; + } +}; +} // namespace + +// Base test fixture for tests +struct LogicalStackTest : public cudf::test::BaseFixture { +}; + +TEST_F(LogicalStackTest, GroundTruth) +{ + // Type sufficient to cover any stack level (must be a signed type) + using StackLevelT = int8_t; + using SymbolT = char; + using SymbolOffsetT = uint32_t; + + // The stack symbol that we'll fill everywhere where there's nothing on the stack + constexpr SymbolT empty_stack_symbol = '_'; + + // This just has to be a stack symbol that may not be confused with a symbol that would push + constexpr SymbolT read_symbol = 'x'; + + // Prepare cuda stream for data transfers & kernels + rmm::cuda_stream stream{}; + rmm::cuda_stream_view stream_view(stream); + + // Test input, + std::string input = R"( {)" + R"(category": "reference",)" + R"("index:" [4,12,42],)" + R"("author": "Nigel Rees",)" + R"("title": "Sayings of the Century",)" + R"("price": 8.95)" + R"(} )" + R"({)" + R"("category": "reference",)" + R"("index:" [4,{},null,{"a":[]}],)" + R"("author": "Nigel Rees",)" + R"("title": "Sayings of the Century",)" + R"("price": 8.95)" + R"(} {} [] [ ])"; + + // Repeat input sample 1024x + for (std::size_t i = 0; i < 10; i++) + input += input; + + // Input's size + std::size_t string_size = input.size(); + + // Getting the symbols that actually modify the stack (i.e., symbols that push or pop) + std::string stack_symbols{}; + std::vector stack_op_indexes; + stack_op_indexes.reserve(string_size); + + // Get the sparse representation of stack operations + to_sparse_stack_symbols(std::cbegin(input), + std::cend(input), + JSONToStackOp{}, + std::back_inserter(stack_symbols), + std::back_inserter(stack_op_indexes)); + + rmm::device_uvector d_stack_ops{stack_symbols.size(), stream_view}; + rmm::device_uvector d_stack_op_indexes{stack_op_indexes.size(), stream_view}; + hostdevice_vector top_of_stack_gpu{string_size, stream_view}; + cudf::device_span d_stack_op_idx_span{d_stack_op_indexes}; + + cudaMemcpyAsync(d_stack_ops.data(), + stack_symbols.data(), + stack_symbols.size() * sizeof(SymbolT), + cudaMemcpyHostToDevice, + stream.value()); + + cudaMemcpyAsync(d_stack_op_indexes.data(), + stack_op_indexes.data(), + stack_op_indexes.size() * sizeof(SymbolOffsetT), + cudaMemcpyHostToDevice, + stream.value()); + + // Run algorithm + fst::sparse_stack_op_to_top_of_stack(d_stack_ops.data(), + d_stack_op_idx_span, + JSONToStackOp{}, + top_of_stack_gpu.device_ptr(), + empty_stack_symbol, + read_symbol, + string_size, + stream.value()); + + // Async copy results from device to host + top_of_stack_gpu.device_to_host(stream_view); + + // Get CPU-side results for verification + std::string top_of_stack_cpu{}; + top_of_stack_cpu.reserve(string_size); + to_top_of_stack(std::cbegin(input), + std::cend(input), + JSONToStackOp{}, + empty_stack_symbol, + std::back_inserter(top_of_stack_cpu)); + + // Make sure results have been copied back to host + stream.synchronize(); + + // Verify results + ASSERT_EQ(string_size, top_of_stack_cpu.size()); + ASSERT_EQ(top_of_stack_gpu.size(), top_of_stack_cpu.size()); + for (size_t i = 0; i < string_size && i < top_of_stack_cpu.size(); i++) { + ASSERT_EQ(top_of_stack_gpu.host_ptr()[i], top_of_stack_cpu[i]) << "Mismatch at index #" << i; + } +} + +CUDF_TEST_PROGRAM_MAIN()
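Usage sketch (not part of the patch above; the helper function and buffer names are made up for illustration): print_array takes one device-accessible iterator per column, and any iterator can be wrapped in hex() to render its values in hexadecimal. Values are padded to a width of 6 and separated by commas, and the kernel only runs when the CUDA_DBG_DUMP environment variable is set at runtime. The caller has to live in a .cu file, since print_array instantiates a kernel template.

```cpp
// Hypothetical debugging helper; assumes d_flags holds at least d_offsets.size() elements.
#include <cudf_test/print_utilities.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cstdint>

void dump_state(rmm::device_uvector<uint32_t> const& d_offsets,
                rmm::device_uvector<uint8_t> const& d_flags,
                rmm::cuda_stream_view stream)
{
  namespace dbg = cudf::test::print;
  // Prints one line per element: "<index>: <offset>, <flags rendered as hex>"
  dbg::print_array(d_offsets.size(), stream, d_offsets.data(), dbg::hex(d_flags.data()));
}
```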
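Worked example (also not part of the patch, provided for review): a hand-computed trace of the stages that sparse_stack_op_to_top_of_stack chains together, using the 7-character input "{a[b]c}", the conventions of JSONToStackOp (braces and brackets push and pop, everything else reads), '_' as the empty-stack symbol and 'x' as the read symbol.

```cpp
// input (positions 0..6)        :  {  a  [  b  ]  c  }
// sparse stack ops              :  "{[]}" at positions [0, 2, 4, 6]
// level deltas (+1 push, -1 pop):  [+1, +1, -1, -1]
// inclusive level scan          :  [(1,'{'), (2,'['), (1,']'), (0,'}')]
// stable radix sort by level    :  [(0,'}')@6, (1,'{')@0, (1,']')@4, (2,'[')@2]
// remap level 0 to '_' and scan, matching each pop/read with the latest push of its level:
//                                  ['_'@6, '{'@0, '{'@4, '['@2]
// fill output with read symbol  :  "xxxxxxx"
// scatter matched symbols       :  "{x[x{x_"
// exclusive propagate-last-write scan, seeded with '_':
//                                  "_{{[[{{"   // == top of stack before each input position,
//                                              //    matching the CPU reference in the test
```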
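A note on the radix-sort key (my reading of the code, not something the patch states explicitly): the sort reinterprets each StackOp as an unsigned integer of the same size and restricts the key to the bit range covering the stack_level member, so the stack symbol simply travels alongside its level. Sketched for the smallest instantiation, and assuming a little-endian target so that the struct's first member occupies the key's low-order bits:

```cpp
// For StackLevelT = int8_t and ValueT = char, StackOpT is 2 bytes and is sorted as a uint16_t:
//   begin_bit = offsetof(StackOpT, stack_level) * 8            // == 0
//   end_bit   = begin_bit + sizeof(StackOpT::stack_level) * 8  // == 8
// Only bits [0, 8) -- the stack_level byte -- participate in the comparison; after the prefix
// sum the levels are non-negative for well-formed input, so comparing them as unsigned bits
// preserves their numeric order.
```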