Enable text build without relying on relaxed constexpr (#17647)
Contributes to #7795

This PR updates libcudf's `text` component to build without depending on the relaxed constexpr build option. Device code that previously relied on host-only `std::` utilities (`std::min`, `std::max`, `std::minmax`, and `std::numeric_limits`) now uses the `cuda::std::` equivalents from libcu++, and `cudf::type_to_id()` is annotated with `CUDF_HOST_DEVICE` so it remains callable from device code.
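
Below is a minimal sketch of the pattern applied throughout this diff (hypothetical code, not taken from the PR; the function and variable names are illustrative). Without `--expt-relaxed-constexpr`, device code cannot call host-only `constexpr` helpers such as `std::min` or `std::numeric_limits<T>::max()`, so the libcu++ equivalents are used instead:

```cpp
// Hypothetical example; not part of this commit.
#include <cuda/std/functional>  // cuda::std::min / cuda::std::max (header used in this PR)
#include <cuda/std/limits>      // cuda::std::numeric_limits

#include <cstdint>

__device__ uint32_t clamp_or_sentinel(uint32_t value, uint32_t limit)
{
  // Callable in device code without the relaxed constexpr build option:
  constexpr auto sentinel = cuda::std::numeric_limits<uint32_t>::max();
  return value > limit ? sentinel : cuda::std::min(value, limit);
  // The std:: forms of these calls compile in device code only when
  // --expt-relaxed-constexpr is enabled.
}
```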

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Basit Ayantunde (https://github.com/lamarrr)
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)

URL: #17647
PointKernel authored Jan 6, 2025
1 parent 955b1f4 commit c4f2e8e
Showing 7 changed files with 45 additions and 36 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/utilities/type_dispatcher.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -53,7 +53,7 @@ namespace CUDF_EXPORT cudf {
* @return The `cudf::type_id` corresponding to the specified type
*/
template <typename T>
inline constexpr type_id type_to_id()
CUDF_HOST_DEVICE inline constexpr type_id type_to_id()
{
return type_id::EMPTY;
};
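
As a quick illustration of the `type_to_id()` change above (a hypothetical usage sketch, not code from this commit): without relaxed constexpr, a plain `constexpr` host function cannot be called from a kernel, so the `CUDF_HOST_DEVICE` annotation is what makes the call below legal.

```cpp
// Hypothetical sketch; not part of this commit.
#include <cudf/utilities/type_dispatcher.hpp>

struct not_a_cudf_type {};  // illustrative type with no cudf specialization

__global__ void type_to_id_in_device_code(bool* d_out)
{
  // Compiles because type_to_id() now carries CUDF_HOST_DEVICE; the
  // unspecialized template shown above returns type_id::EMPTY.
  *d_out = (cudf::type_to_id<not_a_cudf_type>() == cudf::type_id::EMPTY);
}
```
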
17 changes: 9 additions & 8 deletions cpp/src/text/edit_distance.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <cuda/std/functional>
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/functional.h>
@@ -64,10 +65,10 @@ __device__ cudf::size_type compute_distance(cudf::string_view const& d_str,
if (str_length == 0) return tgt_length;
if (tgt_length == 0) return str_length;

auto begin = str_length < tgt_length ? d_str.begin() : d_tgt.begin();
auto itr = str_length < tgt_length ? d_tgt.begin() : d_str.begin();
// .first is min and .second is max
auto const [n, m] = std::minmax(str_length, tgt_length);
auto begin = str_length < tgt_length ? d_str.begin() : d_tgt.begin();
auto itr = str_length < tgt_length ? d_tgt.begin() : d_str.begin();
auto const n = cuda::std::min(str_length, tgt_length);
auto const m = cuda::std::max(str_length, tgt_length);
// setup compute buffer pointers
auto v0 = buffer;
auto v1 = v0 + n + 1;
@@ -81,7 +82,7 @@ __device__ cudf::size_type compute_distance(cudf::string_view const& d_str,
auto sub_cost = v0[j] + (*itr != *itr_tgt);
auto del_cost = v0[j + 1] + 1;
auto ins_cost = v1[j] + 1;
v1[j + 1] = std::min(std::min(sub_cost, del_cost), ins_cost);
v1[j + 1] = cuda::std::min(cuda::std::min(sub_cost, del_cost), ins_cost);
}
thrust::swap(v0, v1);
}
@@ -170,7 +171,7 @@ std::unique_ptr<cudf::column> edit_distance(cudf::strings_column_view const& str
? d_targets.element<cudf::string_view>(0)
: d_targets.element<cudf::string_view>(idx);
// just need 2 integers for each character of the shorter string
return (std::min(d_str.length(), d_tgt.length()) + 1) * 2;
return (cuda::std::min(d_str.length(), d_tgt.length()) + 1) * 2;
});

// get the total size of the temporary compute buffer
@@ -241,7 +242,7 @@ std::unique_ptr<cudf::column> edit_distance_matrix(cudf::strings_column_view con
if (d_str1.empty() || d_str2.empty()) { return; }
// the temp size needed is 2 integers per character of the shorter string
d_offsets[idx - ((row + 1) * (row + 2)) / 2] =
(std::min(d_str1.length(), d_str2.length()) + 1) * 2;
(cuda::std::min(d_str1.length(), d_str2.length()) + 1) * 2;
});

// get the total size for the compute buffer
5 changes: 3 additions & 2 deletions cpp/src/text/jaccard.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -36,6 +36,7 @@
#include <rmm/exec_policy.hpp>

#include <cub/cub.cuh>
#include <cuda/std/functional>
#include <thrust/binary_search.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
@@ -243,7 +244,7 @@ CUDF_KERNEL void count_substrings_kernel(cudf::column_device_view const d_string
}
}
auto const char_count = warp_reduce(temp_storage).Sum(count);
if (lane_idx == 0) { d_counts[str_idx] = std::max(1, char_count - width + 1); }
if (lane_idx == 0) { d_counts[str_idx] = cuda::std::max(1, char_count - width + 1); }
}

/**
11 changes: 5 additions & 6 deletions cpp/src/text/minhash.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,14 +40,13 @@

#include <cooperative_groups.h>
#include <cuda/atomic>
#include <cuda/std/limits>
#include <thrust/binary_search.h>
#include <thrust/execution_policy.h>
#include <thrust/fill.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>

#include <limits>

namespace nvtext {
namespace detail {
namespace {
@@ -156,7 +155,7 @@ CUDF_KERNEL void minhash_seed_kernel(cudf::column_device_view const d_strings,
// initialize the output -- only needed for wider strings
auto d_output = d_results + (str_idx * param_count);
for (auto i = lane_idx; i < param_count; i += tile_size) {
d_output[i] = std::numeric_limits<hash_value_type>::max();
d_output[i] = cuda::std::numeric_limits<hash_value_type>::max();
}
}
}
@@ -226,7 +225,7 @@ CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,
? section_size
: cuda::std::max(static_cast<cudf::size_type>(size_bytes > 0), section_size - width + 1);

auto const init = size_bytes == 0 ? 0 : std::numeric_limits<hash_value_type>::max();
auto const init = size_bytes == 0 ? 0 : cuda::std::numeric_limits<hash_value_type>::max();
auto const lane_idx = block.thread_rank();
auto const d_output = d_results + (str_idx * parameter_a.size());

@@ -235,7 +234,7 @@ CUDF_KERNEL void minhash_kernel(cudf::column_device_view const d_strings,

// constants used in the permutation calculations
constexpr uint64_t mersenne_prime = (1UL << 61) - 1;
constexpr hash_value_type hash_max = std::numeric_limits<hash_value_type>::max();
constexpr hash_value_type hash_max = cuda::std::numeric_limits<hash_value_type>::max();

// found to be an efficient shared memory size for both hash types
__shared__ hash_value_type block_values[block_size * params_per_thread];
5 changes: 3 additions & 2 deletions cpp/src/text/replace.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -35,6 +35,7 @@
#include <rmm/cuda_stream_view.hpp>

#include <cuda/atomic>
#include <cuda/std/functional>
#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>
@@ -196,7 +197,7 @@ struct sub_offset_fn {
{
// keep delimiter search within this sub-block
auto const end =
d_input_chars + std::min(last_offset, ((idx + 2) * LS_SUB_BLOCK_SIZE) + first_offset);
d_input_chars + cuda::std::min(last_offset, ((idx + 2) * LS_SUB_BLOCK_SIZE) + first_offset);
// starting point of this sub-block
auto itr = d_input_chars + first_offset + ((idx + 1) * LS_SUB_BLOCK_SIZE);
while ((itr < end) &&
7 changes: 4 additions & 3 deletions cpp/src/text/subword/data_normalizer.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <cuda/std/functional>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/pair.h>
@@ -134,8 +135,8 @@ extract_code_points_from_utf8(unsigned char const* strings,
constexpr uint8_t max_utf8_blocks_for_char = 4;
uint8_t utf8_blocks[max_utf8_blocks_for_char] = {0};

for (int i = 0; i < std::min(static_cast<size_t>(max_utf8_blocks_for_char),
total_bytes - start_byte_for_thread);
for (int i = 0; i < cuda::std::min(static_cast<size_t>(max_utf8_blocks_for_char),
total_bytes - start_byte_for_thread);
++i) {
utf8_blocks[i] = strings[start_byte_for_thread + i];
}
32 changes: 19 additions & 13 deletions cpp/src/text/subword/wordpiece_tokenizer.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -27,6 +27,8 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <cuda/std/functional>
#include <cuda/std/limits>
#include <thrust/copy.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>
@@ -87,23 +89,23 @@ CUDF_KERNEL void init_data_and_mark_word_start_and_ends(uint32_t const* code_poi

// Deal with the start_word_indices array
if (char_for_thread < num_code_points) {
uint32_t val_to_write = std::numeric_limits<uint32_t>::max();
uint32_t val_to_write = cuda::std::numeric_limits<uint32_t>::max();
if ((code_points[char_for_thread] != SPACE_CODE_POINT) && (char_for_thread > 0) &&
(code_points[char_for_thread - 1] == SPACE_CODE_POINT)) {
val_to_write = char_for_thread;
}
start_word_indices[char_for_thread] = val_to_write;

// Deal with the end_word_indices_array
val_to_write = std::numeric_limits<uint32_t>::max();
val_to_write = cuda::std::numeric_limits<uint32_t>::max();
if ((code_points[char_for_thread] != SPACE_CODE_POINT) &&
(char_for_thread + 1 < num_code_points) &&
(code_points[char_for_thread + 1] == SPACE_CODE_POINT)) {
val_to_write = char_for_thread + 1;
}
end_word_indices[char_for_thread] = val_to_write;

token_ids[char_for_thread] = std::numeric_limits<uint32_t>::max();
token_ids[char_for_thread] = cuda::std::numeric_limits<uint32_t>::max();
tokens_per_word[char_for_thread] = 0;
}
}
@@ -214,7 +216,7 @@ struct mark_special_tokens {
__device__ void operator()(size_t idx) const
{
uint32_t const start_index = start_word_indices[idx];
if ((start_index == std::numeric_limits<uint32_t>::max()) ||
if ((start_index == cuda::std::numeric_limits<uint32_t>::max()) ||
((start_index + MIN_ST_WIDTH + 2) > num_code_points))
return;
if (code_points[start_index] != '[') return;
@@ -225,12 +227,12 @@ struct mark_special_tokens {
uint32_t const end_index = [&] {
auto const begin = start_word_indices + start_pos;
auto const width =
std::min(static_cast<size_t>(MAX_ST_WIDTH + 1), (num_code_points - start_pos));
cuda::std::min(static_cast<size_t>(MAX_ST_WIDTH + 1), (num_code_points - start_pos));
auto const end = begin + width;
// checking the next start-word is more reliable than arbitrarily searching for ']'
// in case the text is split across string rows
auto const iter = thrust::find_if(thrust::seq, begin + 1, end, [](auto swi) {
return swi != std::numeric_limits<uint32_t>::max();
return swi != cuda::std::numeric_limits<uint32_t>::max();
});
return iter == end ? start_index : static_cast<uint32_t>(iter - start_word_indices);
}();
@@ -254,11 +256,11 @@ struct mark_special_tokens {
thrust::fill(thrust::seq,
start_word_indices + start_index + 1, // keep the first one
start_word_indices + end_index + 1,
std::numeric_limits<uint32_t>::max());
cuda::std::numeric_limits<uint32_t>::max());
thrust::fill(thrust::seq,
end_word_indices + start_index,
end_word_indices + end_index + 1,
std::numeric_limits<uint32_t>::max());
cuda::std::numeric_limits<uint32_t>::max());

// reset the new end-word index
end_word_indices[end_pos] = end_pos + 1;
@@ -382,7 +384,7 @@ CUDF_KERNEL void kernel_wordpiece_tokenizer(uint32_t const* code_points,
// We need to clean up the global array. This case is very uncommon.
// Only 0.016% of words cannot be resolved to a token from the squad dev set.
for (uint32_t i = 1; i < num_values_tokenized; ++i) {
token_ids[token_start + i] = std::numeric_limits<uint32_t>::max();
token_ids[token_start + i] = cuda::std::numeric_limits<uint32_t>::max();
}
num_values_tokenized = 0;
}
@@ -423,7 +425,10 @@ uvector_pair wordpiece_tokenizer::tokenize(cudf::strings_column_view const& inpu
}

struct copy_if_fn { // inline lambda not allowed in private or protected member function
__device__ bool operator()(uint32_t cp) { return cp != std::numeric_limits<uint32_t>::max(); }
__device__ bool operator()(uint32_t cp)
{
return cp != cuda::std::numeric_limits<uint32_t>::max();
}
};

struct tranform_fn { // just converting uint8 value to uint32
@@ -487,7 +492,7 @@ void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stre
auto itr_end = thrust::remove(rmm::exec_policy(stream),
device_word_indices.begin(),
device_word_indices.end(),
std::numeric_limits<uint32_t>::max());
cuda::std::numeric_limits<uint32_t>::max());

// The number of tokens selected will be double the number of words since we
// select from both the start and end index arrays.
@@ -523,7 +528,8 @@ void wordpiece_tokenizer::tokenize(uvector_pair& cps_and_offsets, rmm::cuda_stre
// token so this will always have enough memory to store the contiguous tokens.
uint32_t* contiguous_token_ids = device_code_points;
auto const copy_size = // thrust::copy_if limited to copying int-max values
std::min(device_token_ids.size(), static_cast<std::size_t>(std::numeric_limits<int>::max()));
cuda::std::min(device_token_ids.size(),
static_cast<std::size_t>(cuda::std::numeric_limits<int>::max()));
auto ids_itr = device_token_ids.begin();
auto const ids_end = device_token_ids.end();
while (ids_itr != ids_end) {
