From 3b0c0d93f1fb061c06976f55c005e74e35acc0f6 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 30 Jun 2023 08:30:37 -0400 Subject: [PATCH] Fix memcheck error found in nvtext tokenize functions --- cpp/src/text/utilities/tokenize_ops.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/text/utilities/tokenize_ops.cuh b/cpp/src/text/utilities/tokenize_ops.cuh index 003c041c0bf..89825e31e5c 100644 --- a/cpp/src/text/utilities/tokenize_ops.cuh +++ b/cpp/src/text/utilities/tokenize_ops.cuh @@ -89,6 +89,7 @@ struct characters_tokenizer { __device__ bool next_token() { auto const src_ptr = d_str.data(); + if (current_position >= d_str.size_bytes()) { return false; } if (current_position != 0) { // skip these 2 lines the first time through current_position += cudf::strings::detail::bytes_in_char_utf8(src_ptr[current_position]); start_position = current_position;