Skip to content

Commit

Permalink
ARROW-9460: [C++] Fix BinaryContainsExact for pattern with repeated characters
Browse files Browse the repository at this point in the history

Closes #7750 from xhochy/ARROW-9460

Authored-by: Uwe L. Korn <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
xhochy authored and wesm committed Jul 14, 2020
1 parent 6d7e4ec commit 1d7d919
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
17 changes: 8 additions & 9 deletions cpp/src/arrow/compute/kernels/scalar_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ void TransformBinaryContainsExact(const uint8_t* pattern, int64_t pattern_length
prefix_table[0] = -1;
for (offset_type pos = 0; pos < pattern_length; ++pos) {
// The prefix cannot be expanded, reset.
if (prefix_length >= 0 && pattern[pos] != pattern[prefix_length]) {
while (prefix_length >= 0 && pattern[pos] != pattern[prefix_length]) {
prefix_length = prefix_table[prefix_length];
}
prefix_length++;
Expand All @@ -371,14 +371,13 @@ void TransformBinaryContainsExact(const uint8_t* pattern, int64_t pattern_length

int64_t pattern_pos = 0;
for (int64_t k = 0; k < current_length; k++) {
if (pattern[pattern_pos] == current_data[k]) {
pattern_pos++;
if (pattern_pos == pattern_length) {
bitmap_writer.Set();
break;
}
} else {
pattern_pos = std::max<offset_type>(0, prefix_table[pattern_pos]);
while ((pattern_pos >= 0) && (pattern[pattern_pos] != current_data[k])) {
pattern_pos = prefix_table[pattern_pos];
}
pattern_pos++;
if (pattern_pos == pattern_length) {
bitmap_writer.Set();
break;
}
}
bitmap_writer.Next();
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_string_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,14 @@ TYPED_TEST(TestStringKernels, BinaryContainsExact) {
BinaryContainsExactOptions options_repeated{"abab"};
this->CheckUnary("binary_contains_exact", R"(["abab", "ab", "cababc", null, "bac"])",
boolean(), "[true, false, true, null, false]", &options_repeated);

// ARROW-9460
BinaryContainsExactOptions options_double_char{"aab"};
this->CheckUnary("binary_contains_exact", R"(["aacb", "aab", "ab", "aaab"])", boolean(),
"[false, true, false, true]", &options_double_char);
BinaryContainsExactOptions options_double_char_2{"bbcaa"};
this->CheckUnary("binary_contains_exact", R"(["abcbaabbbcaabccabaab"])", boolean(),
"[true]", &options_double_char_2);
}

TYPED_TEST(TestStringKernels, Strptime) {
Expand Down

0 comments on commit 1d7d919

Please sign in to comment.