Skip to content

Commit

Permalink
Fix: Self-assignment issues
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Mar 18, 2024
1 parent 0a71676 commit 142f48a
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 4 deletions.
2 changes: 1 addition & 1 deletion include/stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -3256,7 +3256,7 @@ SZ_PUBLIC sz_ptr_t sz_string_shrink_to_fit(sz_string_t *string, sz_memory_alloca

// We may already be space-optimal, and in that case we don't need to do anything.
sz_size_t new_space = string_length + 1;
if (string_space == new_space || string_is_external) return string->external.start;
if (string_space == new_space || !string_is_external) return string->external.start;

sz_ptr_t new_start = (sz_ptr_t)allocator->allocate(new_space, allocator->handle);
if (!new_start) return SZ_NULL_CHAR;
Expand Down
32 changes: 29 additions & 3 deletions include/stringzilla/stringzilla.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1972,6 +1972,12 @@ class basic_string {
*/
static constexpr size_type npos = SZ_SSIZE_MAX;

/**
* @brief The number of characters that can be stored in the internal buffer.
* Depends on the size of the internal buffer for the "Small String Optimization".
*/
static constexpr size_type min_capacity = SZ_STRING_INTERNAL_SPACE - 1;

#pragma region Constructors and STL Utilities

sz_constexpr_if_cpp20 basic_string() noexcept {
Expand Down Expand Up @@ -3362,10 +3368,19 @@ bool basic_string<char_type_, allocator_>::try_assign(string_view other) noexcep
sz_size_t string_length;
sz_string_range(&string_, &string_start, &string_length);

if (string_length >= other.length()) {
// One nasty special case is when the other string is a substring of this string.
// We need to handle that separately, as the `sz_string_expand` may invalidate the `other` pointer.
if (other.data() >= string_start && other.data() < string_start + string_length) {
auto offset_in_this = other.data() - string_start;
sz_string_erase(&string_, 0, offset_in_this);
sz_string_erase(&string_, other.length(), SZ_SIZE_MAX);
}
// In some of the other cases, when the assigned string is short, we don't need to re-allocate.
else if (string_length >= other.length()) {
other.copy(string_start, other.length());
sz_string_erase(&string_, other.length(), SZ_SIZE_MAX);
}
// In the common case, however, we need to allocate.
else {
if (!_with_alloc([&](sz_alloc_type &alloc) {
string_start = sz_string_expand(&string_, SZ_SIZE_MAX, other.length(), &alloc);
Expand All @@ -3392,10 +3407,21 @@ bool basic_string<char_type_, allocator_>::try_push_back(char_type c) noexcept {
template <typename char_type_, typename allocator_>
bool basic_string<char_type_, allocator_>::try_append(const_pointer str, size_type length) noexcept {
return _with_alloc([&](sz_alloc_type &alloc) {
auto old_size = size();
// Sometimes we are inserting part of this string into itself.
// By the time `sz_string_expand` finished, the old `str` pointer may be invalidated,
// so we need to handle that special case separately.
auto this_span = span();
if (str >= this_span.begin() && str < this_span.end()) {
auto str_offset_in_this = str - data();
sz_ptr_t start = sz_string_expand(&string_, SZ_SIZE_MAX, length, &alloc);
if (!start) return false;
sz_copy(start + old_size, str, length);
sz_copy(start + this_span.size(), start + str_offset_in_this, length);
}
else {
sz_ptr_t start = sz_string_expand(&string_, SZ_SIZE_MAX, length, &alloc);
if (!start) return false;
sz_copy(start + this_span.size(), str, length);
}
return true;
});
}
Expand Down
29 changes: 29 additions & 0 deletions scripts/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,13 +431,20 @@ static void test_api_mutable() {
assert_scoped(str s = "obsolete", s.assign(s, 4), s == "lete"); // Partial self-assignment
assert_scoped(str s = "obsolete", s.assign(s, 4, 3), s == "let"); // Partial self-assignment

// Self-assignment is a special case of assignment.
assert_scoped(str s = "obsolete", s = s, s == "obsolete");
assert_scoped(str s = "obsolete", s.assign(s), s == "obsolete");
assert_scoped(str s = "obsolete", s.assign(s.data(), 2), s == "ob");
assert_scoped(str s = "obsolete", s.assign(s.data(), s.size()), s == "obsolete");

// Allocations, capacity and memory management.
assert_scoped(str s, s.reserve(10), s.capacity() >= 10);
assert_scoped(str s, s.resize(10), s.size() == 10);
assert_scoped(str s, s.resize(10, 'a'), s.size() == 10 && s == "aaaaaaaaaa");
assert(str().max_size() > 0);
assert(str().get_allocator() == std::allocator<char>());
assert(std::strcmp(str("c_str").c_str(), "c_str") == 0);
assert_scoped(str s = "hello", s.shrink_to_fit(), s.capacity() <= sz::string::min_capacity);

// Concatenation.
// Following are missing in strings, but are present in vectors.
Expand Down Expand Up @@ -627,6 +634,28 @@ static void test_api_readonly_extensions() {
void test_api_mutable_extensions() {
using str = sz::string;

// Try methods.
assert(str("obsolete").try_assign("hello"));
assert(str().try_reserve(10));
assert(str().try_resize(10));
assert(str("__").try_insert(1, "test"));
assert(str("test").try_erase(1, 2));
assert(str("test").try_clear());
assert(str("test").try_replace(1, 2, "aaaa"));
assert(str("test").try_push_back('a'));
assert(str("test").try_shrink_to_fit());

// Self-referencing methods.
assert_scoped(str s = "test", s.try_assign(s.view()), s == "test");
assert_scoped(str s = "test", s.try_assign(s.view().sub(1, 2)), s == "e");
assert_scoped(str s = "test", s.try_append(s.view().sub(1, 2)), s == "teste");

// Try methods going beyond and beneath capacity threshold.
assert_scoped(str s = "0123456789012345678901234567890123456789012345678901234567890123", // 64 symbols at start
s.try_append(s) && s.try_append(s) && s.try_append(s) && s.try_append(s) && s.try_clear() &&
s.try_shrink_to_fit(),
s.capacity() < sz::string::min_capacity);

// Same length replacements.
assert_scoped(str s = "hello", s.replace_all("xx", "xx"), s == "hello");
assert_scoped(str s = "hello", s.replace_all("l", "1"), s == "he11o");
Expand Down

0 comments on commit 142f48a

Please sign in to comment.