Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stdlib] Un-deprecate String.__iter__() #3984

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions mojo/docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,7 @@ ctx.enqueue_function(compiled_func, grid_dim=1, block_dim=1)
end_result.extend(extra_data) # [4, 5, 8, 10]
```

- The `.codepoint_slices()` method of `String` and `StringSlice` was removed.
Iterating over single-codepoint slices is now the default behavior of their
`__iter__()` methods, so use `for s in string:` instead.

### 🛠️ Fixed
8 changes: 4 additions & 4 deletions mojo/stdlib/src/builtin/string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -457,18 +457,18 @@ struct StringLiteral(
return self.__str__()

fn __iter__(ref self) -> CodepointSliceIter[StaticConstantOrigin]:
"""Return an iterator over the string literal.
"""Iterate over the string unicode characters.

Returns:
An iterator over the string.
An iterator of references to the string unicode characters.
"""
return CodepointSliceIter(self.as_string_slice())

fn __reversed__(self) -> CodepointSliceIter[StaticConstantOrigin, False]:
"""Iterate backwards over the string, returning immutable references.
"""Iterate backwards over the string unicode characters.

Returns:
A reversed iterator over the string.
A reversed iterator of references to the string unicode characters.
"""
return CodepointSliceIter[StaticConstantOrigin, False](
self.as_string_slice()
Expand Down
42 changes: 8 additions & 34 deletions mojo/stdlib/src/collections/string/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -1034,22 +1034,23 @@ struct String(
"""
self._iadd(other.as_bytes())

@deprecated("Use `str.codepoints()` or `str.codepoint_slices()` instead.")
fn __iter__(self) -> CodepointSliceIter[__origin_of(self)]:
"""Iterate over the string, returning immutable references.
"""Iterate over the string unicode characters.

Returns:
An iterator of references to the string elements.
An iterator of references to the string unicode characters.
"""
return self.codepoint_slices()
return CodepointSliceIter[__origin_of(self)](self.as_string_slice())

fn __reversed__(self) -> CodepointSliceIter[__origin_of(self), False]:
"""Iterate backwards over the string, returning immutable references.
"""Iterate backwards over the string unicode characters.

Returns:
A reversed iterator of references to the string elements.
A reversed iterator of references to the string unicode characters.
"""
return CodepointSliceIter[__origin_of(self), forward=False](self)
return CodepointSliceIter[__origin_of(self), forward=False](
self.as_string_slice()
)

# ===------------------------------------------------------------------=== #
# Trait implementations
Expand Down Expand Up @@ -1244,33 +1245,6 @@ struct String(
"""
return self.as_string_slice().codepoints()

fn codepoint_slices(self) -> CodepointSliceIter[__origin_of(self)]:
"""Returns an iterator over single-character slices of this string.

Each returned slice points to a single Unicode codepoint encoded in the
underlying UTF-8 representation of this string.

Returns:
An iterator of references to the string elements.

# Examples

Iterate over the character slices in a string:

```mojo
from testing import assert_equal, assert_true

var s = String("abc")
var iter = s.codepoint_slices()
assert_true(iter.__next__() == "a")
assert_true(iter.__next__() == "b")
assert_true(iter.__next__() == "c")
assert_equal(iter.__has_next__(), False)
```
.
"""
return self.as_string_slice().codepoint_slices()

fn unsafe_ptr(
ref self,
) -> UnsafePointer[
Expand Down
29 changes: 9 additions & 20 deletions mojo/stdlib/src/collections/string/string_slice.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
"""
var result = String()
var use_dquote = False
for s in self.codepoint_slices():
for s in self:
use_dquote = use_dquote or (s == "'")

if s == "\\":
Expand Down Expand Up @@ -863,20 +863,19 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
self.unsafe_ptr(), rhs.unsafe_ptr(), min(len1, len2)
)

@deprecated("Use `str.codepoints()` or `str.codepoint_slices()` instead.")
fn __iter__(self) -> CodepointSliceIter[origin]:
"""Iterate over the string, returning immutable references.
"""Iterate over the string unicode characters.

Returns:
An iterator of references to the string elements.
An iterator of references to the string unicode characters.
"""
return self.codepoint_slices()
return CodepointSliceIter[origin](self)

fn __reversed__(self) -> CodepointSliceIter[origin, False]:
"""Iterate backwards over the string, returning immutable references.
"""Iterate backwards over the string unicode characters.

Returns:
A reversed iterator of references to the string elements.
A reversed iterator of references to the string unicode characters.
"""
return CodepointSliceIter[origin, forward=False](self)

Expand Down Expand Up @@ -1066,7 +1065,7 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
try:
# Python adds all "whitespace chars" as one separator
# if no separator was specified
for s in self[lhs:].codepoint_slices():
for s in self[lhs:]:
if not s.isspace():
break
lhs += s.byte_length()
Expand All @@ -1080,9 +1079,7 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
output.append(self[str_byte_len:])
break
rhs = lhs + num_bytes(self.unsafe_ptr()[lhs])
for s in self[
lhs + num_bytes(self.unsafe_ptr()[lhs]) :
].codepoint_slices():
for s in self[lhs + num_bytes(self.unsafe_ptr()[lhs]) :]:
if s.isspace():
break
rhs += s.byte_length()
Expand Down Expand Up @@ -1291,14 +1288,6 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
"""
return CodepointsIter(self)

fn codepoint_slices(self) -> CodepointSliceIter[origin]:
"""Iterate over the string, returning immutable references.

Returns:
An iterator of references to the string elements.
"""
return CodepointSliceIter[origin](self)

@always_inline
fn as_bytes(self) -> Span[Byte, origin]:
"""Get the sequence of encoded bytes of the underlying string.
Expand Down Expand Up @@ -1759,7 +1748,7 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
)
else:
var offset = 0
for s in self.codepoint_slices():
for s in self:
var b_len = s.byte_length()
if not _is_newline_char(ptr, offset, ptr[offset], b_len):
return False
Expand Down
12 changes: 6 additions & 6 deletions mojo/stdlib/test/collections/string/test_string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ def test_string_codepoints_iter():

def test_string_char_slices_iter():
var s0 = String("abc")
var s0_iter = s0.codepoint_slices()
var s0_iter = s0.__iter__()
assert_true(s0_iter.__next__() == "a")
assert_true(s0_iter.__next__() == "b")
assert_true(s0_iter.__next__() == "c")
Expand All @@ -1113,7 +1113,7 @@ def test_string_char_slices_iter():
# Borrow immutably
fn conc(vs: String) -> String:
var c = String("")
for v in vs.codepoint_slices():
for v in vs:
c += v
return c

Expand All @@ -1124,18 +1124,18 @@ def test_string_char_slices_iter():
concat += v
assert_equal(321, atol(concat))

for v in vs.codepoint_slices():
for v in vs:
v.unsafe_ptr().origin_cast[mut=True]()[] = ord("1")

# Borrow immutably
for v in vs.codepoint_slices():
for v in vs:
concat += v

assert_equal(321111, atol(concat))

var idx = -1
vs = String("mojo🔥")
var iterator = vs.codepoint_slices()
var iterator = vs.__iter__()
assert_equal(5, len(iterator))
var item = iterator.__next__()
assert_equal(String("m"), String(item))
Expand Down Expand Up @@ -1185,7 +1185,7 @@ def test_string_char_slices_iter():
var ptr = item.unsafe_ptr()
var amnt_characters = 0
var byte_idx = 0
for v in item.codepoint_slices():
for v in item:
var byte_len = v.byte_length()
for i in range(byte_len):
assert_equal(ptr[byte_idx + i], v.unsafe_ptr()[i])
Expand Down