Skip to content

Commit

Permalink
Implement MatchData#offset
Browse files Browse the repository at this point in the history
  • Loading branch information
richardboehme committed Mar 20, 2022
1 parent 4492d0c commit cac5ae5
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 1 deletion.
3 changes: 2 additions & 1 deletion include/natalie/match_data_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class MatchDataObject : public Object {

Value array(int);
Value group(Env *, size_t);
Value offset(Env *, Value);

Value captures(Env *);
Value to_a(Env *);
Expand All @@ -54,6 +55,6 @@ class MatchDataObject : public Object {

private:
OnigRegion *m_region { nullptr };
const StringObject *m_string { nullptr };
StringObject *m_string { nullptr };
};
}
2 changes: 2 additions & 0 deletions include/natalie/string_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ class StringObject : public Object {

Value convert_float();

static size_t byte_index_to_char_index(ArrayObject *chars, size_t byte_index);

template <typename... Args>
static StringObject *format(const char *fmt, Args... args) {
String out;
Expand Down
1 change: 1 addition & 0 deletions lib/natalie/compiler/binding_gen.rb
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ def generate_name
gen.binding('MatchData', 'captures', 'MatchDataObject', 'captures', argc: 0, pass_env: true, pass_block: false, return_type: :Object)
gen.binding('MatchData', 'size', 'MatchDataObject', 'size', argc: 0, pass_env: false, pass_block: false, return_type: :size_t)
gen.binding('MatchData', 'length', 'MatchDataObject', 'size', argc: 0, pass_env: false, pass_block: false, return_type: :size_t)
gen.binding('MatchData', 'offset', 'MatchDataObject', 'offset', argc: 1, pass_env: true, pass_block: false, return_type: :Object)
gen.binding('MatchData', 'to_a', 'MatchDataObject', 'to_a', argc: 0, pass_env: true, pass_block: false, return_type: :Object)
gen.binding('MatchData', 'to_s', 'MatchDataObject', 'to_s', argc: 0, pass_env: true, pass_block: false, return_type: :Object)
gen.binding('MatchData', '[]', 'MatchDataObject', 'ref', argc: 1, pass_env: true, pass_block: false, return_type: :Object)
Expand Down
30 changes: 30 additions & 0 deletions spec/core/matchdata/offset_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# -*- encoding: utf-8 -*-

require_relative '../../spec_helper'

describe "MatchData#offset" do
  # Group 0 is the whole match; groups 1..n are the capture groups.
  it "returns a two element array with the begin and end of the nth match" do
    md = /(.)(.)(\d+)(\d)/.match("THX1138.")
    md.offset(0).should == [1, 7]
    md.offset(4).should == [6, 7]
  end

  # The optional "( not)?" group does not take part in this match.
  it "returns [nil, nil] when the nth match isn't found" do
    md = /something is( not)? (right)/.match("something is right")
    md.offset(1).should == [nil, nil]
  end

  # "ñ" occupies more than one byte in UTF-8, yet the expected offsets are the
  # same as for the ASCII subject above: positions are counted in characters.
  it "returns the offset for multi byte strings" do
    md = /(.)(.)(\d+)(\d)/.match("TñX1138.")
    md.offset(0).should == [1, 7]
    md.offset(4).should == [6, 7]
  end

  not_supported_on :opal do
    # Same expectation with the /u flag on the pattern.
    it "returns the offset for multi byte strings with unicode regexp" do
      md = /(.)(.)(\d+)(\d)/u.match("TñX1138.")
      md.offset(0).should == [1, 7]
      md.offset(4).should == [6, 7]
    end
  end
end
17 changes: 17 additions & 0 deletions src/match_data_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@ Value MatchDataObject::group(Env *env, size_t index) {
return new StringObject { str, length };
}

// Implements MatchData#offset(n).
//
// Converts `n` to an integer and returns a two-element array holding the
// character index at which group `n` begins and the character index just past
// its end. Group 0 is the whole match.
//
// Returns:
//   - nil when `n` is negative or not smaller than size();
//   - [nil, nil] when the group exists but its recorded begin is -1
//     (the group did not take part in the match);
//   - [begin, end] as character indices otherwise.
//
// NOTE(review): MRI raises IndexError ("index N out of matches") for an
// out-of-range group instead of returning nil; confirm whether this method
// should follow that. The nil return is kept here so existing callers see no
// change in behavior for indices that were already handled.
Value MatchDataObject::offset(Env *env, Value n) {
    const nat_int_t index = IntegerObject::convert_to_nat_int_t(env, n);

    // A negative index would read in front of the region's beg/end arrays,
    // so it is rejected together with indices past the last group.
    if (index < 0 || index >= static_cast<nat_int_t>(size()))
        return NilObject::the();

    const auto begin = m_region->beg[index];
    const auto end = m_region->end[index];
    if (begin == -1)
        return new ArrayObject { NilObject::the(), NilObject::the() };

    // The region stores byte positions; translate them into character
    // positions so multi-byte subjects report the indices Ruby code expects.
    auto chars = m_string->chars(env);
    return new ArrayObject {
        Value::integer(StringObject::byte_index_to_char_index(chars, begin)),
        Value::integer(StringObject::byte_index_to_char_index(chars, end))
    };
}

Value MatchDataObject::captures(Env *env) {
return this->array(1);
}
Expand Down
12 changes: 12 additions & 0 deletions src/string_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,18 @@ Value StringObject::ref(Env *env, Value index_obj) {
abort();
}

// Translates a byte offset into a character offset.
//
// `chars` is the sequence of characters of a string, each element being a
// string whose length() is added to a running byte total. The result is the
// number of leading characters whose combined length does not exceed
// `byte_index`; if every character fits, that is the number of elements
// in `chars`.
size_t StringObject::byte_index_to_char_index(ArrayObject *chars, size_t byte_index) {
    size_t bytes_consumed = 0;
    size_t chars_consumed = 0;
    for (auto ch : *chars) {
        bytes_consumed += ch->as_string()->length();
        // This character ends past the requested byte, so it is not counted.
        if (bytes_consumed > byte_index)
            return chars_consumed;
        ++chars_consumed;
    }
    return chars_consumed;
}

Value StringObject::sub(Env *env, Value find, Value replacement_value, Block *block) {
if (!block && !replacement_value)
env->raise("ArgumentError", "wrong number of arguments (given 1, expected 2)");
Expand Down

0 comments on commit cac5ae5

Please sign in to comment.