diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c46ab0ec6..714677443a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * [#77](https://github.com/rubocop-hq/rubocop-performance/issues/77): Add new `Performance/BindCall` cop. ([@koic][]) * [#105](https://github.com/rubocop-hq/rubocop-performance/pull/105): Add new `Performance/DeletePrefix` and `Performance/DeleteSuffix` cops. ([@koic][]) +* [#107](https://github.com/rubocop-hq/rubocop-performance/pull/107): Support regexp metacharacter `^` for `Performance/StartWith` cop and regexp metacharacter `$` for `Performance/EndWith` cop. ([@koic][]) ### Bug fixes * [#108](https://github.com/rubocop-hq/rubocop-performance/pull/108): Fix an incorrect autocorrect for `Performance/ReverseEach` when there is a newline between reverse and each. ([@joe-sharp][], [@dischorde][], [@siegfault][]) diff --git a/lib/rubocop/cop/mixin/regexp_metacharacter.rb b/lib/rubocop/cop/mixin/regexp_metacharacter.rb new file mode 100644 index 0000000000..68c7c49840 --- /dev/null +++ b/lib/rubocop/cop/mixin/regexp_metacharacter.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + # Common functionality for handling regexp metacharacters. + module RegexpMetacharacter + def literal_at_start?(regex_str) + # is this regexp 'literal' in the sense of only matching literal + # chars, rather than using metachars like `.` and `*` and so on? + # also, is it anchored at the start of the string? + # (tricky: \s, \d, and so on are metacharacters, but other characters + # escaped with a slash are just literals. LITERAL_REGEX takes all + # that into account.) + regex_str =~ /\A(\\A|\^)(?:#{Util::LITERAL_REGEX})+\z/ + end + + def literal_at_end?(regex_str) + # is this regexp 'literal' in the sense of only matching literal + # chars, rather than using metachars like . and * and so on? + # also, is it anchored at the end of the string? + regex_str =~ /\A(?:#{Util::LITERAL_REGEX})+(\\z|\$)\z/ + end + + def drop_start_metacharacter(regexp_string) + if regexp_string.start_with?('\\A') + regexp_string[2..-1] # drop `\A` anchor + else + regexp_string[1..-1] # drop `^` anchor + end + end + + def drop_end_metacharacter(regexp_string) + if regexp_string.end_with?('\\z') + regexp_string.chomp('\z') # drop `\z` anchor + else + regexp_string.chop # drop `$` anchor + end + end + end + end +end diff --git a/lib/rubocop/cop/performance/delete_prefix.rb b/lib/rubocop/cop/performance/delete_prefix.rb index c006bddc4b..f2867720dc 100644 --- a/lib/rubocop/cop/performance/delete_prefix.rb +++ b/lib/rubocop/cop/performance/delete_prefix.rb @@ -25,6 +25,7 @@ module Performance # class DeletePrefix < Cop extend TargetRubyVersion + include RegexpMetacharacter minimum_target_ruby_version 2.5 @@ -55,12 +56,7 @@ def autocorrect(node) gsub_method?(node) do |receiver, bad_method, regexp_str, _| lambda do |corrector| good_method = PREFERRED_METHODS[bad_method] - - regexp_str = if regexp_str.start_with?('\\A') - regexp_str[2..-1] # drop `\A` anchor - else - regexp_str[1..-1] # drop `^` anchor - end + regexp_str = drop_start_metacharacter(regexp_str) regexp_str = interpret_string_escapes(regexp_str) string_literal = to_string_literal(regexp_str) @@ -70,18 +66,6 @@ def autocorrect(node) end end end - - private - - def literal_at_start?(regex_str) - # is this regexp 'literal' in the sense of only matching literal - # chars, rather than using metachars like `.` and `*` and so on? - # also, is it anchored at the start of the string? - # (tricky: \s, \d, and so on are metacharacters, but other characters - # escaped with a slash are just literals. LITERAL_REGEX takes all - # that into account.) - regex_str =~ /\A(\\A|\^)(?:#{LITERAL_REGEX})+\z/ - end end end end diff --git a/lib/rubocop/cop/performance/delete_suffix.rb b/lib/rubocop/cop/performance/delete_suffix.rb index 9872f70bf6..8d727dd57f 100644 --- a/lib/rubocop/cop/performance/delete_suffix.rb +++ b/lib/rubocop/cop/performance/delete_suffix.rb @@ -25,6 +25,7 @@ module Performance # class DeleteSuffix < Cop extend TargetRubyVersion + include RegexpMetacharacter minimum_target_ruby_version 2.5 @@ -55,12 +56,7 @@ def autocorrect(node) gsub_method?(node) do |receiver, bad_method, regexp_str, _| lambda do |corrector| good_method = PREFERRED_METHODS[bad_method] - - regexp_str = if regexp_str.end_with?('\\z') - regexp_str.chomp('\z') # drop `\z` anchor - else - regexp_str.chop # drop `$` anchor - end + regexp_str = drop_end_metacharacter(regexp_str) regexp_str = interpret_string_escapes(regexp_str) string_literal = to_string_literal(regexp_str) @@ -70,18 +66,6 @@ def autocorrect(node) end end end - - private - - def literal_at_end?(regex_str) - # is this regexp 'literal' in the sense of only matching literal - # chars, rather than using metachars like `.` and `*` and so on? - # also, is it anchored at the start of the string? - # (tricky: \s, \d, and so on are metacharacters, but other characters - # escaped with a slash are just literals. LITERAL_REGEX takes all - # that into account.) - regex_str =~ /\A(?:#{LITERAL_REGEX})+(\\z|\$)\z/ - end end end end diff --git a/lib/rubocop/cop/performance/end_with.rb b/lib/rubocop/cop/performance/end_with.rb index 82a7997d09..584d2eb6f1 100644 --- a/lib/rubocop/cop/performance/end_with.rb +++ b/lib/rubocop/cop/performance/end_with.rb @@ -15,9 +15,18 @@ module Performance # 'abc'.match(/bc\Z/) # /bc\Z/.match('abc') # + # 'abc'.match?(/bc$/) + # /bc$/.match?('abc') + # 'abc' =~ /bc$/ + # /bc$/ =~ 'abc' + # 'abc'.match(/bc$/) + # /bc$/.match('abc') + # # # good # 'abc'.end_with?('bc') class EndWith < Cop + include RegexpMetacharacter + MSG = 'Use `String#end_with?` instead of a regex match anchored to ' \ 'the end of the string.' @@ -27,13 +36,6 @@ class EndWith < Cop (match-with-lvasgn (regexp (str $#literal_at_end?) (regopt)) $_)} PATTERN - def literal_at_end?(regex_str) - # is this regexp 'literal' in the sense of only matching literal - # chars, rather than using metachars like . and * and so on? - # also, is it anchored at the end of the string? - regex_str =~ /\A(?:#{LITERAL_REGEX})+\\z\z/ - end - def on_send(node) return unless redundant_regex?(node) @@ -44,7 +46,7 @@ def on_send(node) def autocorrect(node) redundant_regex?(node) do |receiver, regex_str| receiver, regex_str = regex_str, receiver if receiver.is_a?(String) - regex_str = regex_str[0..-3] # drop \Z anchor + regex_str = drop_end_metacharacter(regex_str) regex_str = interpret_string_escapes(regex_str) lambda do |corrector| diff --git a/lib/rubocop/cop/performance/start_with.rb b/lib/rubocop/cop/performance/start_with.rb index 458f4bad6e..18733d4bdf 100644 --- a/lib/rubocop/cop/performance/start_with.rb +++ b/lib/rubocop/cop/performance/start_with.rb @@ -15,9 +15,18 @@ module Performance # 'abc'.match(/\Aab/) # /\Aab/.match('abc') # + # 'abc'.match?(/^ab/) + # /^ab/.match?('abc') + # 'abc' =~ /^ab/ + # /^ab/ =~ 'abc' + # 'abc'.match(/^ab/) + # /^ab/.match('abc') + # # # good # 'abc'.start_with?('ab') class StartWith < Cop + include RegexpMetacharacter + MSG = 'Use `String#start_with?` instead of a regex match anchored to ' \ 'the beginning of the string.' @@ -27,16 +36,6 @@ class StartWith < Cop (match-with-lvasgn (regexp (str $#literal_at_start?) (regopt)) $_)} PATTERN - def literal_at_start?(regex_str) - # is this regexp 'literal' in the sense of only matching literal - # chars, rather than using metachars like `.` and `*` and so on? - # also, is it anchored at the start of the string? - # (tricky: \s, \d, and so on are metacharacters, but other characters - # escaped with a slash are just literals. LITERAL_REGEX takes all - # that into account.) - regex_str =~ /\A\\A(?:#{LITERAL_REGEX})+\z/ - end - def on_send(node) return unless redundant_regex?(node) @@ -47,7 +46,7 @@ def on_send(node) def autocorrect(node) redundant_regex?(node) do |receiver, regex_str| receiver, regex_str = regex_str, receiver if receiver.is_a?(String) - regex_str = regex_str[2..-1] # drop \A anchor + regex_str = drop_start_metacharacter(regex_str) regex_str = interpret_string_escapes(regex_str) lambda do |corrector| diff --git a/lib/rubocop/cop/performance_cops.rb b/lib/rubocop/cop/performance_cops.rb index 4dfc431e19..446357cff3 100644 --- a/lib/rubocop/cop/performance_cops.rb +++ b/lib/rubocop/cop/performance_cops.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative 'mixin/regexp_metacharacter' + require_relative 'performance/bind_call' require_relative 'performance/caller' require_relative 'performance/case_when_splat' diff --git a/manual/cops_performance.md b/manual/cops_performance.md index 5a74a449d1..1b5bb34634 100644 --- a/manual/cops_performance.md +++ b/manual/cops_performance.md @@ -390,6 +390,13 @@ would suffice. 'abc'.match(/bc\Z/) /bc\Z/.match('abc') +'abc'.match?(/bc$/) +/bc$/.match?('abc') +'abc' =~ /bc$/ +/bc$/ =~ 'abc' +'abc'.match(/bc$/) +/bc$/.match('abc') + # good 'abc'.end_with?('bc') ``` @@ -860,6 +867,13 @@ This cop identifies unnecessary use of a regex where 'abc'.match(/\Aab/) /\Aab/.match('abc') +'abc'.match?(/^ab/) +/^ab/.match?('abc') +'abc' =~ /^ab/ +/^ab/ =~ 'abc' +'abc'.match(/^ab/) +/^ab/.match('abc') + # good 'abc'.start_with?('ab') ``` diff --git a/spec/rubocop/cop/performance/end_with_spec.rb b/spec/rubocop/cop/performance/end_with_spec.rb index 06b4c84708..bbefdbedae 100644 --- a/spec/rubocop/cop/performance/end_with_spec.rb +++ b/spec/rubocop/cop/performance/end_with_spec.rb @@ -14,6 +14,16 @@ expect(new_source).to eq "str.end_with?('abc')" end + it "autocorrects str#{method} /abc$/" do + new_source = autocorrect_source("str#{method} /abc$/") + expect(new_source).to eq "str.end_with?('abc')" + end + + it "autocorrects /abc$/#{method} str" do + new_source = autocorrect_source("/abc$/#{method} str") + expect(new_source).to eq "str.end_with?('abc')" + end + it "autocorrects str#{method} /\\n\\z/" do new_source = autocorrect_source("str#{method} /\\n\\z/") expect(new_source).to eq 'str.end_with?("\n")' diff --git a/spec/rubocop/cop/performance/start_with_spec.rb b/spec/rubocop/cop/performance/start_with_spec.rb index dbe909aa18..3c7f4dd82d 100644 --- a/spec/rubocop/cop/performance/start_with_spec.rb +++ b/spec/rubocop/cop/performance/start_with_spec.rb @@ -14,6 +14,16 @@ expect(new_source).to eq "str.start_with?('abc')" end + it "autocorrects str#{method} /^abc/" do + new_source = autocorrect_source("str#{method} /^abc/") + expect(new_source).to eq "str.start_with?('abc')" + end + + it "autocorrects /^abc/#{method} str" do + new_source = autocorrect_source("/^abc/#{method} str") + expect(new_source).to eq "str.start_with?('abc')" + end + # escapes like "\n" # note that "\b" is a literal backspace char in a double-quoted string... # but in a regex, it's an anchor on a word boundary