Skip to content

Commit

Permalink
New option: Stop on non-match after a match
Browse files Browse the repository at this point in the history
  • Loading branch information
edoardopirovano committed Jul 8, 2021
1 parent 9eddb71 commit ee576d3
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 11 deletions.
1 change: 1 addition & 0 deletions complete/_rg
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ _rg() {
'(-q --quiet)'{-q,--quiet}'[suppress normal output]'
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
'*'{-u,--unrestricted}'[reduce level of "smart" searching]'
'--stop-on-nonmatch[stop on first non-matching line after a matching one]'

+ operand # Operands
'(--files --type-list file regexp)1: :_guard "^-*" pattern'
Expand Down
21 changes: 21 additions & 0 deletions crates/core/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_vimgrep(&mut args);
flag_with_filename(&mut args);
flag_word_regexp(&mut args);
flag_stop_on_nonmatch(&mut args);
args
}

Expand Down Expand Up @@ -3107,3 +3108,23 @@ This overrides the --line-regexp flag.
.overrides("line-regexp");
args.push(arg);
}

/// Registers the `--stop-on-nonmatch` switch in the given argument list.
///
/// The switch is declared as overriding the `multiline` flag.
fn flag_stop_on_nonmatch(args: &mut Vec<RGArg>) {
    const SHORT: &str = "After a successful match in a file, stop reading the file once a non-matching line is found.";
    const LONG: &str = long!(
        "\
Enabling this option will cause ripgrep to stop reading a file once it encounters
a non-matching line after it has encountered a matching one. This is useful if it
is expected that all matches in a given file will be on sequential lines, for example
due to the files being sorted and the pattern being matched on being at the start
of the line.
This overrides the -U/--multiline flag.
"
    );
    // Build the switch and append it in one expression.
    args.push(
        RGArg::switch("stop-on-nonmatch")
            .help(SHORT)
            .long_help(LONG)
            .overrides("multiline"),
    );
}
3 changes: 2 additions & 1 deletion crates/core/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,8 @@ impl ArgMatches {
.before_context(ctx_before)
.after_context(ctx_after)
.passthru(self.is_present("passthru"))
.memory_map(self.mmap_choice(paths));
.memory_map(self.mmap_choice(paths))
.stop_on_nonmatch(self.is_present("stop-on-nonmatch"));
match self.encoding()? {
EncodingMode::Some(enc) => {
builder.encoding(Some(enc));
Expand Down
48 changes: 39 additions & 9 deletions crates/searcher/src/searcher/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ use crate::sink::{
};
use grep_matcher::{LineMatchKind, Matcher};

/// Outcome of running the fast line-by-line search over a buffer.
///
/// `match_by_line` translates each variant into its own result: `Continue`
/// becomes `Ok(true)`, `Stop` becomes `Ok(false)`, and `SwitchToSlow` makes
/// it call `match_by_line_slow` on the same buffer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FastMatchResult {
    /// The fast path is done with this buffer and searching may go on.
    Continue,
    /// Searching must end now.
    Stop,
    /// The fast path gave up part-way; the slow path must take over.
    SwitchToSlow,
}

#[derive(Debug)]
pub struct Core<'s, M: 's, S> {
config: &'s Config,
Expand All @@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> {
last_line_visited: usize,
after_context_left: usize,
has_sunk: bool,
has_matched: bool,
}

impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Expand All @@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
last_line_visited: 0,
after_context_left: 0,
has_sunk: false,
has_matched: false,
};
if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() {
Expand Down Expand Up @@ -108,8 +116,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}

pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
use FastMatchResult::*;
if self.is_line_by_line_fast() {
self.match_by_line_fast(buf)
match self.match_by_line_fast(buf)? {
SwitchToSlow => self.match_by_line_slow(buf),
Continue => Ok(true),
Stop => Ok(false),
}
} else {
self.match_by_line_slow(buf)
}
Expand Down Expand Up @@ -270,7 +283,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
};
self.set_pos(line.end());
if matched != self.config.invert_match {
let success = matched != self.config.invert_match;
if success {
self.has_matched = true;
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
}
Expand All @@ -286,40 +301,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
return Ok(false);
}
}
if !success && self.config.stop_on_nonmatch && self.has_matched {
return Ok(false);
}
}
Ok(true)
}

fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
fn match_by_line_fast(
&mut self,
buf: &[u8],
) -> Result<FastMatchResult, S::Error> {
debug_assert!(!self.config.passthru);
use FastMatchResult::*;

while !buf[self.pos()..].is_empty() {
if self.has_matched && self.config.stop_on_nonmatch {
return Ok(SwitchToSlow);
}
if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? {
return Ok(false);
return Ok(Stop);
}
} else if let Some(line) = self.find_by_line_fast(buf)? {
self.has_matched = true;
if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? {
return Ok(false);
return Ok(Stop);
}
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
return Ok(Stop);
}
}
self.set_pos(line.end());
if !self.sink_matched(buf, &line)? {
return Ok(false);
return Ok(Stop);
}
} else {
break;
}
}
if !self.after_context_by_line(buf, buf.len())? {
return Ok(false);
return Ok(Stop);
}
self.set_pos(buf.len());
Ok(true)
Ok(Continue)
}

#[inline(always)]
Expand All @@ -344,6 +370,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if invert_match.is_empty() {
return Ok(true);
}
self.has_matched = true;
if !self.after_context_by_line(buf, invert_match.start())? {
return Ok(false);
}
Expand Down Expand Up @@ -574,6 +601,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.config.passthru {
return false;
}
if self.config.stop_on_nonmatch && self.has_matched {
return false;
}
if let Some(line_term) = self.matcher.line_terminator() {
if line_term == self.config.line_term {
return true;
Expand Down
3 changes: 3 additions & 0 deletions crates/searcher/src/searcher/glue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub struct ReadByLine<'s, M, R, S> {
config: &'s Config,
core: Core<'s, M, S>,
rdr: LineBufferReader<'s, R>,
stop_on_nonmatch: bool,
}

impl<'s, M, R, S> ReadByLine<'s, M, R, S>
Expand All @@ -27,13 +28,15 @@ where
matcher: M,
read_from: LineBufferReader<'s, R>,
write_to: S,
stop_on_nonmatch: bool,
) -> ReadByLine<'s, M, R, S> {
debug_assert!(!searcher.multi_line_with_matcher(&matcher));

ReadByLine {
config: &searcher.config,
core: Core::new(searcher, matcher, write_to, false),
rdr: read_from,
stop_on_nonmatch: stop_on_nonmatch,
}
}

Expand Down
31 changes: 30 additions & 1 deletion crates/searcher/src/searcher/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ pub struct Config {
encoding: Option<Encoding>,
/// Whether to do automatic transcoding based on a BOM or not.
bom_sniffing: bool,
/// Whether to stop searching when a non-matching line is found after a match.
stop_on_nonmatch: bool,
}

impl Default for Config {
Expand All @@ -190,6 +192,7 @@ impl Default for Config {
multi_line: false,
encoding: None,
bom_sniffing: true,
stop_on_nonmatch: false,
}
}
}
Expand Down Expand Up @@ -555,6 +558,18 @@ impl SearcherBuilder {
self.config.bom_sniffing = yes;
self
}

/// Stop searching a file when a non-matching line is found after a
/// matching one.
///
/// This is useful for searching sorted files where it is expected that all
/// the matches will be on adjacent lines.
///
/// This is disabled by default.
pub fn stop_on_nonmatch(
&mut self,
stop_on_nonmatch: bool,
) -> &mut SearcherBuilder {
self.config.stop_on_nonmatch = stop_on_nonmatch;
self
}
}

/// A searcher executes searches over a haystack and writes results to a caller
Expand Down Expand Up @@ -732,7 +747,14 @@ impl Searcher {
let mut line_buffer = self.line_buffer.borrow_mut();
let rdr = LineBufferReader::new(decoder, &mut *line_buffer);
log::trace!("generic reader: searching via roll buffer strategy");
ReadByLine::new(self, matcher, rdr, write_to).run()
ReadByLine::new(
self,
matcher,
rdr,
write_to,
self.stop_on_nonmatch(),
)
.run()
}
}

Expand Down Expand Up @@ -838,6 +860,13 @@ impl Searcher {
self.config.multi_line
}

/// Returns true if and only if this searcher is configured to stop
/// when it finds a non-matching line after a matching one.
#[inline]
pub fn stop_on_nonmatch(&self) -> bool {
self.config.stop_on_nonmatch
}

/// Returns true if and only if this searcher will choose a multi-line
/// strategy given the provided matcher.
///
Expand Down
7 changes: 7 additions & 0 deletions tests/feature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -975,3 +975,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "δ");
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
});

// See: https://github.com/BurntSushi/ripgrep/issues/1790
//
// line2 and line3 match the pattern and are adjacent. line4 is the first
// non-matching line after a match, so the expected output leaves out line5
// even though line5 also matches the pattern.
rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "line1\nline2\nline3\nline4\nline5");
cmd.args(&["--stop-on-nonmatch", "[235]"]);
eqnice!("test:line2\ntest:line3\n", cmd.stdout());
});

0 comments on commit ee576d3

Please sign in to comment.