From 3dcf3e0403da54c808d62f5151c40527a39d9419 Mon Sep 17 00:00:00 2001 From: Edoardo Pirovano Date: Wed, 7 Jul 2021 17:50:23 +0100 Subject: [PATCH] New option: Stop on non-match after a match --- crates/core/app.rs | 21 ++++++++++++ crates/core/args.rs | 3 +- crates/searcher/src/searcher/core.rs | 48 ++++++++++++++++++++++------ crates/searcher/src/searcher/glue.rs | 3 ++ crates/searcher/src/searcher/mod.rs | 31 +++++++++++++++++- tests/feature.rs | 7 ++++ 6 files changed, 102 insertions(+), 11 deletions(-) diff --git a/crates/core/app.rs b/crates/core/app.rs index fb257d6cd3..f3d995674c 100644 --- a/crates/core/app.rs +++ b/crates/core/app.rs @@ -644,6 +644,7 @@ pub fn all_args_and_flags() -> Vec { flag_vimgrep(&mut args); flag_with_filename(&mut args); flag_word_regexp(&mut args); + flag_stop_on_nonmatch(&mut args); args } @@ -3107,3 +3108,23 @@ This overrides the --line-regexp flag. .overrides("line-regexp"); args.push(arg); } + +fn flag_stop_on_nonmatch(args: &mut Vec) { + const SHORT: &str = "After a successful match in a file, stop reading the file once a non-matching line is found."; + const LONG: &str = long!( + "\ +Enabling this option will cause ripgrep to stop reading a file once it encounters +a non-matching line after it has encountered a matching one. This is useful if it +is expected that all matches in a given file will be on sequential lines, for example +due to the files being sorted and the pattern being matched on being at the start +of the line. + +This overrides the -U/--multiline flag. 
+" + ); + let arg = RGArg::switch("stop-on-nonmatch") + .help(SHORT) + .long_help(LONG) + .overrides("multiline"); + args.push(arg); +} diff --git a/crates/core/args.rs b/crates/core/args.rs index 6623cbd54c..abe267fd45 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -840,7 +840,8 @@ impl ArgMatches { .before_context(ctx_before) .after_context(ctx_after) .passthru(self.is_present("passthru")) - .memory_map(self.mmap_choice(paths)); + .memory_map(self.mmap_choice(paths)) + .stop_on_nonmatch(self.is_present("stop-on-nonmatch")); match self.encoding()? { EncodingMode::Some(enc) => { builder.encoding(Some(enc)); diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs index b6deda0187..c795f46329 100644 --- a/crates/searcher/src/searcher/core.rs +++ b/crates/searcher/src/searcher/core.rs @@ -10,6 +10,12 @@ use crate::sink::{ }; use grep_matcher::{LineMatchKind, Matcher}; +enum FastMatchResult { + Continue, + Stop, + SwitchToSlow, +} + #[derive(Debug)] pub struct Core<'s, M: 's, S> { config: &'s Config, @@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> { last_line_visited: usize, after_context_left: usize, has_sunk: bool, + has_matched: bool, } impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { @@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { last_line_visited: 0, after_context_left: 0, has_sunk: false, + has_matched: false, }; if !core.searcher.multi_line_with_matcher(&core.matcher) { if core.is_line_by_line_fast() { @@ -108,8 +116,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } pub fn match_by_line(&mut self, buf: &[u8]) -> Result { + use FastMatchResult::*; if self.is_line_by_line_fast() { - self.match_by_line_fast(buf) + match self.match_by_line_fast(buf)? 
{ + SwitchToSlow => self.match_by_line_slow(buf), + Continue => Ok(true), + Stop => Ok(false), + } } else { self.match_by_line_slow(buf) } @@ -270,7 +283,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } }; self.set_pos(line.end()); - if matched != self.config.invert_match { + let success = matched != self.config.invert_match; + if success { + self.has_matched = true; if !self.before_context_by_line(buf, line.start())? { return Ok(false); } @@ -286,40 +301,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { return Ok(false); } } + if !success && self.config.stop_on_nonmatch && self.has_matched { + return Ok(false); + } } Ok(true) } - fn match_by_line_fast(&mut self, buf: &[u8]) -> Result { + fn match_by_line_fast( + &mut self, + buf: &[u8], + ) -> Result { debug_assert!(!self.config.passthru); + use FastMatchResult::*; while !buf[self.pos()..].is_empty() { + if self.has_matched && self.config.stop_on_nonmatch { + return Ok(SwitchToSlow); + } if self.config.invert_match { if !self.match_by_line_fast_invert(buf)? { - return Ok(false); + return Ok(Stop); } } else if let Some(line) = self.find_by_line_fast(buf)? { + self.has_matched = true; if self.config.max_context() > 0 { if !self.after_context_by_line(buf, line.start())? { - return Ok(false); + return Ok(Stop); } if !self.before_context_by_line(buf, line.start())? { - return Ok(false); + return Ok(Stop); } } self.set_pos(line.end()); if !self.sink_matched(buf, &line)? { - return Ok(false); + return Ok(Stop); } } else { break; } } if !self.after_context_by_line(buf, buf.len())? { - return Ok(false); + return Ok(Stop); } self.set_pos(buf.len()); - Ok(true) + Ok(Continue) } #[inline(always)] @@ -344,6 +370,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if invert_match.is_empty() { return Ok(true); } + self.has_matched = true; if !self.after_context_by_line(buf, invert_match.start())? 
{ return Ok(false); } @@ -574,6 +601,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if self.config.passthru { return false; } + if self.config.stop_on_nonmatch && self.has_matched { + return false; + } if let Some(line_term) = self.matcher.line_terminator() { if line_term == self.config.line_term { return true; diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs index 21a58aa4cf..bb3066be04 100644 --- a/crates/searcher/src/searcher/glue.rs +++ b/crates/searcher/src/searcher/glue.rs @@ -14,6 +14,7 @@ pub struct ReadByLine<'s, M, R, S> { config: &'s Config, core: Core<'s, M, S>, rdr: LineBufferReader<'s, R>, + stop_on_nonmatch: bool, } impl<'s, M, R, S> ReadByLine<'s, M, R, S> @@ -27,6 +28,7 @@ where matcher: M, read_from: LineBufferReader<'s, R>, write_to: S, + stop_on_nonmatch: bool, ) -> ReadByLine<'s, M, R, S> { debug_assert!(!searcher.multi_line_with_matcher(&matcher)); @@ -34,6 +36,7 @@ where config: &searcher.config, core: Core::new(searcher, matcher, write_to, false), rdr: read_from, + stop_on_nonmatch: stop_on_nonmatch, } } diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index 3bd939bbef..1eaee283e8 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -173,6 +173,8 @@ pub struct Config { encoding: Option, /// Whether to do automatic transcoding based on a BOM or not. bom_sniffing: bool, + /// Whether to stop searching when a non-matching line is found after a match. + stop_on_nonmatch: bool, } impl Default for Config { @@ -190,6 +192,7 @@ impl Default for Config { multi_line: false, encoding: None, bom_sniffing: true, + stop_on_nonmatch: false, } } } @@ -555,6 +558,18 @@ impl SearcherBuilder { self.config.bom_sniffing = yes; self } + + /// Stop searching a file when a non-matching line is found after a matching one. 
+ /// + /// This is useful for searching sorted files where it is expected that all + /// the matches will be on adjacent lines. + pub fn stop_on_nonmatch( + &mut self, + stop_on_nonmatch: bool, + ) -> &mut SearcherBuilder { + self.config.stop_on_nonmatch = stop_on_nonmatch; + self + } } /// A searcher executes searches over a haystack and writes results to a caller @@ -732,7 +747,14 @@ impl Searcher { let mut line_buffer = self.line_buffer.borrow_mut(); let rdr = LineBufferReader::new(decoder, &mut *line_buffer); log::trace!("generic reader: searching via roll buffer strategy"); - ReadByLine::new(self, matcher, rdr, write_to).run() + ReadByLine::new( + self, + matcher, + rdr, + write_to, + self.stop_on_nonmatch(), + ) + .run() } } @@ -838,6 +860,13 @@ impl Searcher { self.config.multi_line } + /// Returns true if and only if this searcher is configured to stop + /// when it finds a non-matching line after a matching one. + #[inline] + pub fn stop_on_nonmatch(&self) -> bool { + self.config.stop_on_nonmatch + } + /// Returns true if and only if this searcher will choose a multi-line /// strategy given the provided matcher. /// diff --git a/tests/feature.rs b/tests/feature.rs index 36cbad73cb..bdf9a14031 100644 --- a/tests/feature.rs +++ b/tests/feature.rs @@ -975,3 +975,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| { dir.create("test", "δ"); cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err(); }); + +// See: https://github.com/BurntSushi/ripgrep/issues/1790 +rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "line1\nline2\nline3\nline4\nline5"); + cmd.args(&["--stop-on-nonmatch", "[235]"]); + eqnice!("test:line2\ntest:line3\n", cmd.stdout()); +});