helix-editor · archseer · Jun 9, 2021 · Jun 9, 2021 · Jun 9, 2021 · Jun 9, 2021
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml
@@ -26,6 +26,7 @@ tree-sitter = "0.19"
 once_cell = "1.8"
 arc-swap = "1"
 regex = "1"
+regex-automata = "0.1"
 
 serde = { version = "1.0", features = ["derive"] }
 toml = "0.5"

diff --git a/helix-core/src/search.rs b/helix-core/src/search.rs
@@ -43,3 +43,183 @@ pub fn find_nth_prev(text: RopeSlice, ch: char, mut pos: usize, n: usize) -> Opt
 
     Some(pos)
 }
+
+use regex_automata::{dense, DenseDFA, Error as RegexError, DFA};
+use std::ops::Range;
+
+/// Regex searcher over rope text, backed by four dense DFAs compiled from one pattern.
+///
+/// Each search direction uses a pair of DFAs: one to locate the first end of the
+/// match that is reached, and one to locate the opposite end.
+#[derive(Debug)]
+pub struct Searcher {
+    /// Locate end of match searching right (first step of `search_next`).
+    right_fdfa: DenseDFA<Vec<usize>, usize>,
+    /// Locate start of match searching right (second step of `search_next`).
+    right_rdfa: DenseDFA<Vec<usize>, usize>,
+
+    /// Locate start of match searching left (first step of `search_prev`).
+    left_fdfa: DenseDFA<Vec<usize>, usize>,
+    /// Locate end of match searching left (second step of `search_prev`).
+    left_rdfa: DenseDFA<Vec<usize>, usize>,
+}
+
+impl Searcher {
+    /// Compiles `pattern` into the four DFAs used for forward and backward searching.
+    ///
+    /// Matching is "smart case": it is case-insensitive unless `pattern` contains at
+    /// least one uppercase character. Unicode mode is disabled on the builder.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `regex_automata` error if any of the four DFAs fails to build.
+    pub fn new(pattern: &str) -> Result<Searcher, RegexError> {
+        // Check case info for smart case
+        let has_uppercase = pattern.chars().any(|c| c.is_uppercase());
+
+        // Create Regex DFAs for all search directions.
+        let mut builder = dense::Builder::new();
+        let builder = builder.case_insensitive(!has_uppercase).unicode(false);
+
+        let left_fdfa = builder.clone().reverse(true).build(pattern)?;
+        let left_rdfa = builder
+            .clone()
+            .anchored(true)
+            .longest_match(true)
+            .build(pattern)?;
+
+        let right_fdfa = builder.clone().build(pattern)?;
+        let right_rdfa = builder
+            .anchored(true)
+            .longest_match(true)
+            .reverse(true)
+            .build(pattern)?;
+
+        Ok(Searcher {
+            right_fdfa,
+            right_rdfa,
+            left_fdfa,
+            left_rdfa,
+        })
+    }
+
+    /// Searches leftwards, considering only the text before `offset`.
+    ///
+    /// `offset` and the returned range are **byte** offsets into `text`. Returns
+    /// `None` when nothing before `offset` matches.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset` is greater than `text.len_bytes()`.
+    pub fn search_prev(&self, text: RopeSlice, offset: usize) -> Option<Range<usize>> {
+        // `RopeSlice::slice` takes char indices, so byte offsets must be converted
+        // first; the two only coincide for pure ASCII text.
+        let text = text.slice(..text.byte_to_char(offset));
+        let start = self.rfind(text, &self.left_fdfa)?;
+        let end = self.find(text.slice(text.byte_to_char(start)..), &self.left_rdfa)?;
+
+        Some(start..start + end)
+    }
+
+    /// Searches rightwards, considering only the text from `offset` onwards.
+    ///
+    /// `offset` and the returned range are **byte** offsets into `text`. Returns
+    /// `None` when nothing at or after `offset` matches.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset` is greater than `text.len_bytes()`.
+    pub fn search_next(&self, text: RopeSlice, offset: usize) -> Option<Range<usize>> {
+        // `RopeSlice::slice` takes char indices, so byte offsets must be converted
+        // first; the two only coincide for pure ASCII text.
+        let first_char = text.byte_to_char(offset);
+        // Byte offset at which the searched slice really begins. Identical to
+        // `offset` whenever `offset` lies on a char boundary.
+        let base = text.char_to_byte(first_char);
+        let text = text.slice(first_char..);
+        let end = self.find(text, &self.right_fdfa)?;
+        let start = self.rfind(text.slice(..text.byte_to_char(end)), &self.right_rdfa)?;
+
+        Some(base + start..base + end)
+    }
+
+    /// Runs `dfa` forward over the bytes of `text` and returns the byte offset just
+    /// past the last match state seen before the DFA died or the input ended.
+    /// If no match exists, then None is returned.
+    ///
+    /// NOTE: based on DFA::find_at
+    fn find(&self, text: RopeSlice, dfa: &impl DFA) -> Option<usize> {
+        // TODO: check this inside main search
+        // if dfa.is_anchored() && start > 0 {
+        //     return None;
+        // }
+
+        let mut state = dfa.start_state();
+        let mut last_match = if dfa.is_dead_state(state) {
+            return None;
+        } else if dfa.is_match_state(state) {
+            Some(0)
+        } else {
+            None
+        };
+
+        // Byte offset of the current chunk's first byte within `text`.
+        let mut chunk_byte_offset = 0;
+
+        for chunk in text.chunks() {
+            for (i, &b) in chunk.as_bytes().iter().enumerate() {
+                // SAFETY: `state` is either `dfa.start_state()` or the result of a
+                // previous transition of this same DFA, so it is presumed to be a
+                // valid state id (requirement per the regex-automata docs; confirm).
+                state = unsafe { dfa.next_state_unchecked(state, b) };
+                if dfa.is_match_or_dead_state(state) {
+                    if dfa.is_dead_state(state) {
+                        return last_match;
+                    }
+                    last_match = Some(chunk_byte_offset + i + 1);
+                }
+            }
+            chunk_byte_offset += chunk.len();
+        }
+
+        last_match
+    }
+
+    /// Runs `dfa` over the bytes of `text` from the end towards the start and returns
+    /// the byte offset of the last match state seen before the DFA died or the input
+    /// was exhausted. If no match exists, then None is returned. In other words, this
+    /// has the same match semantics as find, but in reverse.
+    ///
+    /// NOTE: based on DFA::rfind_at
+    fn rfind(&self, text: RopeSlice, dfa: &impl DFA) -> Option<usize> {
+        // TODO: check this inside main search
+        // if dfa.is_anchored() && start < bytes.len() {
+        //     return None;
+        // }
+
+        let mut state = dfa.start_state();
+        let mut last_match = if dfa.is_dead_state(state) {
+            return None;
+        } else if dfa.is_match_state(state) {
+            Some(text.len_bytes())
+        } else {
+            None
+        };
+
+        // This is basically chunks().rev()
+        // Assumes the returned byte offset for the one-past-the-end position equals
+        // `text.len_bytes()`, i.e. the end of the last chunk — verify against ropey.
+        let (mut chunks, mut chunk_byte_offset, _, _) = text.chunks_at_byte(text.len_bytes());
+
+        while let Some(chunk) = chunks.prev() {
+            // Here `chunk_byte_offset` is the offset just past this chunk's last byte.
+            for (i, &b) in chunk.as_bytes().iter().rev().enumerate() {
+                // SAFETY: `state` is either `dfa.start_state()` or the result of a
+                // previous transition of this same DFA, so it is presumed to be a
+                // valid state id (requirement per the regex-automata docs; confirm).
+                state = unsafe { dfa.next_state_unchecked(state, b) };
+                if dfa.is_match_or_dead_state(state) {
+                    if dfa.is_dead_state(state) {
+                        return last_match;
+                    }
+                    last_match = Some(chunk_byte_offset - i - 1);
+                }
+            }
+            chunk_byte_offset -= chunk.len();
+        }
+        last_match
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_search_next() {
+        use crate::Rope;
+        let text = Rope::from("hello world!");
+
+        let searcher = Searcher::new(r"\w+").unwrap();
+
+        let result = searcher.search_next(text.slice(..), 0).unwrap();
+        let fragment = text.slice(result.start..result.end);
+        assert_eq!("hello", fragment);
+
+        let result = searcher.search_next(text.slice(..), result.end).unwrap();
+        let fragment = text.slice(result.start..result.end);
+        assert_eq!("world", fragment);
+
+        let result = searcher.search_next(text.slice(..), result.end);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_search_prev() {
+        use crate::Rope;
+        let text = Rope::from("hello world!");
+
+        let searcher = Searcher::new(r"\w+").unwrap();
+
+        let result = searcher
+            .search_prev(text.slice(..), text.len_bytes())
+            .unwrap();
+        let fragment = text.slice(result.start..result.end);
+        assert_eq!("world", fragment);
+
+        let result = searcher.search_prev(text.slice(..), result.start).unwrap();
+        let fragment = text.slice(result.start..result.end);
+        assert_eq!("hello", fragment);
+
+        let result = searcher.search_prev(text.slice(..), result.start);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_search_non_ascii() {
+        use crate::Rope;
+        // 'é' is two bytes long, so byte offsets and char indices diverge after it.
+        let text = Rope::from("héllo world!");
+
+        let searcher = Searcher::new("world").unwrap();
+
+        // "world" occupies bytes 7..12 (chars 6..11).
+        assert_eq!(Some(7..12), searcher.search_next(text.slice(..), 0));
+        // Byte 3 is the first 'l', i.e. char 2.
+        assert_eq!(Some(7..12), searcher.search_next(text.slice(..), 3));
+        assert_eq!(
+            Some(7..12),
+            searcher.search_prev(text.slice(..), text.len_bytes())
+        );
+        assert_eq!(None, searcher.search_prev(text.slice(..), 7));
+    }
+}
diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs
@@ -203,7 +203,9 @@ impl Command {
         split_selection_on_newline, "Split selection on newlines",
         search, "Search for regex pattern",
         search_next, "Select next search match",
+        search_prev, "Select previous search match",
         extend_search_next, "Add next search match to selection",
+        extend_search_prev, "Add previous search match to selection",
         search_selection, "Use current selection as search pattern",
         extend_line, "Select current line, if already selected, extend to next line",
         extend_to_line_bounds, "Extend selection to line bounds (line-wise selection)",
@@ -1018,25 +1020,91 @@ fn split_selection_on_newline(cx: &mut Context) {
     doc.set_selection(view.id, selection);
 }
 
-fn search_impl(doc: &mut Document, view: &mut View, contents: &str, regex: &Regex, extend: bool) {
+// -> we always search from after the cursor.head
+// TODO: be able to use selection as search query (*/alt *)
+
+use helix_core::search::Searcher;
+
+/// Opens the interactive search prompt.
+///
+/// The primary view's selection is snapshotted up front. On every prompt update with
+/// non-empty input, the input is compiled into a `Searcher`; if that succeeds the
+/// selection is reset to the snapshot, the searcher is stored in `cx.editor.search`
+/// and a forward `_search` is run. Aborting the prompt restores the snapshot.
+/// Input that fails to compile is currently ignored.
+pub fn search(cx: &mut Context) {
+    let (view, doc) = current!(cx.editor);
+
+    // TODO: could probably share with select_on_matches?
+
+    let view_id = view.id;
+    // Selection to return to on abort and before each incremental re-search.
+    let snapshot = doc.selection(view_id).clone();
+
+    let prompt = Prompt::new(
+        // NOTE(review): the lines below add the prompt label three times
+        // ("search:", "search: ", "search: ") — confirm which single label is intended.
+        "search:".to_string(),
-        "search:".to_string(),
+        "search: ".to_string(),
-        "search:".to_string(),
+        "search: ".to_string(),
+        Some('\\'),
+        |_input: &str| Vec::new(), // this is fine because Vec::new() doesn't allocate
+        move |cx: &mut compositor::Context, input: &str, event: PromptEvent| {
+            match event {
+                PromptEvent::Abort => {
+                    // Put the selection back to where it was when the prompt opened.
+                    let (view, doc) = current!(cx.editor);
+                    doc.set_selection(view.id, snapshot.clone());
+                }
+                PromptEvent::Validate => {
+                    // TODO: push_jump to store selection just before jump
+                }
+                PromptEvent::Update => {
+                    // skip empty input, TODO: trigger default
+                    if input.is_empty() {
+                        return;
+                    }
+
+                    match Searcher::new(input) {
+                        Ok(searcher) => {
+                            let (view, doc) = current!(cx.editor);
+                            // revert state to what it was before the last update
+                            // TODO: also revert text
+                            doc.set_selection(view.id, snapshot.clone());
+
+                            cx.editor.search = Some(searcher);
+                            _search(cx.editor, Direction::Forward, false);
+
+                            // TODO: only store on enter (accept), not update
+                            // cx.editor.registers.write('\\', vec![input.to_string()]);
+                        }
+                        Err(_err) => (), // TODO: mark command line as error
+                    }
+                }
+            }
+        },
+    );
+
+    cx.push_layer(Box::new(prompt));
+}
+
+pub fn _search(editor: &mut Editor, direction: Direction, extend: bool) {
+    let (view, doc) = current!(editor);
+    let text = doc.text().clone(); // need to clone or we run into borrowing issues, but it's a cheap clone
+    let cursor = doc.selection(view.id).primary().cursor(text.slice(..));
+    let start = text.char_to_byte(cursor);
+
+    let mat = if let Some(searcher) = &editor.search {
+        // use find_at to find the next match after the cursor, loop around the end
+        // Careful, `Regex` uses `bytes` as offsets, not character indices!
+        match direction {
+            Direction::Backward => searcher
+                .search_prev(text.slice(..), start)
+                .or_else(|| searcher.search_prev(text.slice(..), text.len_bytes())),
+            Direction::Forward => searcher
+                .search_next(text.slice(..), start)
+                .or_else(|| searcher.search_next(text.slice(..), 0)),
+        }
+    } else {
+        None
+    };
+
+    // refetch to avoid borrowing problems
+    let (view, doc) = current!(editor);
     let text = doc.text().slice(..);
     let selection = doc.selection(view.id);
 
-    // Get the right side of the primary block cursor.
-    let start = text.char_to_byte(graphemes::next_grapheme_boundary(
-        text,
-        selection.primary().cursor(text),
-    ));
-
-    // use find_at to find the next match after the cursor, loop around the end
-    // Careful, `Regex` uses `bytes` as offsets, not character indices!
-    let mat = regex
-        .find_at(contents, start)
-        .or_else(|| regex.find(contents));
     // TODO: message on wraparound
     if let Some(mat) = mat {
-        let start = text.byte_to_char(mat.start());
-        let end = text.byte_to_char(mat.end());
+        let start = text.byte_to_char(mat.start);
+        let end = text.byte_to_char(mat.end);
 
         if end == 0 {
             // skip empty matches that don't make sense
@@ -1052,48 +1120,22 @@ fn search_impl(doc: &mut Document, view: &mut View, contents: &str, regex: &Rege
         doc.set_selection(view.id, selection);
         align_view(doc, view, Align::Center);
     };
+    view.ensure_cursor_in_view(doc);
 }
 
-// TODO: use one function for search vs extend
-fn search(cx: &mut Context) {
-    let (_, doc) = current!(cx.editor);
-
-    // TODO: could probably share with select_on_matches?
-
-    // HAXX: sadly we can't avoid allocating a single string for the whole buffer since we can't
-    // feed chunks into the regex yet
-    let contents = doc.text().slice(..).to_string();
-
-    let prompt = ui::regex_prompt(
-        cx,
-        "search:".to_string(),
-        move |view, doc, registers, regex| {
-            search_impl(doc, view, &contents, &regex, false);
-            // TODO: only store on enter (accept), not update
-            registers.write('\\', vec![regex.as_str().to_string()]);
-        },
-    );
-
-    cx.push_layer(Box::new(prompt));
+/// Runs `_search` in the forward direction with `extend` set to `false`.
+pub fn search_next(cx: &mut Context) {
+    _search(cx.editor, Direction::Forward, false);
 }
 
-fn search_next_impl(cx: &mut Context, extend: bool) {
-    let (view, doc) = current!(cx.editor);
-    let registers = &mut cx.editor.registers;
-    if let Some(query) = registers.read('\\') {
-        let query = query.first().unwrap();
-        let contents = doc.text().slice(..).to_string();
-        let regex = Regex::new(query).unwrap();
-        search_impl(doc, view, &contents, &regex, extend);
-    }
+/// Runs `_search` in the forward direction with `extend` set to `true`.
+pub fn extend_search_next(cx: &mut Context) {
+    _search(cx.editor, Direction::Forward, true);
 }
-
-fn search_next(cx: &mut Context) {
-    search_next_impl(cx, false);
+/// Runs `_search` in the backward direction with `extend` set to `false`.
+pub fn search_prev(cx: &mut Context) {
+    _search(cx.editor, Direction::Backward, false);
 }
 
-fn extend_search_next(cx: &mut Context) {
-    search_next_impl(cx, true);
+/// Runs `_search` in the backward direction with `extend` set to `true`.
+pub fn extend_search_prev(cx: &mut Context) {
+    _search(cx.editor, Direction::Backward, true);
 }
 
 fn search_selection(cx: &mut Context) {
@@ -1102,7 +1144,8 @@ fn search_selection(cx: &mut Context) {
     let query = doc.selection(view.id).primary().fragment(contents);
     let regex = regex::escape(&query);
     cx.editor.registers.write('\\', vec![regex]);
-    search_next(cx);
+    let msg = format!("register '{}' set to '{}'", '\\', query);
+    cx.editor.set_status(msg);
 }
 
 fn extend_line(cx: &mut Context) {