From 8ac2d50feccc27c351968ed9a886c0a35ea86a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= Date: Wed, 9 Jun 2021 13:22:55 +0900 Subject: [PATCH 1/5] Rebuild search by using regex-automata --- Cargo.lock | 10 +++ helix-core/Cargo.toml | 1 + helix-core/src/search.rs | 177 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 6aa9830b9d6f..df5f5093928c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -316,6 +316,7 @@ dependencies = [ "once_cell", "quickcheck", "regex", + "regex-automata", "ropey", "serde", "similar", @@ -757,6 +758,15 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] + [[package]] name = "regex-syntax" version = "0.6.25" diff --git a/helix-core/Cargo.toml b/helix-core/Cargo.toml index 4316dc2c49f1..d117582bbe33 100644 --- a/helix-core/Cargo.toml +++ b/helix-core/Cargo.toml @@ -26,6 +26,7 @@ tree-sitter = "0.19" once_cell = "1.8" arc-swap = "1" regex = "1" +regex-automata = "0.1" serde = { version = "1.0", features = ["derive"] } toml = "0.5" diff --git a/helix-core/src/search.rs b/helix-core/src/search.rs index 243ac227aec2..3a0ed88f9b82 100644 --- a/helix-core/src/search.rs +++ b/helix-core/src/search.rs @@ -43,3 +43,180 @@ pub fn find_nth_prev(text: RopeSlice, ch: char, mut pos: usize, n: usize) -> Opt Some(pos) } + +use crate::movement::Direction; +use regex_automata::{dense, DenseDFA, Error as RegexError, DFA}; +use std::ops::Range; + +pub struct Searcher { + /// Locate end of match searching right. + right_fdfa: DenseDFA, usize>, + /// Locate start of match searching right. + right_rdfa: DenseDFA, usize>, + + /// Locate start of match searching left. + left_fdfa: DenseDFA, usize>, + /// Locate end of match searching left. + left_rdfa: DenseDFA, usize>, +} + +impl Searcher { + pub fn new(pattern: &str) -> Result { + // Check case info for smart case + let has_uppercase = pattern.chars().any(|c| c.is_uppercase()); + + // Create Regex DFAs for all search directions. + let mut builder = dense::Builder::new(); + let builder = builder.case_insensitive(!has_uppercase); + + let left_fdfa = builder.clone().reverse(true).build(pattern)?; + let left_rdfa = builder + .clone() + .anchored(true) + .longest_match(true) + .build(pattern)?; + + let right_fdfa = builder.clone().build(pattern)?; + let right_rdfa = builder + .anchored(true) + .longest_match(true) + .reverse(true) + .build(pattern)?; + + Ok(Searcher { + right_fdfa, + right_rdfa, + left_fdfa, + left_rdfa, + }) + } + pub fn search_prev(&self, text: RopeSlice, offset: usize) -> Option> { + let text = text.slice(..offset); + let start = self.rfind(text, &self.left_fdfa)?; + let end = self.find(text.slice(start..), &self.left_rdfa)?; + + Some(start..start + end) + } + + pub fn search_next(&self, text: RopeSlice, offset: usize) -> Option> { + let text = text.slice(offset..); + let end = self.find(text, &self.right_fdfa)?; + let start = self.rfind(text.slice(..end), &self.right_rdfa)?; + + Some(offset + start..offset + end) + } + + /// Returns the end offset of the longest match. If no match exists, then None is returned. + /// NOTE: based on DFA::find_at + fn find(&self, text: RopeSlice, dfa: &impl DFA) -> Option { + // TOOD: needs to change to rfind condition if searching reverse + // TODO: check this inside main search + // if dfa.is_anchored() && start > 0 { + // return None; + // } + + let mut state = dfa.start_state(); + let mut last_match = if dfa.is_dead_state(state) { + return None; + } else if dfa.is_match_state(state) { + Some(0) + } else { + None + }; + + for chunk in text.chunks() { + for (i, &b) in chunk.as_bytes().iter().enumerate() { + state = unsafe { dfa.next_state_unchecked(state, b) }; + if dfa.is_match_or_dead_state(state) { + if dfa.is_dead_state(state) { + return last_match; + } + last_match = Some(i + 1); + } + } + } + + last_match + } + + /// Returns the start offset of the longest match in reverse, by searching from the end of the + /// input towards the start of the input. If no match exists, then None is returned. In other + /// words, this has the same match semantics as find, but in reverse. + /// + /// NOTE: based on DFA::rfind_at + fn rfind(&self, text: RopeSlice, dfa: &impl DFA) -> Option { + // if dfa.is_anchored() && start < bytes.len() { + // return None; + // } + + let mut state = dfa.start_state(); + let mut last_match = if dfa.is_dead_state(state) { + return None; + } else if dfa.is_match_state(state) { + Some(text.len_bytes()) + } else { + None + }; + + // This is basically chunks().rev() + let (mut chunks, _, _, _) = text.chunks_at_byte(text.len_bytes()); + + while let Some(chunk) = chunks.prev() { + for (i, &b) in chunk.as_bytes().iter().enumerate().rev() { + state = unsafe { dfa.next_state_unchecked(state, b) }; + if dfa.is_match_or_dead_state(state) { + if dfa.is_dead_state(state) { + return last_match; + } + last_match = Some(i); + } + } + } + last_match + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_search_next() { + use crate::Rope; + let text = Rope::from("hello world!"); + + let searcher = Searcher::new(r"\w+").unwrap(); + + let result = searcher.search_next(text.slice(..), 0).unwrap(); + let fragment = text.slice(result.start..result.end); + assert_eq!("hello", fragment); + + let result = searcher.search_next(text.slice(..), result.end).unwrap(); + let fragment = text.slice(result.start..result.end); + assert_eq!("world", fragment); + + let result = searcher.search_next(text.slice(..), result.end); + assert!(result.is_none()); + } + + #[test] + fn test_search_prev() { + use crate::Rope; + let text = Rope::from("hello world!"); + + let searcher = Searcher::new(r"\w+").unwrap(); + + let result = searcher + .search_prev(text.slice(..), text.len_bytes()) + .unwrap(); + let fragment = text.slice(result.start..result.end); + assert_eq!("world", fragment); + + let result = searcher.search_prev(text.slice(..), result.start).unwrap(); + let fragment = text.slice(result.start..result.end); + assert_eq!("hello", fragment); + + let result = searcher.search_prev(text.slice(..), result.start); + assert!(result.is_none()); + } +} From b9ff4bde2ab9f8e956f9d67e1f408f34a473dde7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= Date: Wed, 9 Jun 2021 15:46:13 +0900 Subject: [PATCH 2/5] Only recalculate resize during rendering, this stops flashing on resize --- helix-term/src/ui/editor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index a2b169ed4198..1f10c8dc0c09 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -698,8 +698,8 @@ impl Component for EditorView { fn handle_event(&mut self, event: Event, cx: &mut Context) -> EventResult { match event { Event::Resize(width, height) => { - // HAXX: offset the render area height by 1 to account for prompt/commandline - cx.editor.resize(Rect::new(0, 0, width, height - 1)); + // Ignore this event, we handle resizing just before rendering to screen. + // Handling it here but not re-rendering will cause flashing EventResult::Consumed(None) } Event::Key(key) => { From e3ff5de6a5fcd9f732715ea0081a6b54c632355f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= Date: Wed, 9 Jun 2021 18:24:23 +0900 Subject: [PATCH 3/5] Fix searching through several chunks --- helix-core/src/search.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/helix-core/src/search.rs b/helix-core/src/search.rs index 3a0ed88f9b82..b8157d7434cb 100644 --- a/helix-core/src/search.rs +++ b/helix-core/src/search.rs @@ -109,7 +109,6 @@ impl Searcher { /// Returns the end offset of the longest match. If no match exists, then None is returned. /// NOTE: based on DFA::find_at fn find(&self, text: RopeSlice, dfa: &impl DFA) -> Option { - // TOOD: needs to change to rfind condition if searching reverse // TODO: check this inside main search // if dfa.is_anchored() && start > 0 { // return None; @@ -124,6 +123,8 @@ impl Searcher { None }; + let mut chunk_byte_offset = 0; + for chunk in text.chunks() { for (i, &b) in chunk.as_bytes().iter().enumerate() { state = unsafe { dfa.next_state_unchecked(state, b) }; @@ -131,9 +132,10 @@ impl Searcher { if dfa.is_dead_state(state) { return last_match; } - last_match = Some(i + 1); + last_match = Some(chunk_byte_offset + i + 1); } } + chunk_byte_offset += chunk.len(); } last_match @@ -159,18 +161,19 @@ impl Searcher { }; // This is basically chunks().rev() - let (mut chunks, _, _, _) = text.chunks_at_byte(text.len_bytes()); + let (mut chunks, mut chunk_byte_offset, _, _) = text.chunks_at_byte(text.len_bytes()); while let Some(chunk) = chunks.prev() { - for (i, &b) in chunk.as_bytes().iter().enumerate().rev() { + for (i, &b) in chunk.as_bytes().iter().rev().enumerate() { state = unsafe { dfa.next_state_unchecked(state, b) }; if dfa.is_match_or_dead_state(state) { if dfa.is_dead_state(state) { return last_match; } - last_match = Some(i); + last_match = Some(chunk_byte_offset - i - 1); } } + chunk_byte_offset -= chunk.len(); } last_match } From ea7aad99b863cd23ce5988427573afc605906b39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= Date: Wed, 9 Jun 2021 18:24:42 +0900 Subject: [PATCH 4/5] Use searcher for search --- helix-core/src/search.rs | 2 +- helix-term/src/commands.rs | 143 +++++++++++++++++++++++------------- helix-term/src/keymap.rs | 6 +- helix-term/src/ui/editor.rs | 2 +- helix-view/src/editor.rs | 3 + 5 files changed, 102 insertions(+), 54 deletions(-) diff --git a/helix-core/src/search.rs b/helix-core/src/search.rs index b8157d7434cb..7faa21df09ac 100644 --- a/helix-core/src/search.rs +++ b/helix-core/src/search.rs @@ -44,10 +44,10 @@ pub fn find_nth_prev(text: RopeSlice, ch: char, mut pos: usize, n: usize) -> Opt Some(pos) } -use crate::movement::Direction; use regex_automata::{dense, DenseDFA, Error as RegexError, DFA}; use std::ops::Range; +#[derive(Debug)] pub struct Searcher { /// Locate end of match searching right. right_fdfa: DenseDFA, usize>, diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 7403f5b2616c..a94370ee7807 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -203,7 +203,9 @@ impl Command { split_selection_on_newline, "Split selection on newlines", search, "Search for regex pattern", search_next, "Select next search match", + search_prev, "Select previous search match", extend_search_next, "Add next search match to selection", + extend_search_prev, "Add previous search match to selection", search_selection, "Use current selection as search pattern", extend_line, "Select current line, if already selected, extend to next line", extend_to_line_bounds, "Extend selection to line bounds (line-wise selection)", @@ -1018,25 +1020,91 @@ fn split_selection_on_newline(cx: &mut Context) { doc.set_selection(view.id, selection); } -fn search_impl(doc: &mut Document, view: &mut View, contents: &str, regex: &Regex, extend: bool) { +// -> we always search from after the cursor.head +// TODO: be able to use selection as search query (*/alt *) + +use helix_core::search::Searcher; + +pub fn search(cx: &mut Context) { + let (view, doc) = current!(cx.editor); + + // TODO: could probably share with select_on_matches? + + let view_id = view.id; + let snapshot = doc.selection(view_id).clone(); + + let prompt = Prompt::new( + "search:".to_string(), + Some('\\'), + |_input: &str| Vec::new(), // this is fine because Vec::new() doesn't allocate + move |cx: &mut compositor::Context, input: &str, event: PromptEvent| { + match event { + PromptEvent::Abort => { + let (view, doc) = current!(cx.editor); + doc.set_selection(view.id, snapshot.clone()); + } + PromptEvent::Validate => { + // TODO: push_jump to store selection just before jump + } + PromptEvent::Update => { + // skip empty input, TODO: trigger default + if input.is_empty() { + return; + } + + match Searcher::new(input) { + Ok(searcher) => { + let (view, doc) = current!(cx.editor); + // revert state to what it was before the last update + // TODO: also revert text + doc.set_selection(view.id, snapshot.clone()); + + cx.editor.search = Some(searcher); + _search(cx.editor, Direction::Forward, false); + + // TODO: only store on enter (accept), not update + // cx.editor.registers.write('\\', vec![input.to_string()]); + } + Err(_err) => (), // TODO: mark command line as error + } + } + } + }, + ); + + cx.push_layer(Box::new(prompt)); +} + +pub fn _search(editor: &mut Editor, direction: Direction, extend: bool) { + let (view, doc) = current!(editor); + let text = doc.text().clone(); // need to clone or we run into borrowing issues, but it's a cheap clone + let cursor = doc.selection(view.id).primary().cursor(text.slice(..)); + let start = text.char_to_byte(cursor); + + let mat = if let Some(searcher) = &editor.search { + // use find_at to find the next match after the cursor, loop around the end + // Careful, `Regex` uses `bytes` as offsets, not character indices! + match direction { + Direction::Backward => searcher + .search_prev(text.slice(..), start) + .or_else(|| searcher.search_prev(text.slice(..), text.len_bytes())), + Direction::Forward => searcher + .search_next(text.slice(..), start) + .or_else(|| searcher.search_next(text.slice(..), 0)), + } + } else { + None + }; + + // refetch to avoid borrowing problems + let (view, doc) = current!(editor); let text = doc.text().slice(..); let selection = doc.selection(view.id); - // Get the right side of the primary block cursor. - let start = text.char_to_byte(graphemes::next_grapheme_boundary( - text, - selection.primary().cursor(text), - )); - - // use find_at to find the next match after the cursor, loop around the end - // Careful, `Regex` uses `bytes` as offsets, not character indices! - let mat = regex - .find_at(contents, start) - .or_else(|| regex.find(contents)); // TODO: message on wraparound if let Some(mat) = mat { - let start = text.byte_to_char(mat.start()); - let end = text.byte_to_char(mat.end()); + let start = text.byte_to_char(mat.start); + let end = text.byte_to_char(mat.end); if end == 0 { // skip empty matches that don't make sense @@ -1052,48 +1120,22 @@ fn search_impl(doc: &mut Document, view: &mut View, contents: &str, regex: &Rege doc.set_selection(view.id, selection); align_view(doc, view, Align::Center); }; + view.ensure_cursor_in_view(doc); } -// TODO: use one function for search vs extend -fn search(cx: &mut Context) { - let (_, doc) = current!(cx.editor); - - // TODO: could probably share with select_on_matches? - - // HAXX: sadly we can't avoid allocating a single string for the whole buffer since we can't - // feed chunks into the regex yet - let contents = doc.text().slice(..).to_string(); - - let prompt = ui::regex_prompt( - cx, - "search:".to_string(), - move |view, doc, registers, regex| { - search_impl(doc, view, &contents, ®ex, false); - // TODO: only store on enter (accept), not update - registers.write('\\', vec![regex.as_str().to_string()]); - }, - ); - - cx.push_layer(Box::new(prompt)); +pub fn search_next(cx: &mut Context) { + _search(cx.editor, Direction::Forward, false); } -fn search_next_impl(cx: &mut Context, extend: bool) { - let (view, doc) = current!(cx.editor); - let registers = &mut cx.editor.registers; - if let Some(query) = registers.read('\\') { - let query = query.first().unwrap(); - let contents = doc.text().slice(..).to_string(); - let regex = Regex::new(query).unwrap(); - search_impl(doc, view, &contents, ®ex, extend); - } +pub fn extend_search_next(cx: &mut Context) { + _search(cx.editor, Direction::Forward, true); } - -fn search_next(cx: &mut Context) { - search_next_impl(cx, false); +pub fn search_prev(cx: &mut Context) { + _search(cx.editor, Direction::Backward, false); } -fn extend_search_next(cx: &mut Context) { - search_next_impl(cx, true); +pub fn extend_search_prev(cx: &mut Context) { + _search(cx.editor, Direction::Backward, true); } fn search_selection(cx: &mut Context) { @@ -1102,7 +1144,8 @@ fn search_selection(cx: &mut Context) { let query = doc.selection(view.id).primary().fragment(contents); let regex = regex::escape(&query); cx.editor.registers.write('\\', vec![regex]); - search_next(cx); + let msg = format!("register '{}' set to '{}'", '\\', query); + cx.editor.set_status(msg); } fn extend_line(cx: &mut Context) { diff --git a/helix-term/src/keymap.rs b/helix-term/src/keymap.rs index 053b92e62a66..b02a66885c99 100644 --- a/helix-term/src/keymap.rs +++ b/helix-term/src/keymap.rs @@ -417,8 +417,7 @@ impl Default for Keymaps { "/" => search, // ? for search_reverse "n" => search_next, - "N" => extend_search_next, - // N for search_prev + "N" => search_prev, "*" => search_selection, "u" => undo, @@ -522,6 +521,9 @@ impl Default for Keymaps { "T" => extend_till_prev_char, "F" => extend_prev_char, + "n" => extend_search_next, + "N" => extend_search_prev, + "home" => goto_line_start, "end" => goto_line_end, "esc" => exit_select_mode, diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index 1f10c8dc0c09..31449c9aeb4e 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -697,7 +697,7 @@ impl EditorView { impl Component for EditorView { fn handle_event(&mut self, event: Event, cx: &mut Context) -> EventResult { match event { - Event::Resize(width, height) => { + Event::Resize(_width, _height) => { // Ignore this event, we handle resizing just before rendering to screen. // Handling it here but not re-rendering will cause flashing EventResult::Consumed(None) diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 7e8548e73180..eb108bb8d93b 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -15,6 +15,7 @@ use anyhow::Error; pub use helix_core::diagnostic::Severity; pub use helix_core::register::Registers; +use helix_core::search::Searcher; use helix_core::syntax; use helix_core::Position; @@ -31,6 +32,7 @@ pub struct Editor { pub syn_loader: Arc, pub theme_loader: Arc, + pub search: Option, pub status_msg: Option<(String, Severity)>, } @@ -65,6 +67,7 @@ impl Editor { theme_loader: themes, registers: Registers::default(), clipboard_provider: get_clipboard_provider(), + search: None, status_msg: None, } } From bddf749fba7b078f0fa212948c2eef68df5d2076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bla=C5=BE=20Hrastnik?= Date: Sat, 12 Jun 2021 23:35:25 +0900 Subject: [PATCH 5/5] search: disable unicode support --- helix-core/src/search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helix-core/src/search.rs b/helix-core/src/search.rs index 7faa21df09ac..1549d64b8495 100644 --- a/helix-core/src/search.rs +++ b/helix-core/src/search.rs @@ -67,7 +67,7 @@ impl Searcher { // Create Regex DFAs for all search directions. let mut builder = dense::Builder::new(); - let builder = builder.case_insensitive(!has_uppercase); + let builder = builder.case_insensitive(!has_uppercase).unicode(false); let left_fdfa = builder.clone().reverse(true).build(pattern)?; let left_rdfa = builder