From d8a9cc6428e76d1e075d58c88fa09f762bc3c3d1 Mon Sep 17 00:00:00 2001 From: "Azat S." Date: Sun, 20 Oct 2024 21:09:56 +0300 Subject: [PATCH] fix: fix todo comments identification --- Cargo.lock | 1 + Cargo.toml | 1 + src/identify_supported_file.rs | 9 ++++++-- src/identify_todo_comment.rs | 41 ++++++++++++++++++++++------------ tests/identify_todo_comment.rs | 24 +++++++++++++++++--- 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13bf49a..35a12f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1222,6 +1222,7 @@ dependencies = [ "clap", "futures", "indicatif", + "lazy_static", "mockall", "open", "oxc", diff --git a/Cargo.toml b/Cargo.toml index 1f396c0..a1eff1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ chrono = { version = "0.4.38", features = ["serde"] } clap = { version = "4.5.20", features = ["derive"] } futures = "0.3.31" indicatif = "0.17.8" +lazy_static = "1.5.0" open = "5.3.0" oxc = "0.31.0" regex = "1.11.0" diff --git a/src/identify_supported_file.rs b/src/identify_supported_file.rs index 1d8393c..1b1c7f3 100644 --- a/src/identify_supported_file.rs +++ b/src/identify_supported_file.rs @@ -1,6 +1,11 @@ +use lazy_static::lazy_static; use regex::Regex; +lazy_static! 
{ + static ref SUPPORTED_FILE_REGEX: Regex = + Regex::new(r"\.[cm]?[jt]sx?$").unwrap(); +} + pub fn identify_supported_file(file_name: &str) -> bool { - let re = Regex::new(r"\.[cm]?[jt]sx?$").unwrap(); - re.is_match(file_name) + SUPPORTED_FILE_REGEX.is_match(file_name) } diff --git a/src/identify_todo_comment.rs b/src/identify_todo_comment.rs index dce6011..3d68fae 100644 --- a/src/identify_todo_comment.rs +++ b/src/identify_todo_comment.rs @@ -1,9 +1,12 @@ -pub const PRIMARY_TODO_KEYWORDS: [&str; 9] = [ - "TODO", "FIXME", "CHANGED", "XXX", "HACK", "BUG", "OPTIMIZE", "REFACTOR", - "TEMP", +use lazy_static::lazy_static; +use regex::Regex; + +pub const PRIMARY_TODO_KEYWORDS: [&str; 8] = [ + "TODO", "FIXME", "XXX", "HACK", "BUG", "OPTIMIZE", "REFACTOR", "TEMP", ]; -pub const SECONDARY_TODO_KEYWORDS: [&str; 13] = [ +pub const SECONDARY_TODO_KEYWORDS: [&str; 14] = [ + "CHANGED", "IDEA", "NOTE", "REVIEW", @@ -19,23 +22,33 @@ pub const SECONDARY_TODO_KEYWORDS: [&str; 13] = [ "COMBAK", ]; +lazy_static! 
{ + static ref PRIMARY_KEYWORD_REGEXES: Vec<Regex> = PRIMARY_TODO_KEYWORDS + .iter() + .map(|keyword| Regex::new(&format!( + r"(?i)\b{}\b", + regex::escape(keyword) + )) + .unwrap()) + .collect(); + static ref SECONDARY_KEYWORD_REGEX: Regex = + Regex::new(r"(?i)^[^\w]*(\w+)([\s\p{P}]*)(:|[\p{P}\s]|$)").unwrap(); +} + pub fn identify_todo_comment(comment_text: &str) -> Option<String> { let trimmed_text = comment_text.trim(); - let words_with_separators: Vec<&str> = - trimmed_text.split_whitespace().collect(); - - for keyword in PRIMARY_TODO_KEYWORDS.iter() { - for word in &words_with_separators { - if word.to_uppercase().contains(&keyword.to_uppercase()) { - return Some(keyword.to_string()); - } + for (i, re) in PRIMARY_KEYWORD_REGEXES.iter().enumerate() { + if re.is_match(trimmed_text) { + return Some(PRIMARY_TODO_KEYWORDS[i].to_string()); } } - if let Some(first_word) = words_with_separators.first() { + if let Some(captures) = SECONDARY_KEYWORD_REGEX.captures(trimmed_text) { + let first_word = captures.get(1).unwrap().as_str(); + for keyword in SECONDARY_TODO_KEYWORDS.iter() { - if first_word.to_uppercase() == keyword.to_uppercase() { + if first_word.eq_ignore_ascii_case(keyword) { return Some(keyword.to_string()); } } diff --git a/tests/identify_todo_comment.rs b/tests/identify_todo_comment.rs index ad72dae..5e2f737 100644 --- a/tests/identify_todo_comment.rs +++ b/tests/identify_todo_comment.rs @@ -25,9 +25,15 @@ async fn test_primary_keyword_lowercase() { } #[tokio::test] -async fn test_secondary_keyword_at_start() { - let comment = "// FIXME: There is a bug."; - assert_eq!(identify_todo_comment(comment), Some("FIXME".to_string())); +async fn test_secondary_keyword_at_start_with_colon() { + let comment = "// NB: There is a nota bene."; + assert_eq!(identify_todo_comment(comment), Some("NB".to_string())); +} + +#[tokio::test] +async fn test_secondary_keyword_at_start_with_no_colon() { + let comment = "// DEBUG: There is a debug."; + assert_eq!(identify_todo_comment(comment), 
Some("DEBUG".to_string())); } #[tokio::test] @@ -71,3 +77,15 @@ async fn test_no_keyword_found() { let comment = "// This is just a comment without anything."; assert_eq!(identify_todo_comment(comment), None); } + +#[tokio::test] +async fn test_no_keyword_found_with_similar_primary_word() { + let comment = "// I love todoctor"; + assert_eq!(identify_todo_comment(comment), None); +} + +#[tokio::test] +async fn test_no_keyword_found_with_similar_secondary_word() { + let comment = "// Dangerous stuff"; + assert_eq!(identify_todo_comment(comment), None); +}