From 718c016a82e2ca56cbaa2bf6cdc3535e4a2382f5 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 20 Oct 2023 08:18:34 +0900 Subject: [PATCH] Add caching to formatter --- crates/ruff_cli/src/args.rs | 12 + crates/ruff_cli/src/cache.rs | 633 +++++++++++++++++----- crates/ruff_cli/src/commands/check.rs | 71 +-- crates/ruff_cli/src/commands/format.rs | 87 ++- crates/ruff_cli/src/diagnostics.rs | 52 +- crates/ruff_cli/tests/format.rs | 8 +- crates/ruff_cli/tests/integration_test.rs | 4 +- docs/configuration.md | 10 +- 8 files changed, 619 insertions(+), 258 deletions(-) diff --git a/crates/ruff_cli/src/args.rs b/crates/ruff_cli/src/args.rs index 156ca6a6160274..d329b845d8890b 100644 --- a/crates/ruff_cli/src/args.rs +++ b/crates/ruff_cli/src/args.rs @@ -365,6 +365,14 @@ pub struct FormatCommand { /// Path to the `pyproject.toml` or `ruff.toml` file to use for configuration. #[arg(long, conflicts_with = "isolated")] pub config: Option, + + /// Disable cache reads. + #[arg(short, long, help_heading = "Miscellaneous")] + pub no_cache: bool, + /// Path to the cache directory. + #[arg(long, env = "RUFF_CACHE_DIR", help_heading = "Miscellaneous")] + pub cache_dir: Option, + /// Respect file exclusions via `.gitignore` and other standard ignore files. /// Use `--no-respect-gitignore` to disable. #[arg( @@ -535,6 +543,7 @@ impl FormatCommand { config: self.config, files: self.files, isolated: self.isolated, + no_cache: self.no_cache, stdin_filename: self.stdin_filename, }, CliOverrides { @@ -547,6 +556,8 @@ impl FormatCommand { preview: resolve_bool_arg(self.preview, self.no_preview).map(PreviewMode::from), force_exclude: resolve_bool_arg(self.force_exclude, self.no_force_exclude), target_version: self.target_version, + cache_dir: self.cache_dir, + // Unsupported on the formatter CLI, but required on `Overrides`. ..CliOverrides::default() }, @@ -590,6 +601,7 @@ pub struct CheckArguments { #[allow(clippy::struct_excessive_bools)] pub struct FormatArguments { pub check: bool, + pub no_cache: bool, pub diff: bool, pub config: Option, pub files: Vec, diff --git a/crates/ruff_cli/src/cache.rs b/crates/ruff_cli/src/cache.rs index f6595c43370a52..4298615c269500 100644 --- a/crates/ruff_cli/src/cache.rs +++ b/crates/ruff_cli/src/cache.rs @@ -1,13 +1,20 @@ use std::collections::HashMap; +use std::fmt::Debug; use std::fs::{self, File}; use std::hash::Hasher; use std::io::{self, BufReader, BufWriter, Write}; +use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Mutex; use std::time::{Duration, SystemTime}; use anyhow::{Context, Result}; +use filetime::FileTime; +use itertools::Itertools; +use log::{debug, error}; +use rayon::iter::ParallelIterator; +use rayon::iter::{IntoParallelIterator, ParallelBridge}; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; @@ -15,22 +22,47 @@ use ruff_cache::{CacheKey, CacheKeyHasher}; use ruff_diagnostics::{DiagnosticKind, Fix}; use ruff_linter::message::Message; use ruff_linter::warn_user; +use ruff_macros::CacheKey; use ruff_notebook::NotebookIndex; use ruff_python_ast::imports::ImportMap; use ruff_source_file::SourceFileBuilder; use ruff_text_size::{TextRange, TextSize}; +use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy, Resolver}; use ruff_workspace::Settings; +use crate::cache; use crate::diagnostics::Diagnostics; -/// Maximum duration for which we keep a file in cache that hasn't been seen. -const MAX_LAST_SEEN: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days. - /// [`Path`] that is relative to the package root in [`PackageCache`]. pub(crate) type RelativePath = Path; /// [`PathBuf`] that is relative to the package root in [`PackageCache`]. pub(crate) type RelativePathBuf = PathBuf; +#[derive(CacheKey)] +pub(crate) struct FileCacheKey { + /// Timestamp when the file was last modified before the (cached) check. + file_last_modified: FileTime, + /// Permissions of the file before the (cached) check. + file_permissions_mode: u32, +} + +impl FileCacheKey { + pub(crate) fn from_path(path: &Path) -> io::Result { + // Construct a cache key for the file + let metadata = path.metadata()?; + + #[cfg(unix)] + let permissions = metadata.permissions().mode(); + #[cfg(windows)] + let permissions: u32 = metadata.permissions().readonly().into(); + + Ok(FileCacheKey { + file_last_modified: FileTime::from_last_modification_time(&metadata), + file_permissions_mode: permissions, + }) + } +} + /// Cache. /// /// `Cache` holds everything required to display the diagnostics for a single @@ -50,7 +82,7 @@ pub(crate) struct Cache { /// Files that are linted, but are not in `package.files` or are in /// `package.files` but are outdated. This gets merged with `package.files` /// when the cache is written back to disk in [`Cache::store`]. - new_files: Mutex>, + changes: Mutex>, /// The "current" timestamp used as cache for the updates of /// [`FileCache::last_seen`] last_seen_cache: u64, @@ -65,7 +97,7 @@ impl Cache { /// /// Finally `settings` is used to ensure we don't open a cache for different /// settings. It also defines the directory where to store the cache. - pub(crate) fn open(package_root: PathBuf, settings: &Settings) -> Cache { + pub(crate) fn open(package_root: PathBuf, settings: &Settings) -> Self { debug_assert!(package_root.is_absolute(), "package root not canonicalized"); let mut buf = itoa::Buffer::new(); @@ -105,7 +137,7 @@ impl Cache { } /// Create an empty `Cache`. - fn empty(path: PathBuf, package_root: PathBuf) -> Cache { + fn empty(path: PathBuf, package_root: PathBuf) -> Self { let package = PackageCache { package_root, files: HashMap::new(), @@ -114,37 +146,25 @@ impl Cache { } #[allow(clippy::cast_possible_truncation)] - fn new(path: PathBuf, package: PackageCache) -> Cache { + fn new(path: PathBuf, package: PackageCache) -> Self { Cache { path, package, - new_files: Mutex::new(HashMap::new()), + changes: Mutex::new(Vec::new()), // SAFETY: this will be truncated to the year ~2554 (so don't use // this code after that!). last_seen_cache: SystemTime::UNIX_EPOCH.elapsed().unwrap().as_millis() as u64, } } - /// Store the cache to disk, if it has been changed. - #[allow(clippy::cast_possible_truncation)] - pub(crate) fn store(mut self) -> Result<()> { - let new_files = self.new_files.into_inner().unwrap(); - if new_files.is_empty() { + /// Applies the pending changes and persists the cache to disk, if it has been changed. + pub(crate) fn persist(mut self) -> Result<()> { + if !self.save() { // No changes made, no need to write the same cache file back to // disk. return Ok(()); } - // Remove cached files that we haven't seen in a while. - let now = self.last_seen_cache; - self.package.files.retain(|_, file| { - // SAFETY: this will be truncated to the year ~2554. - (now - *file.last_seen.get_mut()) <= MAX_LAST_SEEN.as_millis() as u64 - }); - - // Apply any changes made and keep track of when we last saw files. - self.package.files.extend(new_files); - let file = File::create(&self.path) .with_context(|| format!("Failed to create cache file '{}'", self.path.display()))?; let writer = BufWriter::new(file); @@ -156,6 +176,53 @@ impl Cache { }) } + /// Applies the pending changes without storing the cache to disk. + #[allow(clippy::cast_possible_truncation)] + pub(crate) fn save(&mut self) -> bool { + /// Maximum duration for which we keep a file in cache that hasn't been seen. + const MAX_LAST_SEEN: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days. + + let changes = std::mem::take(self.changes.get_mut().unwrap()); + if changes.is_empty() { + return false; + } + + // Remove cached files that we haven't seen in a while. + let now = self.last_seen_cache; + self.package.files.retain(|_, file| { + // SAFETY: this will be truncated to the year ~2554. + (now - *file.last_seen.get_mut()) <= MAX_LAST_SEEN.as_millis() as u64 + }); + + // Apply any changes made and keep track of when we last saw files. + for change in changes { + let entry = self + .package + .files + .entry(change.path) + .and_modify(|existing| { + if existing.key != change.new_key { + // Reset the data if the key change. + existing.data = FileCacheData::default(); + } + + existing.key = change.new_key; + existing + .last_seen + .store(self.last_seen_cache, Ordering::Relaxed); + }) + .or_insert_with(|| FileCache { + key: change.new_key, + last_seen: AtomicU64::new(self.last_seen_cache), + data: FileCacheData::default(), + }); + + change.new_data.apply(&mut entry.data); + } + + true + } + /// Returns the relative path based on `path` and the package root. /// /// Returns `None` if `path` is not within the package. @@ -169,7 +236,7 @@ impl Cache { /// /// This returns `None` if `key` differs from the cached key or if the /// cache doesn't contain results for the file. - pub(crate) fn get(&self, path: &RelativePath, key: &T) -> Option<&FileCache> { + pub(crate) fn get(&self, path: &RelativePath, key: &FileCacheKey) -> Option<&FileCache> { let file = self.package.files.get(path)?; let mut hasher = CacheKeyHasher::new(); @@ -185,50 +252,34 @@ impl Cache { Some(file) } + pub(crate) fn is_formatted(&self, path: &RelativePath, key: &FileCacheKey) -> bool { + self.get(path, key) + .is_some_and(|entry| entry.data.formatted) + } + /// Add or update a file cache at `path` relative to the package root. - pub(crate) fn update( + fn update(&self, path: RelativePathBuf, key: &FileCacheKey, data: ChangeData) { + let mut hasher = CacheKeyHasher::new(); + key.cache_key(&mut hasher); + + self.changes.lock().unwrap().push(Change { + path, + new_key: hasher.finish(), + new_data: data, + }); + } + + pub(crate) fn update_lint( &self, path: RelativePathBuf, - key: T, - messages: &[Message], - imports: &ImportMap, - notebook_index: Option<&NotebookIndex>, + key: &FileCacheKey, + data: LintCacheData, ) { - let source = if let Some(msg) = messages.first() { - msg.file.source_text().to_owned() - } else { - String::new() // No messages, no need to keep the source! - }; - - let messages = messages - .iter() - .map(|msg| { - // Make sure that all message use the same source file. - assert!( - msg.file == messages.first().unwrap().file, - "message uses a different source file" - ); - CacheMessage { - kind: msg.kind.clone(), - range: msg.range, - fix: msg.fix.clone(), - noqa_offset: msg.noqa_offset, - } - }) - .collect(); - - let mut hasher = CacheKeyHasher::new(); - key.cache_key(&mut hasher); + self.update(path, key, ChangeData::Lint(data)); + } - let file = FileCache { - key: hasher.finish(), - last_seen: AtomicU64::new(self.last_seen_cache), - imports: imports.clone(), - messages, - source, - notebook_index: notebook_index.cloned(), - }; - self.new_files.lock().unwrap().insert(path, file); + pub(crate) fn set_formatted(&self, path: RelativePathBuf, key: &FileCacheKey) { + self.update(path, key, ChangeData::Formatted); } } @@ -254,55 +305,43 @@ pub(crate) struct FileCache { /// Represented as the number of milliseconds since Unix epoch. This will /// break in 1970 + ~584 years (~2554). last_seen: AtomicU64, - /// Imports made. - imports: ImportMap, - /// Diagnostic messages. - messages: Vec, - /// Source code of the file. - /// - /// # Notes - /// - /// This will be empty if `messages` is empty. - source: String, - /// Notebook index if this file is a Jupyter Notebook. - notebook_index: Option, + + data: FileCacheData, } impl FileCache { /// Convert the file cache into `Diagnostics`, using `path` as file name. - pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics { - let messages = if self.messages.is_empty() { - Vec::new() - } else { - let file = SourceFileBuilder::new(path.to_string_lossy(), &*self.source).finish(); - self.messages - .iter() - .map(|msg| Message { - kind: msg.kind.clone(), - range: msg.range, - fix: msg.fix.clone(), - file: file.clone(), - noqa_offset: msg.noqa_offset, - }) - .collect() - }; - let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() { - FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())]) - } else { - FxHashMap::default() - }; - Diagnostics::new(messages, self.imports.clone(), notebook_indexes) + pub(crate) fn to_diagnostics(&self, path: &Path) -> Option { + self.data.lint.as_ref().map(|lint| { + let messages = if lint.messages.is_empty() { + Vec::new() + } else { + let file = SourceFileBuilder::new(path.to_string_lossy(), &*lint.source).finish(); + lint.messages + .iter() + .map(|msg| Message { + kind: msg.kind.clone(), + range: msg.range, + fix: msg.fix.clone(), + file: file.clone(), + noqa_offset: msg.noqa_offset, + }) + .collect() + }; + let notebook_indexes = if let Some(notebook_index) = lint.notebook_index.as_ref() { + FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())]) + } else { + FxHashMap::default() + }; + Diagnostics::new(messages, lint.imports.clone(), notebook_indexes) + }) } } -/// On disk representation of a diagnostic message. -#[derive(Deserialize, Debug, Serialize)] -struct CacheMessage { - kind: DiagnosticKind, - /// Range into the message's [`FileCache::source`]. - range: TextRange, - fix: Option, - noqa_offset: TextSize, +#[derive(Debug, Default, Deserialize, Serialize)] +struct FileCacheData { + lint: Option, + formatted: bool, } /// Returns a hash key based on the `package_root`, `settings` and the crate @@ -335,33 +374,210 @@ pub(crate) fn init(path: &Path) -> Result<()> { Ok(()) } +#[derive(Deserialize, Debug, Serialize, PartialEq)] +pub(crate) struct LintCacheData { + /// Imports made. + pub(super) imports: ImportMap, + /// Diagnostic messages. + pub(super) messages: Vec, + /// Source code of the file. + /// + /// # Notes + /// + /// This will be empty if `messages` is empty. + pub(super) source: String, + /// Notebook index if this file is a Jupyter Notebook. + pub(super) notebook_index: Option, +} + +impl LintCacheData { + pub(crate) fn from_messages( + messages: &[Message], + imports: ImportMap, + notebook_index: Option, + ) -> Self { + let source = if let Some(msg) = messages.first() { + msg.file.source_text().to_owned() + } else { + String::new() // No messages, no need to keep the source! + }; + + let messages = messages + .iter() + .map(|msg| { + // Make sure that all message use the same source file. + assert_eq!( + msg.file, + messages.first().unwrap().file, + "message uses a different source file" + ); + CacheMessage { + kind: msg.kind.clone(), + range: msg.range, + fix: msg.fix.clone(), + noqa_offset: msg.noqa_offset, + } + }) + .collect(); + + Self { + imports, + messages, + source, + notebook_index, + } + } +} + +/// On disk representation of a diagnostic message. +#[derive(Deserialize, Debug, Serialize, PartialEq)] +pub(super) struct CacheMessage { + kind: DiagnosticKind, + /// Range into the message's [`FileCache::source`]. + range: TextRange, + fix: Option, + noqa_offset: TextSize, +} + +pub(crate) trait PackageCaches { + fn get(&self, package_root: &Path) -> Option<&Cache>; + + fn persist(self) -> anyhow::Result<()>; +} + +impl PackageCaches for Option +where + T: PackageCaches, +{ + fn get(&self, package_root: &Path) -> Option<&Cache> { + match self { + None => None, + Some(caches) => caches.get(package_root), + } + } + + fn persist(self) -> Result<()> { + match self { + None => Ok(()), + Some(caches) => caches.persist(), + } + } +} + +pub(crate) struct PackageCacheMap<'a>(FxHashMap<&'a Path, Cache>); + +impl<'a> PackageCacheMap<'a> { + pub(crate) fn init( + pyproject_config: &PyprojectConfig, + package_roots: &FxHashMap<&'a Path, Option<&'a Path>>, + resolver: &Resolver, + ) -> Self { + fn init_cache(path: &Path) { + if let Err(e) = cache::init(path) { + error!("Failed to initialize cache at {}: {e:?}", path.display()); + } + } + + match pyproject_config.strategy { + PyprojectDiscoveryStrategy::Fixed => { + init_cache(&pyproject_config.settings.cache_dir); + } + PyprojectDiscoveryStrategy::Hierarchical => { + for settings in + std::iter::once(&pyproject_config.settings).chain(resolver.settings()) + { + init_cache(&settings.cache_dir); + } + } + } + + Self( + package_roots + .iter() + .map(|(package, package_root)| package_root.unwrap_or(package)) + .unique() + .par_bridge() + .map(|cache_root| { + let settings = resolver.resolve(cache_root, pyproject_config); + let cache = Cache::open(cache_root.to_path_buf(), settings); + (cache_root, cache) + }) + .collect(), + ) + } +} + +impl PackageCaches for PackageCacheMap<'_> { + fn get(&self, package_root: &Path) -> Option<&Cache> { + let cache = self.0.get(package_root); + + if cache.is_none() { + debug!("No cache found for {}", package_root.display()); + } + + cache + } + + fn persist(self) -> Result<()> { + self.0 + .into_par_iter() + .try_for_each(|(_, cache)| cache.persist()) + } +} + +#[derive(Debug)] +struct Change { + path: PathBuf, + new_key: u64, + new_data: ChangeData, +} + +#[derive(Debug)] +enum ChangeData { + Lint(LintCacheData), + Formatted, +} + +impl ChangeData { + fn apply(self, data: &mut FileCacheData) { + match self { + ChangeData::Lint(new_lint) => { + data.lint = Some(new_lint); + } + ChangeData::Formatted => { + data.formatted = true; + } + } + } +} + #[cfg(test)] mod tests { - use filetime::{set_file_mtime, FileTime}; - use ruff_linter::settings::types::UnsafeFixes; use std::env::temp_dir; use std::fs; use std::io; use std::io::Write; use std::path::{Path, PathBuf}; + use std::sync::atomic::AtomicU64; use std::time::SystemTime; + use anyhow::Result; + use filetime::{set_file_mtime, FileTime}; use itertools::Itertools; + use test_case::test_case; + use ruff_cache::CACHE_DIR_NAME; use ruff_linter::settings::flags; + use ruff_linter::settings::types::UnsafeFixes; + use ruff_linter::source_kind::SourceKind; + use ruff_python_ast::PySourceType; + use ruff_workspace::Settings; - use crate::cache::RelativePathBuf; - use crate::cache::{self, Cache, FileCache}; + use crate::cache::{self, FileCache, FileCacheData, FileCacheKey}; + use crate::cache::{Cache, RelativePathBuf}; + use crate::commands::format::{format_path, FormatCommandError, FormatMode, FormatResult}; use crate::diagnostics::{lint_path, Diagnostics}; - use std::sync::atomic::AtomicU64; - - use anyhow::Result; - use ruff_python_ast::imports::ImportMap; - - use ruff_workspace::Settings; - use test_case::test_case; - #[test_case("../ruff_linter/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_linter"; "ruff_linter_fixtures")] #[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")] fn same_results(package_root: &str, cache_dir_path: &str) { @@ -377,7 +593,7 @@ mod tests { let package_root = fs::canonicalize(package_root).unwrap(); let cache = Cache::open(package_root.clone(), &settings); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + assert_eq!(cache.changes.lock().unwrap().len(), 0); let mut paths = Vec::new(); let mut parse_errors = Vec::new(); @@ -427,7 +643,7 @@ mod tests { } assert_ne!(paths, &[] as &[std::path::PathBuf], "no files checked"); - cache.store().unwrap(); + cache.persist().unwrap(); let cache = Cache::open(package_root.clone(), &settings); assert_ne!(cache.package.files.len(), 0); @@ -472,21 +688,21 @@ mod tests { let test_cache = TestCache::new("cache_adds_file_on_lint"); let cache = test_cache.open(); test_cache.write_source_file("source.py", source); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + assert_eq!(cache.changes.lock().unwrap().len(), 0); - cache.store().unwrap(); + cache.persist().unwrap(); let cache = test_cache.open(); test_cache .lint_file_with_cache("source.py", &cache) .expect("Failed to lint test file"); assert_eq!( - cache.new_files.lock().unwrap().len(), + cache.changes.lock().unwrap().len(), 1, "A single new file should be added to the cache" ); - cache.store().unwrap(); + cache.persist().unwrap(); } #[test] @@ -497,9 +713,9 @@ mod tests { let cache = test_cache.open(); test_cache.write_source_file("source_1.py", source); test_cache.write_source_file("source_2.py", source); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + assert_eq!(cache.changes.lock().unwrap().len(), 0); - cache.store().unwrap(); + cache.persist().unwrap(); let cache = test_cache.open(); test_cache @@ -509,12 +725,39 @@ mod tests { .lint_file_with_cache("source_2.py", &cache) .expect("Failed to lint test file"); assert_eq!( - cache.new_files.lock().unwrap().len(), + cache.changes.lock().unwrap().len(), 2, "Both files should be added to the cache" ); + cache.persist().unwrap(); + } - cache.store().unwrap(); + #[test] + fn cache_adds_files_on_format() { + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + + let test_cache = TestCache::new("cache_adds_files_on_format"); + let cache = test_cache.open(); + test_cache.write_source_file("source_1.py", source); + test_cache.write_source_file("source_2.py", source); + assert_eq!(cache.changes.lock().unwrap().len(), 0); + + cache.persist().unwrap(); + let cache = test_cache.open(); + + test_cache + .format_file_with_cache("source_1.py", &cache) + .expect("Failed to format test file"); + test_cache + .format_file_with_cache("source_2.py", &cache) + .expect("Failed to format test file"); + assert_eq!( + cache.changes.lock().unwrap().len(), + 2, + "Both files should be added to the cache" + ); + + cache.persist().unwrap(); } #[test] @@ -524,13 +767,13 @@ mod tests { let test_cache = TestCache::new("cache_invalidated_on_file_modified_time"); let cache = test_cache.open(); let source_path = test_cache.write_source_file("source.py", source); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + assert_eq!(cache.changes.lock().unwrap().len(), 0); let expected_diagnostics = test_cache .lint_file_with_cache("source.py", &cache) .expect("Failed to lint test file"); - cache.store().unwrap(); + cache.persist().unwrap(); let cache = test_cache.open(); // Update the modified time of the file to a time in the future @@ -545,7 +788,7 @@ mod tests { .expect("Failed to lint test file"); assert_eq!( - cache.new_files.lock().unwrap().len(), + cache.changes.lock().unwrap().len(), 1, "Cache should not be used, the file should be treated as new and added to the cache" ); @@ -583,13 +826,13 @@ mod tests { let test_cache = TestCache::new("cache_invalidated_on_permission_change"); let cache = test_cache.open(); let path = test_cache.write_source_file("source.py", source); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + assert_eq!(cache.changes.lock().unwrap().len(), 0); let expected_diagnostics = test_cache .lint_file_with_cache("source.py", &cache) .unwrap(); - cache.store().unwrap(); + cache.persist().unwrap(); let cache = test_cache.open(); // Flip the permissions on the file @@ -603,7 +846,7 @@ mod tests { .unwrap(); assert_eq!( - cache.new_files.lock().unwrap().len(), + cache.changes.lock().unwrap().len(), 1, "Cache should not be used, the file should be treated as new and added to the cache" ); @@ -615,8 +858,8 @@ mod tests { } #[test] - fn cache_removes_stale_files_on_store() { - let test_cache = TestCache::new("cache_removes_stale_files_on_store"); + fn cache_removes_stale_files_on_persist() { + let test_cache = TestCache::new("cache_removes_stale_files_on_persist"); let mut cache = test_cache.open(); // Add a file to the cache that hasn't been linted or seen since the '70s! @@ -626,10 +869,7 @@ mod tests { FileCache { key: 123, last_seen: AtomicU64::new(123), - imports: ImportMap::new(), - messages: Vec::new(), - source: String::new(), - notebook_index: None, + data: FileCacheData::default(), }, ); @@ -637,34 +877,121 @@ mod tests { let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; test_cache.write_source_file("new.py", source); let new_path_key = RelativePathBuf::from("new.py"); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + assert_eq!(cache.changes.lock().unwrap().len(), 0); test_cache .lint_file_with_cache("new.py", &cache) .expect("Failed to lint test file"); // Storing the cache should remove the old (`old.py`) file. - cache.store().unwrap(); + cache.persist().unwrap(); // So we when we open the cache again it shouldn't contain `old.py`. let cache = test_cache.open(); - assert!( - cache.package.files.keys().collect_vec() == vec![&new_path_key], + assert_eq!( + cache.package.files.keys().collect_vec(), + vec![&new_path_key], "Only the new file should be present" ); } + #[test] + fn format_updates_cache_entry() { + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + + let test_cache = TestCache::new("format_updates_cache_entry"); + let cache = test_cache.open(); + test_cache.write_source_file("source.py", source); + assert_eq!(cache.changes.lock().unwrap().len(), 0); + + cache.persist().unwrap(); + let cache = test_cache.open(); + + // Cache the lint results + test_cache + .lint_file_with_cache("source.py", &cache) + .expect("Failed to lint test file"); + cache.persist().unwrap(); + + let mut cache = test_cache.open(); + + // Now lint the file + test_cache + .format_file_with_cache("source.py", &cache) + .expect("Failed to format test file"); + + cache.save(); + + assert_eq!(cache.package.files.len(), 1); + + let Some(file_cache) = cache.get( + Path::new("source.py"), + &FileCacheKey::from_path(&test_cache.package_root.join("source.py")).unwrap(), + ) else { + panic!("Cache entry for `source.py` is missing."); + }; + + assert!(file_cache.data.lint.is_some()); + assert!(file_cache.data.formatted); + } + + #[test] + fn file_changes_invalidate_file_cache() { + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + + let test_cache = TestCache::new("file_changes_invalidate_file_cache"); + let cache = test_cache.open(); + test_cache.write_source_file("source.py", source); + assert_eq!(cache.changes.lock().unwrap().len(), 0); + + cache.persist().unwrap(); + let cache = test_cache.open(); + + // Cache the format and lint results + test_cache + .lint_file_with_cache("source.py", &cache) + .expect("Failed to lint test file"); + test_cache + .format_file_with_cache("source.py", &cache) + .expect("Failed to format test file"); + + cache.persist().unwrap(); + + let mut cache = test_cache.open(); + assert_eq!(cache.package.files.len(), 1); + + test_cache.write_source_file("source.py", "a = 2".as_bytes()); + + test_cache + .format_file_with_cache("source.py", &cache) + .expect("Failed to format test file"); + + cache.save(); + + assert_eq!(cache.package.files.len(), 1); + + let Some(file_cache) = cache.get( + Path::new("source.py"), + &FileCacheKey::from_path(&test_cache.package_root.join("source.py")).unwrap(), + ) else { + panic!("Cache entry for `source.py` is missing."); + }; + + assert_eq!(file_cache.data.lint, None); + assert!(file_cache.data.formatted); + } + struct TestCache { package_root: PathBuf, settings: Settings, } impl TestCache { - fn new(name: &str) -> Self { + fn new(test_case: &str) -> Self { // Build a new cache directory and clear it let mut test_dir = temp_dir(); test_dir.push("ruff_tests/cache"); - test_dir.push(name); + test_dir.push(test_case); let _ = fs::remove_dir_all(&test_dir); @@ -718,6 +1045,22 @@ mod tests { UnsafeFixes::Enabled, ) } + + fn format_file_with_cache( + &self, + path: &str, + cache: &Cache, + ) -> Result { + let file_path = self.package_root.join(path); + format_path( + &file_path, + &self.settings.formatter, + &SourceKind::Python(std::fs::read_to_string(&file_path).unwrap()), + PySourceType::Python, + FormatMode::Write, + Some(cache), + ) + } } impl Drop for TestCache { diff --git a/crates/ruff_cli/src/commands/check.rs b/crates/ruff_cli/src/commands/check.rs index 55e607e2df9119..001cb0ae8449de 100644 --- a/crates/ruff_cli/src/commands/check.rs +++ b/crates/ruff_cli/src/commands/check.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::fmt::Write; use std::io; use std::path::{Path, PathBuf}; @@ -7,28 +6,26 @@ use std::time::Instant; use anyhow::Result; use colored::Colorize; use ignore::Error; -use itertools::Itertools; use log::{debug, error, warn}; #[cfg(not(target_family = "wasm"))] use rayon::prelude::*; -use ruff_linter::settings::types::UnsafeFixes; use rustc_hash::FxHashMap; use ruff_diagnostics::Diagnostic; use ruff_linter::message::Message; use ruff_linter::registry::Rule; +use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::{fs, warn_user_once, IOError}; use ruff_python_ast::imports::ImportMap; use ruff_source_file::SourceFileBuilder; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::resolver::{ - match_exclusion, python_files_in_path, PyprojectConfig, PyprojectDiscoveryStrategy, - ResolvedFile, + match_exclusion, python_files_in_path, PyprojectConfig, ResolvedFile, }; use crate::args::CliOverrides; -use crate::cache::{self, Cache}; +use crate::cache::{Cache, PackageCacheMap, PackageCaches}; use crate::diagnostics::Diagnostics; use crate::panic::catch_unwind; @@ -52,28 +49,6 @@ pub(crate) fn check( return Ok(Diagnostics::default()); } - // Initialize the cache. - if cache.into() { - fn init_cache(path: &Path) { - if let Err(e) = cache::init(path) { - error!("Failed to initialize cache at {}: {e:?}", path.display()); - } - } - - match pyproject_config.strategy { - PyprojectDiscoveryStrategy::Fixed => { - init_cache(&pyproject_config.settings.cache_dir); - } - PyprojectDiscoveryStrategy::Hierarchical => { - for settings in - std::iter::once(&pyproject_config.settings).chain(resolver.settings()) - { - init_cache(&settings.cache_dir); - } - } - } - }; - // Discover the package root for each Python file. let package_roots = resolver.package_roots( &paths @@ -85,19 +60,15 @@ pub(crate) fn check( ); // Load the caches. - let caches = bool::from(cache).then(|| { - package_roots - .iter() - .map(|(package, package_root)| package_root.unwrap_or(package)) - .unique() - .par_bridge() - .map(|cache_root| { - let settings = resolver.resolve(cache_root, pyproject_config); - let cache = Cache::open(cache_root.to_path_buf(), settings); - (cache_root, cache) - }) - .collect::>() - }); + let caches = if bool::from(cache) { + Some(PackageCacheMap::init( + pyproject_config, + &package_roots, + &resolver, + )) + } else { + None + }; let start = Instant::now(); let diagnostics_per_file = paths.par_iter().filter_map(|resolved_file| { @@ -122,14 +93,7 @@ pub(crate) fn check( } let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path)); - let cache = caches.as_ref().and_then(|caches| { - if let Some(cache) = caches.get(&cache_root) { - Some(cache) - } else { - debug!("No cache found for {}", cache_root.display()); - None - } - }); + let cache = caches.get(cache_root); lint_path( path, @@ -210,11 +174,7 @@ pub(crate) fn check( all_diagnostics.messages.sort(); // Store the caches. - if let Some(caches) = caches { - caches - .into_par_iter() - .try_for_each(|(_, cache)| cache.store())?; - } + caches.persist()?; let duration = start.elapsed(); debug!("Checked {:?} files in: {:?}", checked_files, duration); @@ -266,13 +226,12 @@ mod test { use std::os::unix::fs::OpenOptionsExt; use anyhow::Result; - - use ruff_linter::settings::types::UnsafeFixes; use rustc_hash::FxHashMap; use tempfile::TempDir; use ruff_linter::message::{Emitter, EmitterContext, TextEmitter}; use ruff_linter::registry::Rule; + use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy}; use ruff_workspace::Settings; diff --git a/crates/ruff_cli/src/commands/format.rs b/crates/ruff_cli/src/commands/format.rs index 21424ed421e75c..e9ff23ee4f45ba 100644 --- a/crates/ruff_cli/src/commands/format.rs +++ b/crates/ruff_cli/src/commands/format.rs @@ -10,7 +10,7 @@ use colored::Colorize; use itertools::Itertools; use log::error; use rayon::iter::Either::{Left, Right}; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use similar::TextDiff; use thiserror::Error; use tracing::debug; @@ -23,10 +23,11 @@ use ruff_linter::warn_user_once; use ruff_python_ast::{PySourceType, SourceType}; use ruff_python_formatter::{format_module_source, FormatModuleError}; use ruff_text_size::{TextLen, TextRange, TextSize}; -use ruff_workspace::resolver::{match_exclusion, python_files_in_path}; +use ruff_workspace::resolver::{match_exclusion, python_files_in_path, ResolvedFile}; use ruff_workspace::FormatterSettings; use crate::args::{CliOverrides, FormatArguments}; +use crate::cache::{Cache, FileCacheKey, PackageCacheMap, PackageCaches}; use crate::panic::{catch_unwind, PanicError}; use crate::resolve::resolve; use crate::ExitStatus; @@ -73,9 +74,34 @@ pub(crate) fn format( return Ok(ExitStatus::Success); } + // Discover the package root for each Python file. + let package_roots = resolver.package_roots( + &paths + .iter() + .flatten() + .map(ResolvedFile::path) + .collect::>(), + &pyproject_config, + ); + + let caches = if cli.no_cache { + None + } else { + // `--no-cache` doesn't respect code changes, and so is often confusing during + // development. + #[cfg(debug_assertions)] + crate::warn_user!("Detected debug build without --no-cache."); + + Some(PackageCacheMap::init( + &pyproject_config, + &package_roots, + &resolver, + )) + }; + let start = Instant::now(); let (mut results, mut errors): (Vec<_>, Vec<_>) = paths - .into_par_iter() + .par_iter() .filter_map(|entry| { match entry { Ok(resolved_file) => { @@ -110,6 +136,13 @@ pub(crate) fn format( } }; + let package = path + .parent() + .and_then(|parent| package_roots.get(parent).copied()) + .flatten(); + let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path)); + let cache = caches.get(cache_root); + Some( match catch_unwind(|| { format_path( @@ -118,21 +151,22 @@ pub(crate) fn format( &unformatted, source_type, mode, + cache, ) }) { Ok(inner) => inner.map(|result| FormatPathResult { - path: resolved_file.into_path(), + path: resolved_file.path().to_path_buf(), unformatted, result, }), Err(error) => Err(FormatCommandError::Panic( - Some(resolved_file.into_path()), + Some(resolved_file.path().to_path_buf()), error, )), }, ) } - Err(err) => Some(Err(FormatCommandError::Ignore(err))), + Err(err) => Some(Err(FormatCommandError::Ignore(err.clone()))), } }) .partition_map(|result| match result { @@ -168,6 +202,8 @@ pub(crate) fn format( duration ); + caches.persist()?; + // Report on any errors. for error in &errors { error!("{error}"); @@ -214,13 +250,26 @@ pub(crate) fn format( /// Format the file at the given [`Path`]. #[tracing::instrument(level="debug", skip_all, fields(path = %path.display()))] -fn format_path( +pub(crate) fn format_path( path: &Path, settings: &FormatterSettings, unformatted: &SourceKind, source_type: PySourceType, mode: FormatMode, + cache: Option<&Cache>, ) -> Result { + if let Some(cache) = cache { + let relative_path = cache + .relative_path(path) + .expect("wrong package cache for file"); + + if let Ok(cache_key) = FileCacheKey::from_path(path) { + if cache.is_formatted(relative_path, &cache_key) { + return Ok(FormatResult::Unchanged); + } + } + } + // Format the source. let format_result = match format_source(unformatted, source_type, Some(path), settings)? { FormattedSource::Formatted(formatted) => match mode { @@ -231,13 +280,35 @@ fn format_path( formatted .write(&mut writer) .map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?; + + if let Some(cache) = cache { + if let Ok(cache_key) = FileCacheKey::from_path(path) { + let relative_path = cache + .relative_path(path) + .expect("wrong package cache for file"); + cache.set_formatted(relative_path.to_path_buf(), &cache_key); + } + } + FormatResult::Formatted } FormatMode::Check => FormatResult::Formatted, FormatMode::Diff => FormatResult::Diff(formatted), }, - FormattedSource::Unchanged => FormatResult::Unchanged, + FormattedSource::Unchanged => { + if let Some(cache) = cache { + if let Ok(cache_key) = FileCacheKey::from_path(path) { + let relative_path = cache + .relative_path(path) + .expect("wrong package cache for file"); + cache.set_formatted(relative_path.to_path_buf(), &cache_key); + } + } + + FormatResult::Unchanged + } }; + Ok(format_result) } diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index c8af1330adac48..b2cf812e09bff1 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -3,16 +3,14 @@ use std::fs::File; use std::io; use std::ops::{Add, AddAssign}; -#[cfg(unix)] -use std::os::unix::fs::PermissionsExt; use std::path::Path; use anyhow::{Context, Result}; use colored::Colorize; -use filetime::FileTime; use log::{debug, error, warn}; use rustc_hash::FxHashMap; +use crate::cache::{Cache, FileCacheKey, LintCacheData}; use ruff_diagnostics::Diagnostic; use ruff_linter::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; use ruff_linter::logging::DisplayParseError; @@ -23,7 +21,6 @@ use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::{fs, IOError, SyntaxError}; -use ruff_macros::CacheKey; use ruff_notebook::{Notebook, NotebookError, NotebookIndex}; use ruff_python_ast::imports::ImportMap; use ruff_python_ast::{SourceType, TomlSourceType}; @@ -31,33 +28,6 @@ use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::Settings; -use crate::cache::Cache; - -#[derive(CacheKey)] -pub(crate) struct FileCacheKey { - /// Timestamp when the file was last modified before the (cached) check. - file_last_modified: FileTime, - /// Permissions of the file before the (cached) check. - file_permissions_mode: u32, -} - -impl FileCacheKey { - fn from_path(path: &Path) -> io::Result { - // Construct a cache key for the file - let metadata = path.metadata()?; - - #[cfg(unix)] - let permissions = metadata.permissions().mode(); - #[cfg(windows)] - let permissions: u32 = metadata.permissions().readonly().into(); - - Ok(FileCacheKey { - file_last_modified: FileTime::from_last_modification_time(&metadata), - file_permissions_mode: permissions, - }) - } -} - #[derive(Debug, Default, PartialEq)] pub(crate) struct Diagnostics { pub(crate) messages: Vec, @@ -230,9 +200,11 @@ pub(crate) fn lint_path( .expect("wrong package cache for file"); let cache_key = FileCacheKey::from_path(path).context("Failed to create cache key")?; - - if let Some(cache) = cache.get(relative_path, &cache_key) { - return Ok(cache.as_diagnostics(path)); + let cached_diagnostics = cache + .get(relative_path, &cache_key) + .and_then(|entry| entry.to_diagnostics(path)); + if let Some(diagnostics) = cached_diagnostics { + return Ok(diagnostics); } // Stash the file metadata for later so when we update the cache it reflects the prerun @@ -332,12 +304,14 @@ pub(crate) fn lint_path( if let Some((cache, relative_path, key)) = caching { // We don't cache parsing errors. if parse_error.is_none() { - cache.update( + cache.update_lint( relative_path.to_owned(), - key, - &messages, - &imports, - source_kind.as_ipy_notebook().map(Notebook::index), + &key, + LintCacheData::from_messages( + &messages, + imports.clone(), + source_kind.as_ipy_notebook().map(Notebook::index).cloned(), + ), ); } } diff --git a/crates/ruff_cli/tests/format.rs b/crates/ruff_cli/tests/format.rs index 5fc8fc94fe3de0..6ec13b34356816 100644 --- a/crates/ruff_cli/tests/format.rs +++ b/crates/ruff_cli/tests/format.rs @@ -110,7 +110,7 @@ fn mixed_line_endings() -> Result<()> { assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) .current_dir(tempdir.path()) - .args(["format", "--diff", "--isolated"]) + .args(["format", "--no-cache", "--diff", "--isolated"]) .arg("."), @r###" success: true exit_code: 0 @@ -173,7 +173,7 @@ OTHER = "OTHER" assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) .current_dir(tempdir.path()) - .args(["format", "--check", "--config"]) + .args(["format", "--no-cache", "--check", "--config"]) .arg(ruff_toml.file_name().unwrap()) // Explicitly pass test.py, should be formatted regardless of it being excluded by format.exclude .arg(test_path.file_name().unwrap()) @@ -322,7 +322,7 @@ format = "json" #[test] fn test_diff() { - let args = ["format", "--isolated", "--diff"]; + let args = ["format", "--no-cache", "--isolated", "--diff"]; let fixtures = Path::new("resources").join("test").join("fixtures"); let paths = [ fixtures.join("unformatted.py"), @@ -365,7 +365,7 @@ fn test_diff() { #[test] fn test_diff_no_change() { - let args = ["format", "--isolated", "--diff"]; + let args = ["format", "--no-cache", "--isolated", "--diff"]; let fixtures = Path::new("resources").join("test").join("fixtures"); let paths = [fixtures.join("unformatted.py")]; insta::with_settings!({filters => vec![ diff --git a/crates/ruff_cli/tests/integration_test.rs b/crates/ruff_cli/tests/integration_test.rs index 36a25bd29e4ca5..d6b7b712b95ba9 100644 --- a/crates/ruff_cli/tests/integration_test.rs +++ b/crates/ruff_cli/tests/integration_test.rs @@ -1305,7 +1305,7 @@ extend-unsafe-fixes = ["UP034"] .args([ "--output-format", "text", - "--no-cache", + "--no-cache", "--select", "F601,UP034", ]) @@ -1344,7 +1344,7 @@ extend-safe-fixes = ["F601"] .args([ "--output-format", "text", - "--no-cache", + "--no-cache", "--select", "F601,UP034", ]) diff --git a/docs/configuration.md b/docs/configuration.md index 899de5c2e73422..63872a7f01fd6d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -390,15 +390,17 @@ Options: -h, --help Print help +Miscellaneous: + -n, --no-cache Disable cache reads + --cache-dir Path to the cache directory [env: RUFF_CACHE_DIR=] + --isolated Ignore all configuration files + --stdin-filename The name of the file when passing it through stdin + File selection: --respect-gitignore Respect file exclusions via `.gitignore` and other standard ignore files. Use `--no-respect-gitignore` to disable --exclude List of paths, used to omit files and/or directories from analysis --force-exclude Enforce exclusions, even for paths passed to Ruff directly on the command-line. Use `--no-force-exclude` to disable -Miscellaneous: - --isolated Ignore all configuration files - --stdin-filename The name of the file when passing it through stdin - Log levels: -v, --verbose Enable verbose logging -q, --quiet Print diagnostics, but nothing else