Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use more threads when discovering python files #12258

Merged
merged 3 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions crates/red_knot/src/workspace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use std::collections::BTreeMap;

use ruff_db::system::{SystemPath, SystemPathBuf};

use crate::program::Program;

pub struct Workspace {
/// The root path of the workspace.
///
/// This is the path to the common ancestor directory of all programs containing a ruff configuration file
/// or the current working directory if the programs have no common ancestor directory (e.g. `C:\foo` and `D:\bar`)
/// or no configuration file. This is the same as [`Program::path`] if there is only one program in the workspace.
path: SystemPathBuf,

/// The programs that are part of this workspace.
///
/// The key is the directory containing the program.
programs: BTreeMap<SystemPathBuf, Program>,
}

impl Workspace {
pub fn new(path: impl AsRef<SystemPath>) -> Self {
Self {
path: path.as_ref().to_path_buf(),
programs: BTreeMap::default(),
}
}

pub fn add_program(&mut self, program: Program) {
self.programs.insert(program.path().to_path_buf(), program);
}

pub fn path(&self) -> &SystemPath {
&self.path
}

/// Returns the closest program that contains the given path.
pub fn program(&self, path: impl AsRef<SystemPath>) -> Option<&Program> {
let path = path.as_ref();
for (program_path, program) in self.programs.range(..=path.to_path_buf()).rev() {
if path.starts_with(program_path) {
return Some(program);
}
}

None
}

pub fn program_mut(&mut self, path: impl AsRef<SystemPath>) -> Option<&mut Program> {
let path = path.as_ref();
for (program_path, program) in self.programs.range_mut(..=path.to_path_buf()).rev() {
if path.starts_with(program_path) {
return Some(program);
}
}

None
}
}
265 changes: 174 additions & 91 deletions crates/ruff_workspace/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::sync::RwLock;
use anyhow::Result;
use anyhow::{anyhow, bail};
use globset::{Candidate, GlobSet};
use ignore::{WalkBuilder, WalkState};
use ignore::{DirEntry, Error, ParallelVisitor, WalkBuilder, WalkState};
use itertools::Itertools;
use log::debug;
use matchit::{InsertError, Match, Router};
Expand Down Expand Up @@ -378,119 +378,202 @@ pub fn python_files_in_path<'a>(
}
builder.standard_filters(resolver.respect_gitignore());
builder.hidden(false);

builder.threads(
std::thread::available_parallelism()
.map_or(1, std::num::NonZero::get)
.min(12),
);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New default for the number of threads

let walker = builder.build_parallel();

// Run the `WalkParallel` to collect all Python files.
let is_hierarchical = resolver.is_hierarchical();
let error: std::sync::Mutex<Result<()>> = std::sync::Mutex::new(Ok(()));
let resolver: RwLock<Resolver> = RwLock::new(resolver);
let files: std::sync::Mutex<Vec<Result<ResolvedFile, ignore::Error>>> =
std::sync::Mutex::new(vec![]);
walker.run(|| {
Box::new(|result| {
// Respect our own exclusion behavior.
if let Ok(entry) = &result {
if entry.depth() > 0 {
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path);
if let Some(file_name) = path.file_name() {
let file_path = Candidate::new(path);
let file_basename = Candidate::new(file_name);
if match_candidate_exclusion(
&file_path,
&file_basename,
&settings.file_resolver.exclude,
) {
debug!("Ignored path via `exclude`: {:?}", path);
return WalkState::Skip;
} else if match_candidate_exclusion(
&file_path,
&file_basename,
&settings.file_resolver.extend_exclude,
) {
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip;
}
} else {
debug!("Ignored path due to error in parsing: {:?}", path);

let state = WalkPythonFilesState::new(resolver);
let mut visitor = PythonFilesVisitorBuilder::new(transformer, &state);
walker.visit(&mut visitor);

state.finish()
}

type ResolvedFiles = Vec<Result<ResolvedFile, ignore::Error>>;

struct WalkPythonFilesState<'config> {
is_hierarchical: bool,
merged: std::sync::Mutex<(ResolvedFiles, Result<()>)>,
resolver: RwLock<Resolver<'config>>,
}

impl<'config> WalkPythonFilesState<'config> {
fn new(resolver: Resolver<'config>) -> Self {
Self {
is_hierarchical: resolver.is_hierarchical(),
merged: std::sync::Mutex::new((Vec::new(), Ok(()))),
resolver: RwLock::new(resolver),
}
}

fn finish(self) -> Result<(Vec<Result<ResolvedFile, ignore::Error>>, Resolver<'config>)> {
let (files, error) = self.merged.into_inner().unwrap();
error?;

Ok((files, self.resolver.into_inner().unwrap()))
}
}

struct PythonFilesVisitorBuilder<'s, 'config> {
state: &'s WalkPythonFilesState<'config>,
transformer: &'s dyn ConfigurationTransformer,
}

impl<'s, 'config> PythonFilesVisitorBuilder<'s, 'config> {
fn new(
transformer: &'s dyn ConfigurationTransformer,
state: &'s WalkPythonFilesState<'config>,
) -> Self {
Self { state, transformer }
}
}

struct PythonFilesVisitor<'s, 'config> {
local_files: Vec<Result<ResolvedFile, ignore::Error>>,
local_error: Result<()>,
global: &'s WalkPythonFilesState<'config>,
transformer: &'s dyn ConfigurationTransformer,
}

impl<'config, 's> ignore::ParallelVisitorBuilder<'s> for PythonFilesVisitorBuilder<'s, 'config>
where
'config: 's,
{
fn build(&mut self) -> Box<dyn ignore::ParallelVisitor + 's> {
Box::new(PythonFilesVisitor {
local_files: vec![],
local_error: Ok(()),
global: self.state,
transformer: self.transformer,
})
}
}

impl ParallelVisitor for PythonFilesVisitor<'_, '_> {
fn visit(&mut self, result: std::result::Result<DirEntry, Error>) -> WalkState {
// Respect our own exclusion behavior.
if let Ok(entry) = &result {
if entry.depth() > 0 {
let path = entry.path();
let resolver = self.global.resolver.read().unwrap();
let settings = resolver.resolve(path);
if let Some(file_name) = path.file_name() {
let file_path = Candidate::new(path);
let file_basename = Candidate::new(file_name);
if match_candidate_exclusion(
&file_path,
&file_basename,
&settings.file_resolver.exclude,
) {
debug!("Ignored path via `exclude`: {:?}", path);
return WalkState::Skip;
} else if match_candidate_exclusion(
&file_path,
&file_basename,
&settings.file_resolver.extend_exclude,
) {
debug!("Ignored path via `extend-exclude`: {:?}", path);
return WalkState::Skip;
}
} else {
debug!("Ignored path due to error in parsing: {:?}", path);
return WalkState::Skip;
}
}
}

// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if is_hierarchical {
if let Ok(entry) = &result {
if entry
.file_type()
.is_some_and(|file_type| file_type.is_dir())
{
match settings_toml(entry.path()) {
Ok(Some(pyproject)) => match resolve_scoped_settings(
&pyproject,
Relativity::Parent,
transformer,
) {
Ok((root, settings)) => {
resolver.write().unwrap().add(root, settings);
}
Err(err) => {
*error.lock().unwrap() = Err(err);
return WalkState::Quit;
}
},
Ok(None) => {}
// Search for the `pyproject.toml` file in this directory, before we visit any
// of its contents.
if self.global.is_hierarchical {
if let Ok(entry) = &result {
if entry
.file_type()
.is_some_and(|file_type| file_type.is_dir())
{
match settings_toml(entry.path()) {
Ok(Some(pyproject)) => match resolve_scoped_settings(
&pyproject,
Relativity::Parent,
self.transformer,
) {
Ok((root, settings)) => {
self.global.resolver.write().unwrap().add(root, settings);
}
Err(err) => {
*error.lock().unwrap() = Err(err);
self.local_error = Err(err);
return WalkState::Quit;
}
},
Ok(None) => {}
Err(err) => {
self.local_error = Err(err);
return WalkState::Quit;
}
}
}
}
}

match result {
Ok(entry) => {
// Ignore directories
let resolved = if entry.file_type().map_or(true, |ft| ft.is_dir()) {
None
} else if entry.depth() == 0 {
// Accept all files that are passed-in directly.
Some(ResolvedFile::Root(entry.into_path()))
match result {
Ok(entry) => {
// Ignore directories
let resolved = if entry.file_type().map_or(true, |ft| ft.is_dir()) {
None
} else if entry.depth() == 0 {
// Accept all files that are passed-in directly.
Some(ResolvedFile::Root(entry.into_path()))
} else {
// Otherwise, check if the file is included.
let path = entry.path();
let resolver = self.global.resolver.read().unwrap();
let settings = resolver.resolve(path);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else if settings.file_resolver.extend_include.is_match(path) {
debug!("Included path via `extend-include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else {
// Otherwise, check if the file is included.
let path = entry.path();
let resolver = resolver.read().unwrap();
let settings = resolver.resolve(path);
if settings.file_resolver.include.is_match(path) {
debug!("Included path via `include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else if settings.file_resolver.extend_include.is_match(path) {
debug!("Included path via `extend-include`: {:?}", path);
Some(ResolvedFile::Nested(entry.into_path()))
} else {
None
}
};

if let Some(resolved) = resolved {
files.lock().unwrap().push(Ok(resolved));
None
}
};

if let Some(resolved) = resolved {
self.local_files.push(Ok(resolved));
}
Err(err) => {
files.lock().unwrap().push(Err(err));
}
}
Err(err) => {
self.local_files.push(Err(err));
}
}

WalkState::Continue
})
});
WalkState::Continue
}
}

error.into_inner().unwrap()?;
impl Drop for PythonFilesVisitor<'_, '_> {
fn drop(&mut self) {
let mut merged = self.global.merged.lock().unwrap();
let (ref mut files, ref mut error) = &mut *merged;

Ok((files.into_inner().unwrap(), resolver.into_inner().unwrap()))
if files.is_empty() {
*files = std::mem::take(&mut self.local_files);
} else {
files.append(&mut self.local_files);
}

let local_error = std::mem::replace(&mut self.local_error, Ok(()));
if error.is_ok() {
*error = local_error;
}
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
Expand Down
Loading