Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to in-memory caching and added tests for file caching #119

Merged
merged 24 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ target/
.DS_Store

# Testing directories
.venv/
./.venv/
tmp/
temp/
docs/node_modules/
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions crates/pet-conda/tests/ci_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,7 @@ fn detect_new_conda_env_created_with_p_flag_with_python() {
use pet_core::{
os_environment::EnvironmentApi, python_environment::PythonEnvironmentKind, Locator,
};
use pet_reporter::{
cache::{self, CacheReporter},
collect,
};
use pet_reporter::{cache::CacheReporter, collect};

setup();
let env_name = "env_with_python3";
Expand Down
48 changes: 41 additions & 7 deletions crates/pet-homebrew/src/environments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

use crate::sym_links::get_known_symlinks;
use lazy_static::lazy_static;
use log::trace;
use pet_core::python_environment::{
PythonEnvironment, PythonEnvironmentBuilder, PythonEnvironmentKind,
};
Expand All @@ -12,19 +13,21 @@ use std::path::{Path, PathBuf};

lazy_static! {
static ref PYTHON_VERSION: Regex =
Regex::new(r"/(\d+\.\d+\.\d+)/").expect("error parsing Version regex for Homebrew");
Regex::new(r"/(\d+\.\d+\.\d+)").expect("error parsing Version regex for Homebrew");
}

pub fn get_python_info(
python_exe_from_bin_dir: &Path,
resolved_exe: &Path,
) -> Option<PythonEnvironment> {
// let user_friendly_exe = python_exe_from_bin_dir;
let python_version = resolved_exe.to_string_lossy().to_string();
let version = match PYTHON_VERSION.captures(&python_version) {
Some(captures) => captures.get(1).map(|version| version.as_str().to_string()),
None => None,
};
let version = get_version(resolved_exe);

trace!(
"Getting homebrew python info for {:?} => {:?} with version {:?}",
python_exe_from_bin_dir,
resolved_exe,
version
);

let mut symlinks = vec![
python_exe_from_bin_dir.to_path_buf(),
Expand Down Expand Up @@ -57,6 +60,14 @@ pub fn get_python_info(
Some(env)
}

fn get_version(resolved_exe: &Path) -> Option<String> {
let python_version = resolved_exe.to_string_lossy().to_string();
match PYTHON_VERSION.captures(&python_version) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found a bug, as a result of the CI containers getting updated..

Some(captures) => captures.get(1).map(|version| version.as_str().to_string()),
None => None,
}
}

fn get_prefix(_resolved_file: &Path) -> Option<PathBuf> {
// NOTE:
// While testing found that on Mac Intel
Expand Down Expand Up @@ -135,3 +146,26 @@ fn get_prefix(_resolved_file: &Path) -> Option<PathBuf> {
// }
None
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
#[cfg(unix)]
fn extract_version() {
assert_eq!(
get_version(&PathBuf::from(
"/home/linuxbrew/.linuxbrew/Cellar/[email protected]/3.12.4/bin/python3"
)),
Some("3.12.4".to_string())
);

assert_eq!(
get_version(&PathBuf::from(
"/home/linuxbrew/.linuxbrew/Cellar/[email protected]/3.11.9_1/bin/python3.11"
)),
Some("3.11.9".to_string())
);
}
}
9 changes: 9 additions & 0 deletions crates/pet-python-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,12 @@ serde = { version = "1.0.152", features = ["derive"] }
log = "0.4.21"
serde_json = "1.0.93"
sha2 = "0.10.6"
env_logger = "0.10.2"

[features]
ci = []
ci-jupyter-container = []
ci-homebrew-container = []
ci-poetry-global = []
ci-poetry-project = []
ci-poetry-custom = []
89 changes: 55 additions & 34 deletions crates/pet-python-utils/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@
// Licensed under the MIT License.

use lazy_static::lazy_static;
use log::trace;
use log::{trace, warn};
use std::{
collections::{hash_map::Entry, HashMap, HashSet},
io,
path::PathBuf,
sync::{Arc, Mutex},
time::SystemTime,
};

use crate::{
env::ResolvedPythonEnv,
fs_cache::{get_cache_from_file, store_cache_in_file},
fs_cache::{delete_cache_file, get_cache_from_file, store_cache_in_file},
};

lazy_static! {
Expand All @@ -25,6 +26,10 @@ pub trait CacheEntry: Send + Sync {
fn track_symlinks(&self, symlinks: Vec<PathBuf>);
}

pub fn clear_cache() -> io::Result<()> {
CACHE.clear()
}

pub fn create_cache(executable: PathBuf) -> Arc<Mutex<Box<dyn CacheEntry>>> {
CACHE.create_cache(executable)
}
Expand Down Expand Up @@ -61,28 +66,39 @@ impl CacheImpl {
/// Once a cache directory has been set, you cannot change it.
/// No point supporting such a scenario.
fn set_cache_directory(&self, cache_dir: PathBuf) {
if let Some(cache_dir) = self.cache_dir.lock().unwrap().clone() {
warn!(
"Cache directory has already been set to {:?}. Cannot change it now.",
cache_dir
);
return;
}
trace!("Setting cache directory to {:?}", cache_dir);
self.cache_dir.lock().unwrap().replace(cache_dir);
}
fn clear(&self) -> io::Result<()> {
trace!("Clearing cache");
self.locks.lock().unwrap().clear();
if let Some(cache_directory) = self.cache_dir.lock().unwrap().clone() {
std::fs::remove_dir_all(cache_directory)
} else {
Ok(())
}
}
fn create_cache(&self, executable: PathBuf) -> LockableCacheEntry {
if let Some(cache_directory) = self.cache_dir.lock().unwrap().as_ref() {
match self.locks.lock().unwrap().entry(executable.clone()) {
Entry::Occupied(lock) => lock.get().clone(),
Entry::Vacant(lock) => {
let cache =
Box::new(CacheEntryImpl::create(cache_directory.clone(), executable))
as Box<(dyn CacheEntry + 'static)>;
lock.insert(Arc::new(Mutex::new(cache))).clone()
}
let cache_directory = self.cache_dir.lock().unwrap().clone();
match self.locks.lock().unwrap().entry(executable.clone()) {
Entry::Occupied(lock) => lock.get().clone(),
Entry::Vacant(lock) => {
let cache = Box::new(CacheEntryImpl::create(cache_directory.clone(), executable))
as Box<(dyn CacheEntry + 'static)>;
lock.insert(Arc::new(Mutex::new(cache))).clone()
}
} else {
Arc::new(Mutex::new(Box::new(CacheEntryImpl::empty(
executable.clone(),
))))
}
}
}

type FilePathWithMTimeCTime = (PathBuf, Option<SystemTime>, Option<SystemTime>);
type FilePathWithMTimeCTime = (PathBuf, SystemTime, SystemTime);

struct CacheEntryImpl {
cache_directory: Option<PathBuf>,
Expand All @@ -92,17 +108,9 @@ struct CacheEntryImpl {
symlinks: Arc<Mutex<Vec<FilePathWithMTimeCTime>>>,
}
impl CacheEntryImpl {
pub fn create(cache_directory: PathBuf, executable: PathBuf) -> impl CacheEntry {
pub fn create(cache_directory: Option<PathBuf>, executable: PathBuf) -> impl CacheEntry {
CacheEntryImpl {
cache_directory: Some(cache_directory),
executable,
envoronment: Arc::new(Mutex::new(None)),
symlinks: Arc::new(Mutex::new(Vec::new())),
}
}
pub fn empty(executable: PathBuf) -> impl CacheEntry {
CacheEntryImpl {
cache_directory: None,
cache_directory,
executable,
envoronment: Arc::new(Mutex::new(None)),
symlinks: Arc::new(Mutex::new(Vec::new())),
Expand All @@ -112,10 +120,21 @@ impl CacheEntryImpl {
// Check if any of the exes have changed since we last cached this.
for symlink_info in self.symlinks.lock().unwrap().iter() {
if let Ok(metadata) = symlink_info.0.metadata() {
if metadata.modified().ok() != symlink_info.1
|| metadata.created().ok() != symlink_info.2
if metadata.modified().ok() != Some(symlink_info.1)
|| metadata.created().ok() != Some(symlink_info.2)
{
trace!(
"Symlink {:?} has changed since we last cached it. original mtime & ctime {:?}, {:?}, current mtime & ctime {:?}, {:?}",
symlink_info.0,
symlink_info.1,
symlink_info.2,
metadata.modified().ok(),
metadata.created().ok()
);
self.envoronment.lock().unwrap().take();
if let Some(cache_directory) = &self.cache_directory {
delete_cache_file(cache_directory, &self.executable);
}
}
}
}
Expand Down Expand Up @@ -149,11 +168,12 @@ impl CacheEntry for CacheEntryImpl {
let mut symlinks = vec![];
for symlink in environment.symlinks.clone().unwrap_or_default().iter() {
if let Ok(metadata) = symlink.metadata() {
symlinks.push((
symlink.clone(),
metadata.modified().ok(),
metadata.created().ok(),
));
// We only care if we have the information
if let (Some(modified), Some(created)) =
(metadata.modified().ok(), metadata.created().ok())
{
symlinks.push((symlink.clone(), modified, created));
}
}
}

Expand All @@ -167,8 +187,9 @@ impl CacheEntry for CacheEntryImpl {
.unwrap()
.replace(environment.clone());

trace!("Caching interpreter info for {:?}", self.executable);

if let Some(ref cache_directory) = self.cache_directory {
trace!("Storing cache for {:?}", self.executable);
store_cache_in_file(cache_directory, &self.executable, &environment, symlinks)
}
}
Expand Down
63 changes: 49 additions & 14 deletions crates/pet-python-utils/src/fs_cache.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

use log::trace;
use log::{error, trace};
use pet_fs::path::norm_case;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
Expand All @@ -14,7 +14,7 @@ use std::{

use crate::env::ResolvedPythonEnv;

type FilePathWithMTimeCTime = (PathBuf, Option<SystemTime>, Option<SystemTime>);
type FilePathWithMTimeCTime = (PathBuf, SystemTime, SystemTime);

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
Expand All @@ -23,8 +23,13 @@ struct CacheEntry {
pub symlinks: Vec<FilePathWithMTimeCTime>,
}

fn generate_cache_file(cache_directory: &Path, executable: &PathBuf) -> PathBuf {
cache_directory.join(format!("{}.1.json", generate_hash(executable)))
pub fn generate_cache_file(cache_directory: &Path, executable: &PathBuf) -> PathBuf {
cache_directory.join(format!("{}.2.json", generate_hash(executable)))
}

pub fn delete_cache_file(cache_directory: &Path, executable: &PathBuf) {
let cache_file = generate_cache_file(cache_directory, executable);
let _ = fs::remove_file(cache_file);
}

pub fn get_cache_from_file(
Expand All @@ -35,11 +40,29 @@ pub fn get_cache_from_file(
let file = File::open(cache_file.clone()).ok()?;
let reader = BufReader::new(file);
let cache: CacheEntry = serde_json::from_reader(reader).ok()?;
// Account for conflicts in the cache file
// i.e. the hash generated is same for another file, remember we only take the first 16 chars.
if !cache
.environment
.clone()
.symlinks
.unwrap_or_default()
.contains(executable)
{
trace!(
"Cache file {:?} {:?}, does not match executable {:?} (possible hash collision)",
cache_file,
cache.environment,
executable
);
return None;
}

// Check if any of the exes have changed since we last cached them.
let cache_is_valid = cache.symlinks.iter().all(|symlink| {
if let Ok(metadata) = symlink.0.metadata() {
metadata.modified().ok() == symlink.1 && metadata.created().ok() == symlink.2
metadata.modified().ok() == Some(symlink.1)
&& metadata.created().ok() == Some(symlink.2)
} else {
// File may have been deleted.
false
Expand All @@ -62,15 +85,27 @@ pub fn store_cache_in_file(
symlinks_with_times: Vec<FilePathWithMTimeCTime>,
) {
let cache_file = generate_cache_file(cache_directory, executable);
let _ = std::fs::create_dir_all(cache_directory);

let cache = CacheEntry {
environment: environment.clone(),
symlinks: symlinks_with_times,
};
if let Ok(file) = std::fs::File::create(cache_file.clone()) {
trace!("Caching {:?} in {:?}", executable, cache_file);
let _ = serde_json::to_writer_pretty(file, &cache).ok();
match std::fs::create_dir_all(cache_directory) {
Ok(_) => {
let cache = CacheEntry {
environment: environment.clone(),
symlinks: symlinks_with_times,
};
match std::fs::File::create(cache_file.clone()) {
Ok(file) => {
trace!("Caching {:?} in {:?}", executable, cache_file);
match serde_json::to_writer_pretty(file, &cache) {
Ok(_) => (),
Err(err) => error!("Error writing cache file {:?} {:?}", cache_file, err),
}
}
Err(err) => error!("Error creating cache file {:?} {:?}", cache_file, err),
}
}
Err(err) => error!(
"Error creating cache directory {:?} {:?}",
cache_directory, err
),
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/pet-python-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
pub mod cache;
pub mod env;
pub mod executable;
mod fs_cache;
pub mod fs_cache;
mod headers;
pub mod platform_dirs;
pub mod version;
Loading
Loading