From c639259e22d3bc1534a600aeae7e542bc3fb1171 Mon Sep 17 00:00:00 2001
From: Joshua Elliott
Date: Tue, 3 Sep 2024 16:31:13 -0600
Subject: [PATCH] Merkle tree refactor - checkout

---
 LICENSE                                       |   2 +-
 src/lib/src/core/v0_19_0.rs                   |   1 +
 src/lib/src/core/v0_19_0/branches.rs          | 142 +++++++++++++++++-
 .../core/v0_19_0/index/commit_merkle_tree.rs  |  27 ++++
 src/lib/src/core/v0_19_0/restore.rs           |  45 ++++++
 .../model/merkle_tree/node/file_node_types.rs |   2 +
 src/lib/src/repositories/branches.rs          |   2 +-
 7 files changed, 215 insertions(+), 6 deletions(-)
 create mode 100644 src/lib/src/core/v0_19_0/restore.rs

diff --git a/LICENSE b/LICENSE
index 7a4a3ea24..a2dfa7f67 100644
--- a/LICENSE
+++ b/LICENSE
@@ -187,7 +187,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2024 Oxen Labs Inc
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/src/lib/src/core/v0_19_0.rs b/src/lib/src/core/v0_19_0.rs
index 593e997d1..998095c57 100644
--- a/src/lib/src/core/v0_19_0.rs
+++ b/src/lib/src/core/v0_19_0.rs
@@ -12,6 +12,7 @@ pub mod init;
 pub mod metadata;
 pub mod pull;
 pub mod push;
+pub mod restore;
 pub mod rm;
 pub mod status;
 pub mod structs;
diff --git a/src/lib/src/core/v0_19_0/branches.rs b/src/lib/src/core/v0_19_0/branches.rs
index d4c26ac8b..763b5762a 100644
--- a/src/lib/src/core/v0_19_0/branches.rs
+++ b/src/lib/src/core/v0_19_0/branches.rs
@@ -1,8 +1,17 @@
+use crate::core::refs::RefReader;
+use crate::core::v0_19_0::index::merkle_tree::CommitMerkleTree;
+use crate::core::v0_19_0::{commits, restore};
 use crate::error::OxenError;
-use crate::model::{Commit, CommitEntry, LocalRepository};
+use crate::model::{Commit, CommitEntry, LocalRepository, MerkleTreeNodeType};
+use crate::repositories;
+use crate::util;
 
 use std::path::Path;
 
+use super::index::merkle_tree::node::{
+    FileChunkType, FileNode, FileStorageType, MerkleTreeNodeData,
+};
+
 pub fn list_entry_versions_for_commit(
     local_repo: &LocalRepository,
     commit_id: &str,
@@ -11,13 +20,138 @@ pub fn list_entry_versions_for_commit(
     todo!()
 }
 
-pub async fn checkout(repo: &LocalRepository, name: &str) -> Result<(), OxenError> {
-    todo!()
+/// Checks out the branch `branch_name` by restoring the working directory to
+/// the commit that the branch points at.
+///
+/// # Errors
+///
+/// Returns an error if the branch does not exist locally, or if restoring the
+/// working directory fails.
+pub async fn checkout(repo: &LocalRepository, branch_name: &str) -> Result<(), OxenError> {
+    let branch = repositories::branches::get_by_name(repo, branch_name)?
+        .ok_or_else(|| OxenError::local_branch_not_found(branch_name))?;
+
+    checkout_commit_id(repo, &branch.commit_id).await?;
+    // Pull changes if needed
+    // TODO
+
+    Ok(())
 }
 
+/// Restores the working directory to the commit identified by `commit_id`.
+///
+/// # Errors
+///
+/// Returns an error if no commit with that id exists, or if restoring the
+/// working directory fails.
 pub async fn checkout_commit_id(
     repo: &LocalRepository,
     commit_id: impl AsRef<str>,
 ) -> Result<(), OxenError> {
-    todo!()
+    let commit = repositories::commits::get_by_id(repo, &commit_id)?
+        .ok_or_else(|| OxenError::commit_id_does_not_exist(&commit_id))?;
+
+    // Set working repo to commit
+    set_working_repo_to_commit(repo, &commit).await?;
+    Ok(())
+}
+
+/// Restores the files recorded in `commit` into the working directory.
+///
+/// Does nothing when `commit` is already the head commit. Otherwise every
+/// file in the commit's merkle tree that is missing from the working
+/// directory, or whose content hash differs, is restored. Cleaning up
+/// removed files and untracked directories is not implemented yet.
+///
+/// # Errors
+///
+/// Returns an error if the head commit or the commit's merkle tree cannot be
+/// loaded, or if restoring a file fails.
+pub async fn set_working_repo_to_commit(
+    repo: &LocalRepository,
+    commit: &Commit,
+) -> Result<(), OxenError> {
+    let head_commit = commits::head_commit(repo)?;
+    if head_commit.id == commit.id {
+        log::debug!(
+            "set_working_repo_to_commit, do nothing... head commit == commit_id {}",
+            commit.id
+        );
+        return Ok(());
+    }
+
+    let tree = CommitMerkleTree::from_commit(repo, commit)?;
+
+    // Cleanup removed files
+
+    // Restore missing or modified files
+    r_restore_missing_or_modified_files(repo, &tree.root, Path::new(""))?;
+
+    // Remove untracked directories
+
+    Ok(())
+}
+
+/// Recursively walks the merkle tree rooted at `node` and restores every file
+/// that is missing from, or differs from, the working directory.
+///
+/// `path` is the repo-relative directory that contains `node`. For each file
+/// node:
+/// * if the file is not in the working directory, it is restored;
+/// * if it exists but its content hash does not match, it is overwritten;
+/// * if it exists and the hash matches, nothing is done.
+///
+/// # Errors
+///
+/// Returns an error if a node is not a file, directory, or commit node, or if
+/// hashing or restoring a file fails.
+fn r_restore_missing_or_modified_files(
+    repo: &LocalRepository,
+    node: &MerkleTreeNodeData,
+    path: &Path,
+) -> Result<(), OxenError> {
+    match &node.dtype {
+        MerkleTreeNodeType::File => {
+            // The dtype was matched above, so a file node is an invariant here.
+            let file_node = node
+                .file()
+                .expect("a node whose dtype is File must yield a file node");
+            let rel_path = path.join(&file_node.name);
+            let full_path = repo.path.join(&rel_path);
+            if !full_path.exists() {
+                // File doesn't exist, restore it
+                log::debug!("Restoring missing file: {:?}", rel_path);
+                restore::restore_file(repo, &file_node.hash, &rel_path)?;
+            } else {
+                // File exists, check if it needs to be updated
+                let current_hash = util::hasher::hash_file_contents(&full_path)?;
+                if current_hash != file_node.hash.to_string() {
+                    log::debug!("Updating modified file: {:?}", rel_path);
+                    restore::restore_file(repo, &file_node.hash, &rel_path)?;
+                }
+            }
+        }
+        MerkleTreeNodeType::Dir => {
+            // Recursively call for each file and directory
+            let children = CommitMerkleTree::node_files_and_folders(node)?;
+            let dir_node = node
+                .dir()
+                .expect("a node whose dtype is Dir must yield a dir node");
+            let dir_path = path.join(&dir_node.name);
+            for child_node in children {
+                r_restore_missing_or_modified_files(repo, &child_node, &dir_path)?;
+            }
+        }
+        MerkleTreeNodeType::Commit => {
+            // If we get a commit node, we need to skip to the root directory
+            let root_dir = CommitMerkleTree::get_root_dir_from_commit(node)?;
+            r_restore_missing_or_modified_files(repo, root_dir, path)?;
+        }
+        _ => {
+            return Err(OxenError::basic_str(
+                "Got an unexpected node type during checkout",
+            ));
+        }
+    }
+    Ok(())
 }
diff --git a/src/lib/src/core/v0_19_0/index/commit_merkle_tree.rs b/src/lib/src/core/v0_19_0/index/commit_merkle_tree.rs
index cd15d8961..015f9e554 100644
--- a/src/lib/src/core/v0_19_0/index/commit_merkle_tree.rs
+++ b/src/lib/src/core/v0_19_0/index/commit_merkle_tree.rs
@@ -287,6 +287,33 @@ impl CommitMerkleTree {
         Ok(children)
     }
 
+    /// Get the root directory node given a commit node
+    pub fn get_root_dir_from_commit(
+        node: &MerkleTreeNodeData,
+    ) -> Result<&MerkleTreeNodeData, OxenError> {
+        if node.dtype != MerkleTreeNodeType::Commit {
+            return Err(OxenError::basic_str(
+                "Expected a commit node, but got a different type",
+            ));
+        }
+
+        // A commit node should have exactly one child, which is the root directory
+        if node.children.len() != 1 {
+            return Err(OxenError::basic_str(
+                "Commit node should have exactly one child (root directory)",
+            ));
+        }
+
+        let root_dir = &node.children[0];
+        if root_dir.dtype != MerkleTreeNodeType::Dir {
+            return Err(OxenError::basic_str(
+                "The child of a commit node should be a directory",
+            ));
+        }
+
+        Ok(root_dir)
+    }
+
     pub fn total_vnodes(&self) -> usize {
         self.root.total_vnodes()
     }
diff --git a/src/lib/src/core/v0_19_0/restore.rs b/src/lib/src/core/v0_19_0/restore.rs
new file mode 100644
index 000000000..03ed647e2
--- /dev/null
+++ b/src/lib/src/core/v0_19_0/restore.rs
@@ -0,0 +1,45 @@
+use std::path::Path;
+
+use crate::error::OxenError;
+use crate::model::{Commit, CommitEntry, LocalRepository, MerkleHash};
+use crate::util;
+
+// TODO: probably need to pass a data node here instead of a hash to get the metadata
+/// Copies the stored version of a file into the working directory.
+///
+/// `hash` selects the version file (resolved through
+/// `util::fs::version_path_from_hash`) and `dst_path` is the destination
+/// relative to the repository root (`repo.path`). Missing parent directories
+/// of the destination are created inside the repository.
+///
+/// # Errors
+///
+/// Returns an error if the version file does not exist, or if creating the
+/// parent directories or copying the file fails.
+pub fn restore_file(
+    repo: &LocalRepository,
+    hash: &MerkleHash,
+    dst_path: &Path,
+) -> Result<(), OxenError> {
+    let version_path = util::fs::version_path_from_hash(repo, hash);
+    if !version_path.exists() {
+        return Err(OxenError::basic_str(&format!(
+            "Source file not found in versions directory: {:?}",
+            version_path
+        )));
+    }
+
+    let working_path = repo.path.join(dst_path);
+    // Create the parent of the repository-rooted destination. Using
+    // `dst_path.parent()` here would create the directories relative to the
+    // process's current directory instead of inside the repository.
+    if let Some(parent) = working_path.parent() {
+        util::fs::create_dir_all(parent)?;
+    }
+
+    util::fs::copy(version_path, working_path)?;
+    // TODO: set file metadata
+    // Previous version used:
+    // CommitEntryWriter::set_file_timestamps(repo, path, entry, files_db)?;
+    Ok(())
+}
diff --git a/src/lib/src/model/merkle_tree/node/file_node_types.rs b/src/lib/src/model/merkle_tree/node/file_node_types.rs
index 8778af88c..a4ab0ae22 100644
--- a/src/lib/src/model/merkle_tree/node/file_node_types.rs
+++ b/src/lib/src/model/merkle_tree/node/file_node_types.rs
@@ -9,11 +9,13 @@ use serde::{Deserialize, Serialize};
 #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
 pub enum FileChunkType {
     SingleFile,
+    // Chunked type is not used yet
     Chunked,
 }
 
 #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
 pub enum FileStorageType {
     Disk,
+    // S3 is not used yet
     S3,
 }
diff --git a/src/lib/src/repositories/branches.rs b/src/lib/src/repositories/branches.rs
index fc84be81f..0d334eb96 100644
--- a/src/lib/src/repositories/branches.rs
+++ b/src/lib/src/repositories/branches.rs
@@ -241,7 +241,7 @@ pub fn is_locked(repo: &LocalRepository, name: &str) -> Result<bool, OxenError>
         name,
         branch_lock_file.display()
     );
-    // Branch is locked if file eixsts
+    // Branch is locked if file exists
     Ok(branch_lock_file.exists())
 }
 