Skip to content

Commit

Permalink
Merkle tree refactor - checkout
Browse files Browse the repository at this point in the history
  • Loading branch information
jcelliott committed Sep 4, 2024
1 parent 7ac0f4c commit c639259
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 6 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2024 Oxen Labs Inc

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
1 change: 1 addition & 0 deletions src/lib/src/core/v0_19_0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub mod init;
pub mod metadata;
pub mod pull;
pub mod push;
pub mod restore;
pub mod rm;
pub mod status;
pub mod structs;
Expand Down
105 changes: 101 additions & 4 deletions src/lib/src/core/v0_19_0/branches.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
use crate::core::refs::RefReader;
use crate::core::v0_19_0::index::merkle_tree::CommitMerkleTree;
use crate::core::v0_19_0::{commits, restore};
use crate::error::OxenError;
use crate::model::{Commit, CommitEntry, LocalRepository};
use crate::model::{Commit, CommitEntry, LocalRepository, MerkleTreeNodeType};
use crate::repositories;
use crate::util;

use std::path::Path;

use super::index::merkle_tree::node::{
FileChunkType, FileNode, FileStorageType, MerkleTreeNodeData,
};

pub fn list_entry_versions_for_commit(
local_repo: &LocalRepository,
commit_id: &str,
Expand All @@ -11,13 +20,101 @@ pub fn list_entry_versions_for_commit(
todo!()
}

pub async fn checkout(repo: &LocalRepository, name: &str) -> Result<(), OxenError> {
todo!()
/// Checks out the branch called `branch_name` into the working directory.
///
/// The branch is looked up by name and the checkout itself is delegated to
/// [`checkout_commit_id`] using the commit id stored on the branch.
///
/// # Errors
///
/// Returns the `local_branch_not_found` error when no branch with that name
/// exists, and propagates any error from the branch lookup or from checking
/// out the commit.
pub async fn checkout(repo: &LocalRepository, branch_name: &str) -> Result<(), OxenError> {
    // Build the "not found" error lazily so nothing is constructed on the
    // common path where the branch exists.
    let branch = repositories::branches::get_by_name(repo, branch_name)?
        .ok_or_else(|| OxenError::local_branch_not_found(branch_name))?;

    checkout_commit_id(repo, &branch.commit_id).await?;
    // Pull changes if needed
    // TODO

    Ok(())
}

/// Checks out the commit identified by `commit_id` into the working directory.
///
/// The id is resolved with `repositories::commits::get_by_id`; when no commit
/// is found the `commit_id_does_not_exist` error is returned. On success the
/// work is handed to `set_working_repo_to_commit`.
pub async fn checkout_commit_id(
repo: &LocalRepository,
commit_id: impl AsRef<str>,
) -> Result<(), OxenError> {
// NOTE(review): this `todo!()` looks like the removed pre-change stub that the
// diff view shows next to the new body — confirm it is not part of the new code.
todo!()
let commit = repositories::commits::get_by_id(repo, &commit_id)?
.ok_or(OxenError::commit_id_does_not_exist(&commit_id))?;

// Set working repo to commit
set_working_repo_to_commit(repo, &commit).await?;
Ok(())
}

/// Brings the working directory in line with `commit`.
///
/// If HEAD already points at `commit` only a debug message is logged.
/// Otherwise the commit's merkle tree is loaded and every file that is
/// missing or modified in the working directory is restored from it.
pub async fn set_working_repo_to_commit(
    repo: &LocalRepository,
    commit: &Commit,
) -> Result<(), OxenError> {
    let current_head = commits::head_commit(repo)?;

    if current_head.id != commit.id {
        let target_tree = CommitMerkleTree::from_commit(repo, commit)?;

        // Cleanup removed files (not implemented yet)

        // Restore missing or modified files, walking down from the tree root
        let start_dir = Path::new("");
        r_restore_missing_or_modified_files(repo, &target_tree.root, start_dir)?;

        // Remove untracked directories (not implemented yet)
    } else {
        log::debug!(
            "set_working_repo_to_commit, do nothing... head commit == commit_id {}",
            commit.id
        );
    }

    Ok(())
}

/// Recursively walks the merkle tree below `node` and makes the working
/// directory match it.
///
/// `path` is the repo-relative directory that contains `node`.
///
/// * File node: restored from the versions store when it is missing from the
///   working directory or when its content hash differs from the node's hash;
///   left alone when the hashes match.
/// * Dir node: every child is visited with the directory's own name appended
///   to `path`.
/// * Commit node: the walk continues at the commit's root directory.
///
/// # Errors
///
/// Fails on any other node type, when a node cannot be read as a file or
/// directory, and on any hashing or restore error.
fn r_restore_missing_or_modified_files(
    repo: &LocalRepository,
    node: &MerkleTreeNodeData,
    path: &Path,
) -> Result<(), OxenError> {
    match &node.dtype {
        MerkleTreeNodeType::File => {
            // Propagate a malformed node as an error instead of panicking.
            let file_node = node.file()?;
            let rel_path = path.join(&file_node.name);
            let full_path = repo.path.join(&rel_path);

            let needs_restore = if !full_path.exists() {
                log::debug!("Restoring missing file: {:?}", rel_path);
                true
            } else if util::hasher::hash_file_contents(&full_path)? != file_node.hash.to_string() {
                // File exists but its contents differ from the committed version
                log::debug!("Updating modified file: {:?}", rel_path);
                true
            } else {
                false
            };

            if needs_restore {
                restore::restore_file(repo, &file_node.hash, &rel_path)?;
            }
        }
        MerkleTreeNodeType::Dir => {
            // Recursively call for each file and directory
            let children = CommitMerkleTree::node_files_and_folders(node)?;
            let dir_node = node.dir()?;
            let dir_path = path.join(&dir_node.name);
            for child_node in children {
                r_restore_missing_or_modified_files(repo, &child_node, &dir_path)?;
            }
        }
        MerkleTreeNodeType::Commit => {
            // If we get a commit node, we need to skip to the root directory
            let root_dir = CommitMerkleTree::get_root_dir_from_commit(node)?;
            r_restore_missing_or_modified_files(repo, root_dir, path)?;
        }
        _ => {
            return Err(OxenError::basic_str(
                "Got an unexpected node type during checkout",
            ));
        }
    }
    Ok(())
}
27 changes: 27 additions & 0 deletions src/lib/src/core/v0_19_0/index/commit_merkle_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,33 @@ impl CommitMerkleTree {
Ok(children)
}

/// Returns the root directory node hanging off a commit node.
///
/// Fails when `node` is not a commit node, when it does not have exactly one
/// child, or when that single child is not a directory node.
pub fn get_root_dir_from_commit(
    node: &MerkleTreeNodeData,
) -> Result<&MerkleTreeNodeData, OxenError> {
    if !matches!(node.dtype, MerkleTreeNodeType::Commit) {
        return Err(OxenError::basic_str(
            "Expected a commit node, but got a different type",
        ));
    }

    // The root directory is expected to be the commit node's only child
    let child_count = node.children.len();
    if child_count != 1 {
        return Err(OxenError::basic_str(
            "Commit node should have exactly one child (root directory)",
        ));
    }

    let only_child = &node.children[0];
    match only_child.dtype {
        MerkleTreeNodeType::Dir => Ok(only_child),
        _ => Err(OxenError::basic_str(
            "The child of a commit node should be a directory",
        )),
    }
}

pub fn total_vnodes(&self) -> usize {
self.root.total_vnodes()
}
Expand Down
31 changes: 31 additions & 0 deletions src/lib/src/core/v0_19_0/restore.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use std::path::Path;

use crate::error::OxenError;
use crate::model::{Commit, CommitEntry, LocalRepository, MerkleHash};
use crate::util;

/// Restores one file into the working directory from the versions store.
///
/// `hash` selects the stored version and `dst_path` is the destination
/// relative to the repository root. Missing parent directories of the
/// destination are created inside the repository before the copy.
///
/// # Errors
///
/// Returns an error when the version file for `hash` does not exist, or when
/// creating the parent directory or copying the file fails.
// TODO: probably need to pass a data node here instead of a hash to get the metadata
pub fn restore_file(
    repo: &LocalRepository,
    hash: &MerkleHash,
    dst_path: &Path,
) -> Result<(), OxenError> {
    let version_path = util::fs::version_path_from_hash(repo, hash);
    if !version_path.exists() {
        return Err(OxenError::basic_str(&format!(
            "Source file not found in versions directory: {:?}",
            version_path
        )));
    }

    let working_path = repo.path.join(dst_path);
    // Create the parent of the *joined* path. `dst_path` is repo-relative, so
    // its own parent would resolve against the process working directory
    // rather than the repository.
    if let Some(parent) = working_path.parent() {
        util::fs::create_dir_all(parent)?;
    }

    util::fs::copy(version_path, working_path)?;
    // TODO: set file metadata
    // Previous version used:
    // CommitEntryWriter::set_file_timestamps(repo, path, entry, files_db)?;
    Ok(())
}
2 changes: 2 additions & 0 deletions src/lib/src/model/merkle_tree/node/file_node_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ use serde::{Deserialize, Serialize};
/// Chunking mode recorded for a file node.
///
/// NOTE(review): presumably `SingleFile` means the contents are stored as one
/// unit and `Chunked` as several pieces — confirm against the code that
/// reads this value.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum FileChunkType {
SingleFile,
// Chunked type is not used yet
Chunked,
}

/// Storage backend recorded for a file node.
///
/// NOTE(review): presumably `Disk` refers to the local versions directory and
/// `S3` to remote object storage — confirm against the storage layer.
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub enum FileStorageType {
Disk,
// S3 is not used yet
S3,
}
2 changes: 1 addition & 1 deletion src/lib/src/repositories/branches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ pub fn is_locked(repo: &LocalRepository, name: &str) -> Result<bool, OxenError>
name,
branch_lock_file.display()
);
// Branch is locked if file eixsts
// Branch is locked if file exists
Ok(branch_lock_file.exists())
}

Expand Down

0 comments on commit c639259

Please sign in to comment.