Skip to content

Commit

Permalink
fix uploading same file twice without changes
Browse files Browse the repository at this point in the history
  • Loading branch information
gschoeni committed Dec 26, 2024
1 parent 2a12e64 commit 66c369c
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 3 deletions.
Binary file added data/test/parquet/sft 100.parquet
Binary file not shown.
87 changes: 85 additions & 2 deletions src/lib/src/api/client/workspaces/commits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ mod tests {
assert!(remote_commit.is_some());
assert_eq!(commit.id, remote_commit.unwrap().id);

println!("DONE!!");

Ok(remote_repo)
})
.await
Expand Down Expand Up @@ -174,4 +172,89 @@ mod tests {
})
.await
}

#[tokio::test]
async fn test_commit_same_data_frame_file_twice() -> Result<(), OxenError> {
test::run_remote_created_and_readme_remote_repo_test(|remote_repo| async move {
let branch_name = "main";
let branch = api::client::branches::create_from_branch(
&remote_repo,
branch_name,
DEFAULT_BRANCH_NAME,
)
.await?;
assert_eq!(branch.name, branch_name);

let workspace_id = UserConfig::identifier()?;
let directory_name = "";
let paths = vec![test::test_100_parquet()];
api::client::workspaces::create(&remote_repo, &branch_name, &workspace_id).await?;
let result = api::client::workspaces::files::add_many(
&remote_repo,
&workspace_id,
directory_name,
paths,
)
.await;
assert!(result.is_ok());

let body = NewCommitBody {
message: "Adding 100 row parquet".to_string(),
author: "Test User".to_string(),
email: "[email protected]".to_string(),
};
let commit =
api::client::workspaces::commit(&remote_repo, branch_name, &workspace_id, &body)
.await?;

let remote_commit = api::client::commits::get_by_id(&remote_repo, &commit.id).await?;
assert!(remote_commit.is_some());
assert_eq!(commit.id, remote_commit.unwrap().id);

// List the files on main
let revision = "main";
let path = "";
let page = 1;
let page_size = 100;
let entries =
api::client::dir::list(&remote_repo, revision, path, page, page_size).await?;

// There should be the README and the parquet file
assert_eq!(entries.total_entries, 2);
assert_eq!(entries.entries.len(), 2);

// Add the same file again
let workspace_id = UserConfig::identifier()? + "2";
api::client::workspaces::create(&remote_repo, &branch_name, &workspace_id).await?;
let paths = vec![test::test_100_parquet()];
let result = api::client::workspaces::files::add_many(
&remote_repo,
&workspace_id,
directory_name,
paths,
)
.await;
assert!(result.is_ok());

// Commit the changes
let body = NewCommitBody {
message: "Adding 100 row parquet AGAIN".to_string(),
author: "Test User".to_string(),
email: "[email protected]".to_string(),
};
let result =
api::client::workspaces::commit(&remote_repo, branch_name, &workspace_id, &body)
.await;
assert!(result.is_err());

// List the files on main
let entries =
api::client::dir::list(&remote_repo, revision, path, page, page_size).await?;
assert_eq!(entries.total_entries, 2);
assert_eq!(entries.entries.len(), 2);

Ok(remote_repo)
})
.await
}
}
4 changes: 4 additions & 0 deletions src/lib/src/core/v0_19_0/index/commit_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,10 @@ pub fn commit_dir_entries(
);
}

if dir_entries.is_empty() {
return Err(OxenError::basic_str("No changes to commit"));
}

let message = &new_commit.message;
// if the HEAD file exists, we have parents
// otherwise this is the first commit
Expand Down
4 changes: 4 additions & 0 deletions src/lib/src/core/v0_19_0/workspaces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ pub fn commit(
&commit_progress_bar,
)?;

if dir_entries.is_empty() {
return Err(OxenError::basic_str("No changes to commit"));
}

core::v0_19_0::index::commit_writer::commit_dir_entries(
&workspace.base_repo,
dir_entries,
Expand Down
10 changes: 9 additions & 1 deletion src/lib/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ where
Ok(())
}

/// Test interacting with a remote repo that has nothing synced
/// Test interacting with a remote repo that has a README
pub async fn run_remote_created_and_readme_remote_repo_test<T, Fut>(
test: T,
) -> Result<(), OxenError>
Expand Down Expand Up @@ -1617,6 +1617,14 @@ pub fn test_200k_csv() -> PathBuf {
.join("celeb_a_200k.csv")
}

/// Returns: data/test/parquet/sft 100.parquet
pub fn test_100_parquet() -> PathBuf {
Path::new("data")
.join("test")
.join("parquet")
.join("sft 100.parquet")
}

/// Returns: data/test/parquet/wiki_1k.parquet
pub fn test_1k_parquet() -> PathBuf {
Path::new("data")
Expand Down

0 comments on commit 66c369c

Please sign in to comment.