Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(cli): Migrate annexAdd to its own module #3024

Merged
merged 2 commits into from
Apr 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions cli/src/worker/annex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { assertEquals, join } from "../deps.ts"
import { annexRelativePath, hashDirLower, hashDirMixed } from "./annex.ts"

// A file nested two directories deep needs two ".." segments to reach the repo root.
Deno.test("annexRelativePath() returns appropriate paths", () => {
  const repoRelativeFile = "sub-01/anat/sub-01_T1w.nii.gz"
  const actual = annexRelativePath(repoRelativeFile)
  const expected = join("..", "..")
  assertEquals(actual, expected)
})

// Pins the two three-character directory names derived from a known annex key.
Deno.test("hashDirLower() returns the correct key prefix", async () => {
  const annexKey =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const prefix = await hashDirLower(annexKey)
  assertEquals(prefix, ["2ed", "6ea"])
})

// Pins the two two-character directory names derived from a known annex key.
Deno.test("hashDirMixed() returns the correct key prefix", async () => {
  const annexKey =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const prefix = await hashDirMixed(annexKey)
  assertEquals(prefix, ["Xk", "Mx"])
})
144 changes: 144 additions & 0 deletions cli/src/worker/annex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import { GitWorkerContext } from "./types/git-context.ts"
import { basename, dirname, git, join, relative } from "../deps.ts"
import { logger } from "../logger.ts"

/**
* Why use hash-wasm instead of Web Crypto?
* Web Crypto cannot yet compute streaming hashes for the algorithms git-annex commonly uses.
*/
import { createMD5, createSHA256 } from "npm:hash-wasm"

/**
 * Module-level hasher instances, created once when the module is loaded and
 * reused by annexAdd() for every file it hashes
 */
const [computeHashMD5, computeHashSHA256] = await Promise.all([
  createMD5(),
  createSHA256(),
])

/**
 * git-annex hashDirLower, following https://git-annex.branchable.com/internals/hashing/
 *
 * Hashes the annex key with MD5 and splits the first six hex characters of the
 * digest into two three-character directory names.
 *
 * @param annexKey git-annex key (the object filename)
 * @returns The two directory names, outermost first
 */
export async function hashDirLower(
  annexKey: string,
): Promise<[string, string]> {
  const hasher = await createMD5()
  hasher.init()
  hasher.update(annexKey)
  const hex = hasher.digest("hex")
  const outer = hex.substring(0, 3)
  const inner = hex.substring(3, 6)
  return [outer, inner]
}

/**
 * git-annex hashDirMixed implementation based on https://git-annex.branchable.com/internals/hashing/
 *
 * Hashes the annex key with MD5, reads the first four digest bytes as a
 * little-endian 32-bit word, takes four 5-bit values from it (at bit offsets
 * 0, 6, 12 and 18) and maps each one to a character of a 32-letter alphabet.
 *
 * @param annexKey git-annex key (the object filename)
 * @returns Two two-character directory names, outermost first
 */
export async function hashDirMixed(
  annexKey: string,
): Promise<[string, string]> {
  const computeMD5 = await createMD5()
  computeMD5.init()
  computeMD5.update(annexKey)
  const digest = computeMD5.digest("binary")
  // Respect the view's own offset/length so the correct bytes are read even if
  // the digest is a window into a larger ArrayBuffer.
  const view = new DataView(
    digest.buffer,
    digest.byteOffset,
    digest.byteLength,
  )
  const firstWord = view.getUint32(0, true)
  const alphabet = "0123456789zqjxkmvwgpfZQJXKMVWGPF"
  // Unsigned shift: firstWord is a uint32 and may exceed the int32 range.
  const letters = Array.from(
    { length: 4 },
    (_, i) => alphabet.charAt((firstWord >>> (6 * i)) & 31),
  )
  // Each pair is emitted in swapped order (second letter first).
  return [`${letters[1]}${letters[0]}`, `${letters[3]}${letters[2]}`]
}

/**
 * Compute the relative path leading from the directory that contains a repo
 * relative file back up to the repository root (where .git/annex lives)
 *
 * Used for symlink path creation
 */
export function annexRelativePath(path: string) {
  // Anchor the path at a synthetic root so the climb can be measured from it
  const rooted = join("/", path)
  const containingDir = dirname(rooted)
  return relative(containingDir, "/")
}

/**
 * Add a file to a configured annex
 *
 * Streams the file through the selected hash to build its git-annex key. If
 * the repo path is not already a symlink to that key's annex object, the path
 * is replaced with such a symlink and staged with git.
 *
 * @param annexKeys Object with annex key to absolute file path; mutated here, gaining an entry for each file that is (re)added
 * @param hash Git annex hash string (e.g. MD5E or SHA256); a trailing "E" appends the file extension to the key
 * @param path Absolute path to the file being added
 * @param relativePath Repo relative path for file being added
 * @param size File size (to avoid additional stat call)
 * @param context GitWorkerContext object supplying fs, repoPath and git config
 * @returns true when the symlink was (re)created and staged, false when the existing link already pointed at this key
 */
export async function annexAdd(
  annexKeys: Record<string, string>,
  hash: string,
  path: string,
  relativePath: string,
  size: number,
  context: GitWorkerContext,
): Promise<boolean> {
  // E in the backend means include the file extension
  let extension = ""
  if (hash.endsWith("E")) {
    const filename = basename(relativePath)
    const firstDot = filename.indexOf(".")
    // A filename without "." has no extension. Without this guard,
    // substring(-1) would return the whole filename and corrupt the key.
    if (firstDot !== -1) {
      extension = filename.substring(firstDot)
    }
  }
  // Compute hash
  // NOTE(review): these hashers are shared module-level instances, so
  // overlapping annexAdd calls on the same backend would interleave their
  // updates — confirm callers invoke this sequentially.
  const computeHash = hash.startsWith("MD5")
    ? computeHashMD5
    : computeHashSHA256
  computeHash.init()
  const stream = context.fs.createReadStream(path, {
    highWaterMark: 1024 * 1024 * 10,
  })
  for await (const data of stream) {
    computeHash.update(data)
  }
  const digest = computeHash.digest("hex")
  const annexKey = `${hash}-s${size}--${digest}${extension}`
  const annexPath = join(
    ".git",
    "annex",
    "objects",
    ...(await hashDirMixed(annexKey)),
    annexKey,
    annexKey,
  )
  // Path to this file in our repo
  const fileRepoPath = join(context.repoPath, relativePath)

  let link
  let forceAdd = false
  try {
    // Test if the repo already has this object
    link = await context.fs.promises.readlink(fileRepoPath)
  } catch (_err) {
    // Any readlink failure (presumably the path is missing or is not a
    // symlink) means the link has to be created
    forceAdd = true
  }

  // Calculate the relative symlink for our file
  const symlinkTarget = join(
    annexRelativePath(relativePath),
    annexPath,
  )

  // Nothing to do when the existing link already points at this object
  if (!forceAdd && link === symlinkTarget) {
    return false
  }

  // Upload this key after the git commit
  annexKeys[annexKey] = path
  // Verify parent directories exist
  await context.fs.promises.mkdir(dirname(fileRepoPath), { recursive: true })
  // Remove the existing symlink or git file
  await context.fs.promises.rm(fileRepoPath, { force: true })
  // Create our new symlink pointing at the right annex object
  await context.fs.promises.symlink(symlinkTarget, fileRepoPath)
  const options = {
    ...context.config(),
    filepath: relativePath,
  }
  await git.add(options)
  return true
}
23 changes: 0 additions & 23 deletions cli/src/worker/git.test.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,7 @@
import { annexRelativePath, hashDirLower, hashDirMixed } from "./git.ts"
import { assertArrayIncludes, assertEquals, git, join, walk, SEPARATOR } from "../deps.ts"
import { addGitFiles } from "../commands/upload.ts"
import fs from "node:fs"

// Two directory levels below the repo root should yield "../..".
Deno.test("annexRelativePath() returns appropriate paths", () => {
  const result = annexRelativePath("sub-01/anat/sub-01_T1w.nii.gz")
  const twoLevelsUp = join("..", "..")
  assertEquals(result, twoLevelsUp)
})

// Known annex key -> expected pair of three-character directory names.
Deno.test("hashDirLower() returns the correct key prefix", async () => {
  const key =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const dirs = await hashDirLower(key)
  assertEquals(dirs, ["2ed", "6ea"])
})

// Known annex key -> expected pair of two-character directory names.
Deno.test("hashDirMixed() returns the correct key prefix", async () => {
  const key =
    "SHA256E-s311112--c3527d7944a9619afb57863a34e6af7ec3fe4f108e56c860d9e700699ff806fb.nii.gz"
  const dirs = await hashDirMixed(key)
  assertEquals(dirs, ["Xk", "Mx"])
})

Deno.test("adds git and annexed content given a directory of files", async () => {
const testUpload = await Deno.makeTempDir()
const testRepo = await Deno.makeTempDir()
Expand Down
Loading
Loading