Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support hardlinks in tarballs #11077

Merged
merged 9 commits into from
Jul 11, 2024
59 changes: 57 additions & 2 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ git_oid hashToOID(const Hash & hash)
return oid;
}

Object lookupObject(git_repository * repo, const git_oid & oid)
Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
{
Object obj;
if (git_object_lookup(Setter(obj), repo, &oid, GIT_OBJECT_ANY)) {
if (git_object_lookup(Setter(obj), repo, &oid, type)) {
auto err = git_error_last();
throw Error("getting Git object '%s': %s", oid, err->message);
}
Expand Down Expand Up @@ -909,6 +909,61 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
}

/**
 * Add `path` to the tree as a second entry for the object that `target`
 * refers to. The new entry reuses the target entry's object id and
 * file mode.
 *
 * @param path   Location of the entry to create.
 * @param target Location of a previously added entry to link to.
 * @throws Error if the target lies outside the pending directories,
 *         cannot be found, or does not name an entry at all.
 */
void createHardlink(const CanonPath & path, const CanonPath & target) override
{
    std::vector<std::string> pathComponents;
    for (auto & c : path)
        pathComponents.emplace_back(c);

    // Nothing to do when prepareDirs() reports `false` for this path.
    if (!prepareDirs(pathComponents, false)) return;

    // We can't just look up the path from the start of the root, since
    // some parent directories may not have finished yet, so we compute
    // a relative path that helps us find the right git_tree_builder or object.
    auto relTarget = CanonPath(path).parent()->makeRelative(target);

    auto dir = pendingDirs.rbegin();

    // For each ../ component at the start, go up one directory.
    // CanonPath::makeRelative() always puts all .. elements at the start,
    // so they're all handled by this loop:
    std::string_view relTargetLeft(relTarget);
    while (hasPrefix(relTargetLeft, "../")) {
        if (dir == pendingDirs.rend())
            throw Error("invalid hard link target '%s' for path '%s'", target, path);
        ++dir;
        relTargetLeft = relTargetLeft.substr(3);
    }
    if (dir == pendingDirs.rend())
        throw Error("invalid hard link target '%s' for path '%s'", target, path);

    // Look up the remainder of the target, starting at the
    // top-most `git_treebuilder`.
    std::variant<git_treebuilder *, git_oid> curDir{dir->builder.get()};
    Object tree; // needed to keep `entry` alive
    const git_tree_entry * entry = nullptr;

    for (auto & c : CanonPath(relTargetLeft)) {
        if (auto builder = std::get_if<git_treebuilder *>(&curDir)) {
            // First component: the directory is still being built, so
            // consult its tree builder.
            assert(*builder);
            if (!(entry = git_treebuilder_get(*builder, std::string(c).c_str())))
                throw Error("cannot find hard link target '%s' for path '%s'", target, path);
            curDir = *git_tree_entry_id(entry);
        } else if (auto oid = std::get_if<git_oid>(&curDir)) {
            // Later components: descend into an already written tree object.
            tree = lookupObject(*repo, *oid, GIT_OBJECT_TREE);
            if (!(entry = git_tree_entry_byname(reinterpret_cast<const git_tree *>(&*tree), std::string(c).c_str())))
                throw Error("cannot find hard link target '%s' for path '%s'", target, path);
            curDir = *git_tree_entry_id(entry);
        }
    }

    // If the remaining relative target had no components, the loop above
    // never ran and there is no entry to link to. The target comes from
    // the caller's input, so report it as an error rather than asserting:
    // an assert would abort, or vanish under NDEBUG and let a null
    // pointer reach git_tree_entry_id().
    if (!entry)
        throw Error("invalid hard link target '%s' for path '%s'", target, path);

    addToTree(*pathComponents.rbegin(),
        *git_tree_entry_id(entry),
        git_tree_entry_filemode(entry));
}

Hash sync() override {
updateBuilders({});

Expand Down
2 changes: 1 addition & 1 deletion src/libfetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace nix {

namespace fetchers { struct PublicKey; }

struct GitFileSystemObjectSink : FileSystemObjectSink
struct GitFileSystemObjectSink : ExtendedFileSystemObjectSink
{
/**
* Flush builder and return a final Git hash.
Expand Down
13 changes: 13 additions & 0 deletions src/libutil/fs-sink.hh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ struct FileSystemObjectSink
virtual void createSymlink(const CanonPath & path, const std::string & target) = 0;
};

/**
 * A `FileSystemObjectSink` with additional operations for file types
 * that Nix's FSO model does not support.
 */
struct ExtendedFileSystemObjectSink : virtual FileSystemObjectSink
{
    /**
     * Create a hard link at `path`.
     *
     * `target` must be the path, relative to the root of the FSO, of a
     * file that was encountered earlier.
     */
    virtual void createHardlink(const CanonPath & path, const CanonPath & target) = 0;
};

/**
* Recursively copy file system objects from the source into the sink.
*/
Expand Down
11 changes: 8 additions & 3 deletions src/libutil/tarfile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ void unpackTarfile(const Path & tarFile, const Path & destDir)
extract_archive(archive, destDir);
}

time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink)
time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink)
{
time_t lastModified = 0;

Expand All @@ -195,7 +195,12 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin

lastModified = std::max(lastModified, archive_entry_mtime(entry));

switch (archive_entry_filetype(entry)) {
if (auto target = archive_entry_hardlink(entry)) {
parseSink.createHardlink(cpath, CanonPath(target));
continue;
}

switch (auto type = archive_entry_filetype(entry)) {

case AE_IFDIR:
parseSink.createDirectory(cpath);
Expand Down Expand Up @@ -232,7 +237,7 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin
}

default:
throw Error("file '%s' in tarball has unsupported file type", path);
throw Error("file '%s' in tarball has unsupported file type %d", path, type);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/libutil/tarfile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ void unpackTarfile(Source & source, const Path & destDir);

void unpackTarfile(const Path & tarFile, const Path & destDir);

time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink);
time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink);

}
12 changes: 12 additions & 0 deletions tests/functional/tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,15 @@ test_tarball() {
test_tarball '' cat
test_tarball .xz xz
test_tarball .gz gzip

# Test hard links.
# Every entry in tree.tar.gz refers to the same file, and they all share one inode when unpacked by GNU tar.
# We don't preserve the hard links, because that's an optimization we think is not worth the complexity,
# so we only check that the contents of each entry are copied correctly.
path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .storePath)"
for linked in a/b/foo a/b/xyzzy a/yyy a/zzz c/aap fnord; do
    [[ $(cat "$path/$linked") = bar ]]
done
Binary file added tests/functional/tree.tar.gz
Binary file not shown.
112 changes: 112 additions & 0 deletions tests/unit/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include "git-utils.hh"
#include "file-system.hh"
#include "gmock/gmock.h"
#include <git2/global.h>
#include <git2/repository.h>
#include <git2/types.h>
#include <gtest/gtest.h>
#include "fs-sink.hh"
#include "serialise.hh"

namespace nix {

/**
 * Test fixture that provides a Git repository in a temporary directory.
 */
class GitUtilsTest : public ::testing::Test
{
    // SetUp() creates a new temporary directory and repository each time
    // it runs, so the tests do not share repository state.
    Path tmpDir;
    std::unique_ptr<AutoDelete> delTmpDir;

public:
    void SetUp() override
    {
        tmpDir = createTempDir();
        delTmpDir = std::make_unique<AutoDelete>(tmpDir, true);

        // Create the repo with libgit2.
        // git_libgit2_init() returns the number of outstanding
        // initialisations on success and a negative error code on
        // failure; stop here instead of calling into an uninitialised
        // library.
        ASSERT_GT(git_libgit2_init(), 0);
        git_repository * repo = nullptr;
        auto r = git_repository_init(&repo, tmpDir.c_str(), 0);
        ASSERT_EQ(r, 0);
        git_repository_free(repo);
    }

    void TearDown() override
    {
        // Destroy the AutoDelete, triggering removal.
        // This is std::unique_ptr::reset(), not AutoDelete::reset(),
        // which would cancel the deletion.
        delTmpDir.reset();
    }

    /**
     * Open the repository created by SetUp().
     * NOTE(review): the meaning of the two boolean arguments is defined
     * by GitRepo::openRepo, which is not visible here.
     */
    ref<GitRepo> openRepo()
    {
        return GitRepo::openRepo(tmpDir, true, false);
    }
};

/**
 * Feed `contents` into `fileSink` as the complete body of a regular file.
 *
 * @param fileSink   Sink receiving the file data.
 * @param contents   Bytes to write.
 * @param executable When true, `fileSink.isExecutable()` is called first
 *                   (presumably marking the file executable; confirm
 *                   against CreateRegularFileSink).
 */
void writeString(CreateRegularFileSink & fileSink, std::string contents, bool executable)
{
    if (executable) {
        fileSink.isExecutable();
    }

    // Announce the size up front, then hand over the data in one chunk.
    const auto byteCount = contents.size();
    fileSink.preallocateContents(byteCount);
    fileSink(contents);
}

// Builds a small tree (files, a symlink, directories and a hard link)
// through the sink and checks what the resulting accessor reports.
TEST_F(GitUtilsTest, sink_basic)
{
    auto gitRepo = openRepo();
    auto fsSink = gitRepo->getFileSystemObjectSink();

    // Adds a non-executable regular file with the given contents.
    // `contents` is captured by value so the inner callback never refers
    // to this helper's parameters.
    auto addFile = [&fsSink](const char * filePath, const char * contents) {
        fsSink->createRegularFile(CanonPath(filePath), [contents](CreateRegularFileSink & fileSink) {
            writeString(fileSink, contents, false);
        });
    };

    // TODO/Question: It seems a little odd that we use the tarball-like
    //                convention of requiring a top-level directory here.
    //                The sync method does not document this behavior; it
    //                should probably be renamed, because it's not very
    //                general, and I can't imagine "non-conventional"
    //                archives or any other source being handled by this
    //                sink.

    fsSink->createDirectory(CanonPath("foo-1.1"));

    addFile("foo-1.1/hello", "hello world");
    addFile("foo-1.1/bye", "thanks for all the fish");
    fsSink->createSymlink(CanonPath("foo-1.1/bye-link"), "bye");
    fsSink->createDirectory(CanonPath("foo-1.1/empty"));
    fsSink->createDirectory(CanonPath("foo-1.1/links"));
    fsSink->createHardlink(CanonPath("foo-1.1/links/foo"), CanonPath("foo-1.1/hello"));

    // Disabled second hard link, kept for reference:
    // sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));

    auto treeHash = fsSink->sync();
    auto accessor = gitRepo->getAccessor(treeHash, false);
    auto entries = accessor->readDirectory(CanonPath::root);

    // The paths read back below no longer carry the "foo-1.1" prefix.
    ASSERT_EQ(entries.size(), 5);
    ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world");
    ASSERT_EQ(accessor->readFile(CanonPath("bye")), "thanks for all the fish");
    ASSERT_EQ(accessor->readLink(CanonPath("bye-link")), "bye");
    ASSERT_EQ(accessor->readDirectory(CanonPath("empty")).size(), 0);
    ASSERT_EQ(accessor->readFile(CanonPath("links/foo")), "hello world");
}

// A hard link whose target is "hello" rather than "foo-1.1/hello" must be
// rejected with an error that names both the target and the link path.
TEST_F(GitUtilsTest, sink_hardlink)
{
    auto gitRepo = openRepo();
    auto fsSink = gitRepo->getFileSystemObjectSink();

    fsSink->createDirectory(CanonPath("foo-1.1"));

    fsSink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) {
        writeString(fileSink, "hello world", false);
    });

    bool rejected = false;
    try {
        fsSink->createHardlink(CanonPath("foo-1.1/link"), CanonPath("hello"));
    } catch (const nix::Error & e) {
        rejected = true;
        ASSERT_THAT(e.msg(), testing::HasSubstr("invalid hard link target"));
        ASSERT_THAT(e.msg(), testing::HasSubstr("/hello"));
        ASSERT_THAT(e.msg(), testing::HasSubstr("foo-1.1/link"));
    }

    // Reaching this point without an exception means the bad target was accepted.
    if (!rejected) {
        FAIL() << "Expected an exception";
    }
}

} // namespace nix
2 changes: 1 addition & 1 deletion tests/unit/libfetchers/local.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ libfetchers-tests_LIBS = \
libstore-test-support libutil-test-support \
libfetchers libstore libutil

libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS)
libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS) $(LIBGIT2_LIBS)

ifdef HOST_WINDOWS
# Increase the default reserved stack size to 65 MB so Nix doesn't run out of space
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <iostream>
#include "tracing-file-system-object-sink.hh"

namespace nix::test {

/// Write a trace line for the call to stderr, then forward it to the wrapped sink.
void TracingFileSystemObjectSink::createDirectory(const CanonPath & path)
{
    auto & trace = std::cerr;
    trace << "createDirectory(";
    trace << path;
    trace << ")\n";

    sink.createDirectory(path);
}

void TracingFileSystemObjectSink::createRegularFile(
const CanonPath & path, std::function<void(CreateRegularFileSink &)> fn)
{
std::cerr << "createRegularFile(" << path << ")\n";
sink.createRegularFile(path, [&](CreateRegularFileSink & crf) {
// We could wrap this and trace about the chunks of data and such
fn(crf);
});
}

/// Write a trace line with the link path and its target to stderr,
/// then forward the call to the wrapped sink.
void TracingFileSystemObjectSink::createSymlink(const CanonPath & path, const std::string & target)
{
    auto & trace = std::cerr;
    trace << "createSymlink(" << path;
    trace << ", target: " << target;
    trace << ")\n";

    sink.createSymlink(path, target);
}

/// Write a trace line with the link path and its target to stderr,
/// then forward the call to the wrapped extended sink.
void TracingExtendedFileSystemObjectSink::createHardlink(const CanonPath & path, const CanonPath & target)
{
    auto & trace = std::cerr;
    trace << "createHardlink(" << path;
    trace << ", target: " << target;
    trace << ")\n";

    sink.createHardlink(path, target);
}

} // namespace nix::test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#pragma once
#include "fs-sink.hh"

namespace nix::test {

/**
 * Decorator around a `FileSystemObjectSink`: each call is reported on
 * stderr and then passed on to the wrapped sink.
 */
class TracingFileSystemObjectSink : public virtual FileSystemObjectSink
{
public:
    /**
     * @param sink The sink that receives the forwarded calls. It is held
     *             by reference.
     */
    TracingFileSystemObjectSink(FileSystemObjectSink & sink)
        : sink(sink)
    {
    }

    void createDirectory(const CanonPath & path) override;

    void createRegularFile(const CanonPath & path, std::function<void(CreateRegularFileSink &)> fn) override;

    void createSymlink(const CanonPath & path, const std::string & target) override;

private:
    /// The wrapped sink.
    FileSystemObjectSink & sink;
};

/**
 * Decorator around an `ExtendedFileSystemObjectSink`: in addition to the
 * calls traced by `TracingFileSystemObjectSink`, hard link creation is
 * reported on stderr and passed on to the wrapped sink.
 */
class TracingExtendedFileSystemObjectSink : public TracingFileSystemObjectSink, public ExtendedFileSystemObjectSink
{
public:
    /**
     * @param sink The sink that receives the forwarded calls. It is held
     *             by reference and also handed to the base class.
     */
    TracingExtendedFileSystemObjectSink(ExtendedFileSystemObjectSink & sink)
        : TracingFileSystemObjectSink(sink)
        , sink(sink)
    {
    }

    void createHardlink(const CanonPath & path, const CanonPath & target) override;

private:
    /// The wrapped sink, typed as the extended interface. The base class
    /// keeps its own private reference, which is not accessible here.
    ExtendedFileSystemObjectSink & sink;
};

}
Loading