Skip to content

Commit

Permalink
Merge pull request #9485 from edolstra/tarball-cache
Browse files Browse the repository at this point in the history
Add a Git-based content-addressed tarball cache
  • Loading branch information
edolstra authored Feb 15, 2024
2 parents 27b8d5d + 78b8db7 commit 06be819
Show file tree
Hide file tree
Showing 8 changed files with 345 additions and 44 deletions.
5 changes: 5 additions & 0 deletions src/libfetchers/attrs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,9 @@ std::map<std::string, std::string> attrsToQuery(const Attrs & attrs)
return query;
}

/**
 * Fetch the string attribute `name` from `attrs` and parse it as a
 * SHA-1 hash.
 *
 * Errors raised by `getStrAttr()` or `Hash::parseAny()` are not caught
 * here and propagate to the caller.
 */
Hash getRevAttr(const Attrs & attrs, const std::string & name)
{
    const auto & revText = getStrAttr(attrs, name);
    return Hash::parseAny(revText, HashAlgorithm::SHA1);
}

}
2 changes: 2 additions & 0 deletions src/libfetchers/attrs.hh
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,6 @@ bool getBoolAttr(const Attrs & attrs, const std::string & name);

std::map<std::string, std::string> attrsToQuery(const Attrs & attrs);

/**
 * Return the string attribute `name` of `attrs`, parsed as a SHA-1 hash.
 */
Hash getRevAttr(const Attrs & attrs, const std::string & name);

}
166 changes: 164 additions & 2 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#include "finally.hh"
#include "processes.hh"
#include "signals.hh"

#include <boost/core/span.hpp>
#include "users.hh"
#include "fs-sink.hh"

#include <git2/attr.h>
#include <git2/blob.h>
Expand All @@ -28,6 +28,7 @@
#include <unordered_set>
#include <queue>
#include <regex>
#include <span>

namespace std {

Expand Down Expand Up @@ -356,6 +357,8 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>

ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override;

/* Return a new sink that stores the file system objects it receives
   in this repository (see GitFileSystemObjectSinkImpl). */
ref<GitFileSystemObjectSink> getFileSystemObjectSink() override;

static int sidebandProgressCallback(const char * str, int len, void * payload)
{
auto act = (Activity *) payload;
Expand Down Expand Up @@ -770,6 +773,154 @@ struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor {

};

/**
 * A file system object sink that stores everything it receives as Git
 * blobs and trees in `repo`.
 *
 * Directories are assembled incrementally: `pendingDirs` is a stack of
 * tree builders describing the directory currently being filled and
 * all of its ancestors. Whenever an entry arrives for a different
 * directory, the builders that are no longer on the path are written
 * out as tree objects and inserted into their parents.
 *
 * NOTE(review): this scheme appears to assume that all entries of a
 * directory arrive contiguously; a directory revisited after it has
 * been written gets a fresh, empty builder. Confirm against callers.
 */
struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
{
    ref<GitRepoImpl> repo;

    /**
     * A directory whose tree object has not been written yet.
     */
    struct PendingDir
    {
        /** Name of this directory within its parent ("" for the root). */
        std::string name;
        TreeBuilder builder;
    };

    /**
     * Stack of open directories. Element 0 is the root (pushed by the
     * constructor); each further element is a child of the previous one.
     */
    std::vector<PendingDir> pendingDirs;

    /**
     * Number of leading path components removed from every incoming
     * path. Paths with no more components than this are ignored.
     */
    size_t componentsToStrip = 1;

    /**
     * Open a new, empty tree builder named `name` on top of the stack.
     *
     * @throws Error if libgit2 cannot create the builder.
     */
    void pushBuilder(std::string name)
    {
        git_treebuilder * b;
        if (git_treebuilder_new(&b, *repo, nullptr))
            throw Error("creating a tree builder: %s", git_error_last()->message);
        pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) });
    }

    GitFileSystemObjectSinkImpl(ref<GitRepoImpl> repo) : repo(repo)
    {
        // The root directory has no name.
        pushBuilder("");
    }

    /**
     * Write the topmost pending directory as a tree object and remove
     * it from the stack.
     *
     * @return the object id of the written tree and the directory's name.
     * @throws Error if libgit2 cannot write the tree.
     */
    std::pair<git_oid, std::string> popBuilder()
    {
        assert(!pendingDirs.empty());
        auto pending = std::move(pendingDirs.back());
        git_oid oid;
        if (git_treebuilder_write(&oid, pending.builder.get()))
            throw Error("creating a tree object: %s", git_error_last()->message);
        pendingDirs.pop_back();
        // `pending` is a local that dies here, so hand its name over
        // instead of copying it.
        return {oid, std::move(pending.name)};
    }

    /**
     * Insert an entry `name` with object id `oid` and file mode `mode`
     * into the topmost pending directory.
     *
     * @throws Error if libgit2 rejects the entry.
     */
    void addToTree(const std::string & name, const git_oid & oid, git_filemode_t mode)
    {
        assert(!pendingDirs.empty());
        auto & pending = pendingDirs.back();
        if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode))
            throw Error("adding a file to a tree builder: %s", git_error_last()->message);
    }

    /**
     * Make the stack of pending directories equal to the root followed
     * by `names`: directories that are not on that path are written
     * and added to their parents, missing ones are opened.
     */
    void updateBuilders(std::span<const std::string> names)
    {
        // Find the common prefix of pendingDirs and names. The root
        // builder at index 0 has no counterpart in `names`, hence the
        // offset of one.
        size_t prefixLen = 0;
        for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen)
            if (names[prefixLen] != pendingDirs[prefixLen + 1].name)
                break;

        // Finish the builders that are not part of the common prefix,
        // innermost first, so each tree exists before its parent
        // refers to it.
        for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) {
            auto [oid, name] = popBuilder();
            addToTree(name, oid, GIT_FILEMODE_TREE);
        }

        // Create builders for the new directories.
        for (auto n = prefixLen; n < names.size(); ++n)
            pushBuilder(names[n]);
    }

    /**
     * Strip the leading `componentsToStrip` components from
     * `pathComponents` and make the pending directory stack match the
     * directory that will hold the entry.
     *
     * @param isDir whether the path itself names a directory; if not,
     * its last component is treated as the entry name and only the
     * preceding components are opened as directories.
     * @return false if the path has no components left after
     * stripping, in which case the entry must be skipped.
     */
    bool prepareDirs(const std::vector<std::string> & pathComponents, bool isDir)
    {
        std::span<const std::string> pathComponents2{pathComponents};

        if (pathComponents2.size() <= componentsToStrip) return false;
        pathComponents2 = pathComponents2.subspan(componentsToStrip);

        updateBuilders(
            isDir
            ? pathComponents2
            : pathComponents2.first(pathComponents2.size() - 1));

        return true;
    }

    /**
     * Store a regular file as a Git blob. `func` is called with a sink
     * that streams the file contents into the blob and records whether
     * the file is executable.
     *
     * @throws Error if libgit2 fails to create, write or commit the blob.
     */
    void createRegularFile(
        const Path & path,
        std::function<void(CreateRegularFileSink &)> func) override
    {
        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
        if (!prepareDirs(pathComponents, false)) return;

        git_writestream * stream = nullptr;
        if (git_blob_create_from_stream(&stream, *repo, nullptr))
            throw Error("creating a blob stream object: %s", git_error_last()->message);

        struct CRF : CreateRegularFileSink {
            const Path & path;
            git_writestream * stream;
            bool executable = false;
            CRF(const Path & path, git_writestream * stream)
                : path(path), stream(stream)
            {}
            void operator () (std::string_view data) override
            {
                if (stream->write(stream, data.data(), data.size()))
                    throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
            }
            void isExecutable() override
            {
                executable = true;
            }
        } crf { path, stream };

        try {
            func(crf);
        } catch (...) {
            // The stream is only released by
            // git_blob_create_from_stream_commit(); if we never get
            // there we have to free it ourselves or it leaks.
            stream->free(stream);
            throw;
        }

        git_oid oid;
        if (git_blob_create_from_stream_commit(&oid, stream))
            throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);

        addToTree(pathComponents.back(), oid,
            crf.executable
            ? GIT_FILEMODE_BLOB_EXECUTABLE
            : GIT_FILEMODE_BLOB);
    }

    /**
     * Open the directory `path` (and any missing ancestors) as pending
     * directories. Paths that are empty after stripping are ignored.
     */
    void createDirectory(const Path & path) override
    {
        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
        static_cast<void>(prepareDirs(pathComponents, true));
    }

    /**
     * Store a symlink as a Git blob holding `target`, entered into its
     * directory with the link file mode.
     *
     * @throws Error if libgit2 fails to create the blob.
     */
    void createSymlink(const Path & path, const std::string & target) override
    {
        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
        if (!prepareDirs(pathComponents, false)) return;

        git_oid oid;
        if (git_blob_create_from_buffer(&oid, *repo, target.c_str(), target.size()))
            throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message);

        addToTree(pathComponents.back(), oid, GIT_FILEMODE_LINK);
    }

    /**
     * Write all directories that are still pending, including the
     * root, and return the hash of the root tree.
     *
     * This leaves `pendingDirs` empty, so no further entries can be
     * added afterwards (addToTree() asserts a non-empty stack).
     */
    Hash sync() override {
        // Close everything except the root...
        updateBuilders({});

        // ...then write the root itself.
        auto [oid, _name] = popBuilder();

        return toHash(oid);
    }
};

ref<GitInputAccessor> GitRepoImpl::getRawAccessor(const Hash & rev)
{
auto self = ref<GitRepoImpl>(shared_from_this());
Expand Down Expand Up @@ -804,6 +955,11 @@ ref<InputAccessor> GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportI
}
}

/* Create a sink that stores incoming file system objects in this
   repository. The sink holds a reference to the repository. */
ref<GitFileSystemObjectSink> GitRepoImpl::getFileSystemObjectSink()
{
    auto self = ref<GitRepoImpl>(shared_from_this());
    return make_ref<GitFileSystemObjectSinkImpl>(self);
}

std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore)
{
/* Read the .gitmodules files from this revision. */
Expand All @@ -830,5 +986,11 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
return result;
}

/* Open the Git repository used as the tarball cache. It lives in
   `nix/tarball-cache` below the directory returned by getCacheDir();
   the path is computed once, on first use. Both boolean arguments of
   openRepo() are passed as true (presumably "create if missing" and
   "bare" -- confirm against the declaration of openRepo()). */
ref<GitRepo> getTarballCache()
{
    static const std::filesystem::path repoDir = [] {
        std::filesystem::path dir{getCacheDir()};
        dir /= "nix";
        dir /= "tarball-cache";
        return dir;
    }();

    return GitRepo::openRepo(repoDir, true, true);
}

}
19 changes: 13 additions & 6 deletions src/libfetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,20 @@

#include "filtering-input-accessor.hh"
#include "input-accessor.hh"
#include "fs-sink.hh"

namespace nix {

namespace fetchers { struct PublicKey; }

/**
 * A file system object sink that additionally can report the Git hash
 * of what has been written to it.
 */
struct GitFileSystemObjectSink : FileSystemObjectSink
{
/**
 * Flush builder and return a final Git hash.
 *
 * In the implementation in git-utils.cc this writes every directory
 * that is still pending and returns the hash of the root tree; the
 * sink cannot accept further entries afterwards.
 */
virtual Hash sync() = 0;
};

struct GitRepo
{
virtual ~GitRepo()
Expand Down Expand Up @@ -64,18 +73,14 @@ struct GitRepo
const std::string & url,
const std::string & base) = 0;

struct TarballInfo
{
Hash treeHash;
time_t lastModified;
};

virtual bool hasObject(const Hash & oid) = 0;

virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;

virtual ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0;

virtual ref<GitFileSystemObjectSink> getFileSystemObjectSink() = 0;

virtual void fetch(
const std::string & url,
const std::string & refspec,
Expand All @@ -90,4 +95,6 @@ struct GitRepo
const std::vector<fetchers::PublicKey> & publicKeys) = 0;
};

/**
 * Return the Git repository that serves as the tarball cache (stored
 * under the cache directory in `nix/tarball-cache`).
 */
ref<GitRepo> getTarballCache();

}
Loading

0 comments on commit 06be819

Please sign in to comment.