Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the ParseSink interface #9653

Merged
merged 2 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/libstore/daemon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
eagerly consume the entire stream it's given, past the
length of the Nar. */
TeeSource savedNARSource(from, saved);
NullParseSink sink; /* just parse the NAR */
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else {
/* Incrementally parse the NAR file, stripping the
Expand Down Expand Up @@ -913,7 +913,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
source = std::make_unique<TunnelSource>(from, to);
else {
TeeSource tee { from, saved };
NullParseSink ether;
NullFileSystemObjectSink ether;
parseDump(ether, tee);
source = std::make_unique<StringSource>(saved.s);
}
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/export-import.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ StorePaths Store::importPaths(Source & source, CheckSigsFlag checkSigs)
/* Extract the NAR from the source. */
StringSink saved;
TeeSource tee { source, saved };
NullParseSink ether;
NullFileSystemObjectSink ether;
parseDump(ether, tee);

uint32_t magic = readInt(source);
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/local-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
bool narRead = false;
Finally cleanup = [&]() {
if (!narRead) {
NullParseSink sink;
NullFileSystemObjectSink sink;
try {
parseDump(sink, source);
} catch (...) {
Expand Down
62 changes: 39 additions & 23 deletions src/libstore/nar-accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,35 @@ struct NarMember
std::map<std::string, NarMember> children;
};

struct NarMemberConstructor : CreateRegularFileSink
{
private:

NarMember & narMember;

uint64_t & pos;

public:

NarMemberConstructor(NarMember & nm, uint64_t & pos)
: narMember(nm), pos(pos)
{ }

void isExecutable() override
{
narMember.stat.isExecutable = true;
}

void preallocateContents(uint64_t size) override
{
narMember.stat.fileSize = size;
narMember.stat.narOffset = pos;
}

void operator () (std::string_view data) override
{ }
};

struct NarAccessor : public SourceAccessor
{
std::optional<const std::string> nar;
Expand All @@ -27,7 +56,7 @@ struct NarAccessor : public SourceAccessor

NarMember root;

struct NarIndexer : ParseSink, Source
struct NarIndexer : FileSystemObjectSink, Source
{
NarAccessor & acc;
Source & source;
Expand All @@ -42,19 +71,22 @@ struct NarAccessor : public SourceAccessor
: acc(acc), source(source)
{ }

void createMember(const Path & path, NarMember member)
NarMember & createMember(const Path & path, NarMember member)
{
size_t level = std::count(path.begin(), path.end(), '/');
while (parents.size() > level) parents.pop();

if (parents.empty()) {
acc.root = std::move(member);
parents.push(&acc.root);
return acc.root;
} else {
if (parents.top()->stat.type != Type::tDirectory)
throw Error("NAR file missing parent directory of path '%s'", path);
auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member));
parents.push(&result.first->second);
auto & ref = result.first->second;
parents.push(&ref);
return ref;
}
}

Expand All @@ -68,34 +100,18 @@ struct NarAccessor : public SourceAccessor
} });
}

void createRegularFile(const Path & path) override
void createRegularFile(const Path & path, std::function<void(CreateRegularFileSink &)> func) override
{
createMember(path, NarMember{ .stat = {
auto & nm = createMember(path, NarMember{ .stat = {
.type = Type::tRegular,
.fileSize = 0,
.isExecutable = false,
.narOffset = 0
} });
NarMemberConstructor nmc { nm, pos };
func(nmc);
}

void closeRegularFile() override
{ }

void isExecutable() override
{
parents.top()->stat.isExecutable = true;
}

void preallocateContents(uint64_t size) override
{
auto & st = parents.top()->stat;
st.fileSize = size;
st.narOffset = pos;
}

void receiveContents(std::string_view data) override
{ }

void createSymlink(const Path & path, const std::string & target) override
{
createMember(path,
Expand Down
8 changes: 4 additions & 4 deletions src/libstore/store-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -424,12 +424,12 @@ ValidPathInfo Store::addToStoreSlow(
information to narSink. */
TeeSource tapped { *fileSource, narSink };

NullParseSink blank;
NullFileSystemObjectSink blank;
auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat
? (ParseSink &) fileSink
? (FileSystemObjectSink &) fileSink
: method.getFileIngestionMethod() == FileIngestionMethod::Recursive
? (ParseSink &) blank
: (abort(), (ParseSink &)*(ParseSink *)nullptr); // handled both cases
? (FileSystemObjectSink &) blank
: (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases

/* The information that flows from tapped (besides being replicated in
narSink), is now put in parseSink. */
Expand Down
148 changes: 80 additions & 68 deletions src/libutil/archive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ static SerialisationError badArchive(const std::string & s)
}


static void parseContents(ParseSink & sink, Source & source, const Path & path)
static void parseContents(CreateRegularFileSink & sink, Source & source)
{
uint64_t size = readLongLong(source);

Expand All @@ -147,7 +147,7 @@ static void parseContents(ParseSink & sink, Source & source, const Path & path)
auto n = buf.size();
if ((uint64_t)n > left) n = left;
source(buf.data(), n);
sink.receiveContents({buf.data(), n});
sink({buf.data(), n});
left -= n;
}

Expand All @@ -164,100 +164,112 @@ struct CaseInsensitiveCompare
};


static void parse(ParseSink & sink, Source & source, const Path & path)
static void parse(FileSystemObjectSink & sink, Source & source, const Path & path)
{
std::string s;

s = readString(source);
if (s != "(") throw badArchive("expected open tag");

enum { tpUnknown, tpRegular, tpDirectory, tpSymlink } type = tpUnknown;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change I did to the parser is basically to replace this extra state variable with control flow. This is needed so that the "content" and "executable" parsing happens within the callback. I then changed the others to keep the code design consistent (and get rid of this enum entirely, as opposed to confusingly just getting rid of tpRegular).


std::map<Path, int, CaseInsensitiveCompare> names;

while (1) {
auto getString = [&]() {
checkInterrupt();
return readString(source);
};

s = readString(source);
// For first iteration
s = getString();

while (1) {

if (s == ")") {
break;
}

else if (s == "type") {
if (type != tpUnknown)
throw badArchive("multiple type fields");
std::string t = readString(source);
std::string t = getString();

if (t == "regular") {
type = tpRegular;
sink.createRegularFile(path);
sink.createRegularFile(path, [&](auto & crf) {
while (1) {
s = getString();

if (s == "contents") {
parseContents(crf, source);
}

else if (s == "executable") {
auto s2 = getString();
if (s2 != "") throw badArchive("executable marker has non-empty value");
crf.isExecutable();
}

else break;
}
});
}

else if (t == "directory") {
sink.createDirectory(path);
type = tpDirectory;
}

else if (t == "symlink") {
type = tpSymlink;
}
while (1) {
s = getString();

if (s == "entry") {
std::string name, prevName;

s = getString();
if (s != "(") throw badArchive("expected open tag");

while (1) {
s = getString();

if (s == ")") {
break;
} else if (s == "name") {
name = getString();
if (name.empty() || name == "." || name == ".." || name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos)
throw Error("NAR contains invalid file name '%1%'", name);
if (name <= prevName)
throw Error("NAR directory is not sorted");
prevName = name;
if (archiveSettings.useCaseHack) {
auto i = names.find(name);
if (i != names.end()) {
debug("case collision between '%1%' and '%2%'", i->first, name);
name += caseHackSuffix;
name += std::to_string(++i->second);
} else
names[name] = 0;
}
Comment on lines +237 to +245
Copy link
Member

@roberth roberth Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: This could be factored out into a sink to sink transformation that performs the renaming.

} else if (s == "node") {
if (name.empty()) throw badArchive("entry name missing");
parse(sink, source, path + "/" + name);
} else
throw badArchive("unknown field " + s);
}
}

else throw badArchive("unknown file type " + t);
else break;
}
}

}
else if (t == "symlink") {
s = getString();

else if (s == "contents" && type == tpRegular) {
parseContents(sink, source, path);
sink.closeRegularFile();
}
if (s != "target")
throw badArchive("expected 'target' got " + s);

else if (s == "executable" && type == tpRegular) {
auto s = readString(source);
if (s != "") throw badArchive("executable marker has non-empty value");
sink.isExecutable();
}
std::string target = getString();
sink.createSymlink(path, target);

else if (s == "entry" && type == tpDirectory) {
std::string name, prevName;

s = readString(source);
if (s != "(") throw badArchive("expected open tag");

while (1) {
checkInterrupt();

s = readString(source);

if (s == ")") {
break;
} else if (s == "name") {
name = readString(source);
if (name.empty() || name == "." || name == ".." || name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos)
throw Error("NAR contains invalid file name '%1%'", name);
if (name <= prevName)
throw Error("NAR directory is not sorted");
prevName = name;
if (archiveSettings.useCaseHack) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(pre-existing)
This is only necessary when writing to a store that needs it.
It must not be done in other situations, e.g. when writing NAR contents to Git trees.

auto i = names.find(name);
if (i != names.end()) {
debug("case collision between '%1%' and '%2%'", i->first, name);
name += caseHackSuffix;
name += std::to_string(++i->second);
} else
names[name] = 0;
}
} else if (s == "node") {
if (name.empty()) throw badArchive("entry name missing");
parse(sink, source, path + "/" + name);
} else
throw badArchive("unknown field " + s);
// for the next iteration
s = getString();
}
}

else if (s == "target" && type == tpSymlink) {
std::string target = readString(source);
sink.createSymlink(path, target);
else throw badArchive("unknown file type " + t);

}

else
Expand All @@ -266,7 +278,7 @@ static void parse(ParseSink & sink, Source & source, const Path & path)
}


void parseDump(ParseSink & sink, Source & source)
void parseDump(FileSystemObjectSink & sink, Source & source)
{
std::string version;
try {
Expand Down Expand Up @@ -294,7 +306,7 @@ void copyNAR(Source & source, Sink & sink)
// FIXME: if 'source' is the output of dumpPath() followed by EOF,
// we should just forward all data directly without parsing.

NullParseSink parseSink; /* just parse the NAR */
NullFileSystemObjectSink parseSink; /* just parse the NAR */

TeeSource wrapper { source, sink };

Expand Down
2 changes: 1 addition & 1 deletion src/libutil/archive.hh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ time_t dumpPathAndGetMtime(const Path & path, Sink & sink,
*/
void dumpString(std::string_view s, Sink & sink);

void parseDump(ParseSink & sink, Source & source);
void parseDump(FileSystemObjectSink & sink, Source & source);

void restorePath(const Path & path, Source & source);

Expand Down
2 changes: 1 addition & 1 deletion src/libutil/file-content-address.hh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void dumpPath(
/**
* Restore a serialization of the given file system object.
*
* @TODO use an arbitrary `ParseSink`.
* @TODO use an arbitrary `FileSystemObjectSink`.
*/
void restorePath(
const Path & path,
Expand Down
Loading
Loading