Commit 35be1c6

Display progress when pre-caching data

fdegros committed Dec 24, 2022
1 parent e651bc1 commit 35be1c6
Showing 6 changed files with 71 additions and 27 deletions.
11 changes: 7 additions & 4 deletions lib/data_node.cc
@@ -257,11 +257,13 @@ DataNode DataNode::Make(zip_t* const zip,
return node;
}

void DataNode::CacheAll(zip_t* const zip, const FileNode& file_node) {
bool DataNode::CacheAll(zip_t* const zip,
const FileNode& file_node,
std::function<void(ssize_t)> progress) {
assert(!cached_reader);
if (size == 0) {
Log(LOG_DEBUG, "No need to cache ", file_node, ": Empty file");
return;
return false;
}

ZipFile file = Reader::Open(zip, id);
@@ -270,10 +272,11 @@ void DataNode::CacheAll(zip_t* const zip, const FileNode& file_node) {
const bool seekable = zip_file_is_seekable(file.get()) > 0;
if (seekable) {
Log(LOG_DEBUG, "No need to cache ", file_node, ": File is seekable");
return;
return false;
}

cached_reader = CacheFile(std::move(file), id, size);
cached_reader = CacheFile(std::move(file), id, size, std::move(progress));
return true;
}

Reader::Ptr DataNode::GetReader(zip_t* const zip,
4 changes: 3 additions & 1 deletion lib/data_node.h
@@ -94,7 +94,9 @@ struct DataNode {
return st;
}

void CacheAll(zip_t* zip, const FileNode& file_node);
bool CacheAll(zip_t* zip,
const FileNode& file_node,
std::function<void(ssize_t)> progress = {});

Reader::Ptr GetReader(zip_t* zip, const FileNode& file_node) const;

4 changes: 3 additions & 1 deletion lib/file_node.h
@@ -118,7 +118,9 @@ struct FileNode {
children.push_front(*child);
}

void CacheAll() { data.CacheAll(zip, *this); }
bool CacheAll(std::function<void(ssize_t)> progress = {}) {
return data.CacheAll(zip, *this, std::move(progress));
}

// Gets a Reader to read file contents.
Reader::Ptr GetReader() const { return link->GetReader(zip, *this); }
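For illustration only (not part of this commit): a minimal sketch of how a caller might use the new FileNode::CacheAll overload. The progress callback receives the size in bytes of each chunk written to the cache, and the boolean result tells whether anything was actually cached, since empty and seekable files are skipped. The file_node variable here is hypothetical.

// Hypothetical caller of the new FileNode::CacheAll overload.
uint64_t cached_bytes = 0;
const bool was_cached = file_node.CacheAll([&cached_bytes](ssize_t chunk_size) {
  // Invoked once per decompressed chunk with the number of bytes just cached.
  cached_bytes += static_cast<uint64_t>(chunk_size);
});
if (!was_cached) {
  // Empty or seekable file: no cache file was needed.
}
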
14 changes: 10 additions & 4 deletions lib/reader.cc
@@ -100,7 +100,9 @@ class CacheFileReader : public UnbufferedReader {
const off_t expected_size)
: UnbufferedReader(Open(zip, file_id), file_id, expected_size) {}

void CacheAll() { EnsureCachedUpTo(expected_size_); }
void CacheAll(std::function<void(ssize_t)> progress) {
EnsureCachedUpTo(expected_size_, std::move(progress));
}

private:
// Creates a new, empty and hidden cache file.
@@ -179,7 +181,8 @@
}

// Ensures the decompressed data is cached at least up to the given offset.
void EnsureCachedUpTo(const off_t offset) {
void EnsureCachedUpTo(const off_t offset,
const std::function<void(ssize_t)> progress = {}) {
const off_t start_pos = pos_;
const off_t total_to_cache = offset - pos_;
const Timer timer;
@@ -200,6 +203,8 @@
}

WriteToCacheFile(buf, n, store_offset);
if (progress)
progress(n);
}

if (should_log_progress.Count())
@@ -238,12 +243,13 @@

Reader::Ptr CacheFile(ZipFile file,
const zip_int64_t file_id,
const off_t expected_size) {
const off_t expected_size,
std::function<void(ssize_t)> progress) {
CacheFileReader* const p =
new CacheFileReader(std::move(file), file_id, expected_size);
Reader::Ptr r(p);
Log(LOG_DEBUG, *p, ": Caching ", expected_size, " bytes...");
p->CacheAll();
p->CacheAll(std::move(progress));
return r;
}

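As a standalone sketch of the pattern EnsureCachedUpTo now follows — report each chunk to an optional callback right after it is written to the cache — the loop below uses a dummy chunk source instead of the real libzip read call; the helper name and chunk size are assumptions made for this example.

#include <algorithm>
#include <cstdio>
#include <functional>
#include <sys/types.h>  // ssize_t

// Stand-in for the decompress-and-cache loop: after each chunk is stored,
// the optional progress callback is told how many bytes were just added.
void CacheChunks(const ssize_t total_size,
                 const std::function<void(ssize_t)>& progress = {}) {
  const ssize_t chunk_size = 64 * 1024;  // Assumed buffer size for this sketch.
  for (ssize_t done = 0; done < total_size;) {
    const ssize_t n = std::min(chunk_size, total_size - done);
    // ... decompress and write |n| bytes to the cache file here ...
    done += n;
    if (progress)
      progress(n);  // Report only the newly cached bytes, not a running total.
  }
}

int main() {
  ssize_t cached = 0;
  CacheChunks(1000000, [&cached](const ssize_t n) {
    cached += n;
    std::printf("Cached %zd bytes so far\n", cached);
  });
}
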
6 changes: 5 additions & 1 deletion lib/reader.h
@@ -17,6 +17,7 @@
#define READER_H

#include <cassert>
#include <functional>
#include <memory>
#include <ostream>
#include <string_view>
@@ -219,6 +220,9 @@ class BufferedReader : public UnbufferedReader {

// Cache the whole file contents. Returns a Reader that will be able to serve
// the cached contents.
Reader::Ptr CacheFile(ZipFile file, zip_int64_t file_id, off_t expected_size);
Reader::Ptr CacheFile(ZipFile file,
zip_int64_t file_id,
off_t expected_size,
std::function<void(ssize_t)> progress = {});

#endif
59 changes: 43 additions & 16 deletions lib/tree.cc
@@ -330,19 +330,27 @@ void Tree::BuildTree() {
assert(ok);
root.release(); // Now owned by |files_by_path_|.

// Sum of all uncompressed file sizes.
uint64_t total_uncompressed_size = 0;
zip_stat_t sb;

// Concatenate all the names in a buffer in order to guess the encoding.
std::string allNames;
allNames.reserve(10000);

size_t maxNameLength = 0;

// search for absolute or parent-relative paths
for (zip_int64_t id = 0; id < n; ++id) {
const char* const p = zip_get_name(zip_, id, ZIP_FL_ENC_RAW);
if (!p)
if (zip_stat_index(zip_, id, ZIP_FL_ENC_RAW, &sb) < 0)
throw ZipError(StrCat("Cannot read entry #", id), zip_);

if ((sb.valid & ZIP_STAT_SIZE) != 0)
total_uncompressed_size += sb.size;

if ((sb.valid & ZIP_STAT_NAME) == 0 || !sb.name || !*sb.name)
continue;

const std::string_view name = p;
const std::string_view name = sb.name;
if (maxNameLength < name.size())
maxNameLength = name.size();

@@ -353,6 +361,9 @@
need_prefix_ = name.starts_with('/') || name.starts_with("../");
}

Log(LOG_DEBUG, "Total uncompressed size = ", total_uncompressed_size,
" bytes");

// Detect filename encoding.
std::string encoding;
if (opts_.encoding)
@@ -382,8 +393,6 @@
}
}

zip_stat_t sb;

struct Hardlink {
zip_int64_t id;
mode_t mode;
@@ -392,28 +401,37 @@
std::vector<Hardlink> hardlinks;
std::string path;
Beat should_display_progress;
uint64_t total_extracted_size = 0;
const auto progress = [&should_display_progress, &total_uncompressed_size,
&total_extracted_size](const ssize_t chunk_size) {
assert(chunk_size >= 0);
total_extracted_size += chunk_size;
if (!should_display_progress)
return;
Log(LOG_INFO, "Loading ",
total_extracted_size < total_uncompressed_size
? 100 * total_extracted_size / total_uncompressed_size
: 100,
"%");
};

// Add zip entries for all items except hardlinks
for (zip_int64_t id = 0; id < n; ++id) {
if (should_display_progress)
Log(LOG_INFO, "Loading ", 100 * id / n, "%");

if (zip_stat_index(zip_, id, zipFlags, &sb) < 0)
throw ZipError(StrCat("Cannot read entry #", id), zip_);

if ((sb.valid & ZIP_STAT_NAME) == 0 || !sb.name || !*sb.name) {
Log(LOG_ERR, "Skipped entry [", id, "]: No name");
continue;
}

const Path original_path = sb.name;
const Path original_path =
(sb.valid & ZIP_STAT_NAME) != 0 && sb.name && *sb.name ? sb.name : "-";
const uint64_t size = (sb.valid & ZIP_STAT_SIZE) != 0 ? sb.size : 0;
const auto [mode, is_hardlink] = GetEntryAttributes(id, original_path);
const FileType type = GetFileType(mode);

const Path original_path_utf8 = toUtf8(original_path);
if (!Path::Normalize(&path, original_path_utf8, need_prefix_)) {
Log(LOG_ERR, "Skipped ", type, " [", id, "]: Cannot normalize path ",
original_path_utf8);
assert(total_uncompressed_size >= size);
total_uncompressed_size -= size;
continue;
}

@@ -427,13 +445,17 @@
node->original_path = Path(original_path).WithoutTrailingSeparator();
files_by_original_path_.insert(*node);
total_block_count_ += 1;
assert(total_uncompressed_size >= size);
total_uncompressed_size -= size;
continue;
}

if (type != FileType::File &&
(type == FileType::Symlink ? !opts_.include_symlinks
: !opts_.include_special_files)) {
Log(LOG_INFO, "Skipped ", type, " [", id, "] ", Path(path));
assert(total_uncompressed_size >= size);
total_uncompressed_size -= size;
continue;
}

@@ -443,6 +465,8 @@
} else {
Log(LOG_INFO, "Skipped ", type, " [", id, "] ", Path(path));
}
assert(total_uncompressed_size >= size);
total_uncompressed_size -= size;
continue;
}

@@ -483,7 +507,10 @@
// Cache file data if necessary.
if (opts_.pre_cache) {
try {
node->CacheAll();
if (!node->CacheAll(progress)) {
assert(total_uncompressed_size >= size);
total_uncompressed_size -= size;
}
} catch (const ZipError& error) {
Log(LOG_ERR, "Cannot cache ", *node, ": ", error.what());
if (opts_.check_password) {
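The percentage logged by the new progress lambda can be exercised with a small standalone sketch; the byte counts below are made up. Note that the bookkeeping above subtracts the size of every entry that is skipped, or that CacheAll declines to cache, from total_uncompressed_size, so the denominator only counts bytes that will actually be extracted, and the ratio is still clamped at 100% as a safety net.

#include <cstdint>
#include <cstdio>

// Mirrors the clamped-percentage rule used by the progress lambda above.
uint64_t LoadingPercent(const uint64_t extracted, const uint64_t total) {
  return extracted < total ? 100 * extracted / total : 100;
}

int main() {
  const uint64_t total = 750000;  // Made-up total of cacheable bytes.
  uint64_t extracted = 0;
  for (const uint64_t chunk : {250000, 250000, 250000}) {
    extracted += chunk;  // Same accumulation as total_extracted_size.
    std::printf("Loading %llu%%\n",
                static_cast<unsigned long long>(LoadingPercent(extracted, total)));
  }
}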
