Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvement: Support ZST in mamba and enable ZST by default #2404

Merged
merged 26 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion libmamba/include/mamba/core/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ namespace mamba

bool use_only_tar_bz2 = false;

bool repodata_use_zst = false;
std::vector<std::string> repodata_has_zst = { "https://conda.anaconda.org/conda-forge" };

// usernames on anaconda.org can have a underscore, which influences the
Expand Down
5 changes: 0 additions & 5 deletions libmamba/src/api/configuration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,11 +1176,6 @@ namespace mamba
.set_env_var_names()
.description("Permit use of the --overide-channels command-line flag"));

insert(Configurable("repodata_use_zst", &ctx.repodata_use_zst)
.group("Repodata")
.set_rc_configurable()
.description("Use zstd encoded repodata when fetching"));

insert(Configurable("repodata_has_zst", &ctx.repodata_has_zst)
.group("Repodata")
.set_rc_configurable()
Expand Down
147 changes: 45 additions & 102 deletions libmamba/src/core/subdirdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,33 +598,30 @@ namespace mamba
auto& ctx = Context::instance();
if (!ctx.offline || forbid_cache())
{
if (ctx.repodata_use_zst)
bool has_value = m_metadata.has_zst.has_value();
bool is_expired = m_metadata.has_zst.has_value()
&& m_metadata.has_zst.value().has_expired();
bool has_zst = m_metadata.check_zst(p_channel);
if (!has_zst && (is_expired || !has_value))
{
bool has_value = m_metadata.has_zst.has_value();
bool is_expired = m_metadata.has_zst.has_value()
&& m_metadata.has_zst.value().has_expired();
bool has_zst = m_metadata.check_zst(p_channel);
if (!has_zst && (is_expired || !has_value))
m_check_targets.push_back(std::make_unique<DownloadTarget>(
m_name + " (check zst)",
m_repodata_url + ".zst",
""
));
m_check_targets.back()->set_head_only(true);
m_check_targets.back()->set_finalize_callback(&MSubdirData::finalize_check, this);
m_check_targets.back()->set_ignore_failure(true);
if (!(ctx.graphics_params.no_progress_bars || ctx.output_params.quiet
|| ctx.output_params.json))
{
m_check_targets.push_back(std::make_unique<DownloadTarget>(
m_name + " (check zst)",
m_repodata_url + ".zst",
""
));
m_check_targets.back()->set_head_only(true);
m_check_targets.back()->set_finalize_callback(&MSubdirData::finalize_check, this);
m_check_targets.back()->set_ignore_failure(true);
if (!(ctx.graphics_params.no_progress_bars || ctx.output_params.quiet
|| ctx.output_params.json))
{
m_progress_bar_check = Console::instance().add_progress_bar(
m_name + " (check zst)"
);
m_check_targets.back()->set_progress_bar(m_progress_bar_check);
m_progress_bar_check.repr().postfix.set_value("Checking");
}
return true;
m_progress_bar_check = Console::instance().add_progress_bar(
m_name + " (check zst)"
);
m_check_targets.back()->set_progress_bar(m_progress_bar_check);
m_progress_bar_check.repr().postfix.set_value("Checking");
}
return true;
}
create_target();
}
Expand Down Expand Up @@ -676,15 +673,12 @@ namespace mamba
m_solv_cache_valid = true;
}

if (Context::instance().repodata_use_zst)
{
auto state_file = json_file;
state_file.replace_extension(".state.json");
auto lock = LockFile(state_file);
m_metadata.store_file_metadata(json_file);
auto outf = open_ofstream(state_file);
m_metadata.serialize_to_stream(outf);
}
auto state_file = json_file;
state_file.replace_extension(".state.json");
auto lock = LockFile(state_file);
m_metadata.store_file_metadata(json_file);
auto outf = open_ofstream(state_file);
m_metadata.serialize_to_stream(outf);
}

bool MSubdirData::finalize_transfer(const DownloadTarget&)
Expand Down Expand Up @@ -817,79 +811,28 @@ namespace mamba
m_metadata.cache_control = m_target->get_cache_control();
m_metadata.stored_file_size = file_size;

if (!Context::instance().repodata_use_zst)
fs::u8path state_file = json_file;
state_file.replace_extension(".state.json");
std::error_code ec;
mamba_fs::rename_or_move(m_temp_file->path(), json_file, ec);
if (ec)
{
LOG_DEBUG << "Opening '" << json_file.string() << "'";
path::touch(json_file, true);
std::ofstream final_file = open_ofstream(json_file);

if (!final_file.is_open())
{
throw mamba_error(
fmt::format("Could not open file '{}'", json_file.string()),
mamba_error_code::subdirdata_not_loaded
);
}

if (m_progress_bar)
{
m_progress_bar.set_postfix("Finalizing");
}

std::ifstream temp_file = open_ifstream(m_temp_file->path());
std::stringstream temp_json;
m_metadata.serialize_to_stream_tiny(temp_json);

// replace `}` with `,`
temp_json.seekp(-1, temp_json.cur);
temp_json << ',';
final_file << temp_json.str();
temp_file.seekg(1);
std::copy(
std::istreambuf_iterator<char>(temp_file),
std::istreambuf_iterator<char>(),
std::ostreambuf_iterator<char>(final_file)
throw mamba_error(
fmt::format(
"Could not move repodata file from {} to {}: {}",
m_temp_file->path(),
json_file,
strerror(errno)
),
mamba_error_code::subdirdata_not_loaded
);

if (!temp_file)
{
std::error_code ec;
fs::remove(json_file, ec);
if (ec)
{
LOG_ERROR << "Could not remove file " << json_file << ": " << ec.message();
}
throw mamba_error(
fmt::format("Could not write out repodata file {}: {}", json_file, strerror(errno)),
mamba_error_code::subdirdata_not_loaded
);
}
fs::last_write_time(json_file, fs::now());
}
else
{
fs::u8path state_file = json_file;
state_file.replace_extension(".state.json");
std::error_code ec;
mamba_fs::rename_or_move(m_temp_file->path(), json_file, ec);
if (ec)
{
throw mamba_error(
fmt::format(
"Could not move repodata file from {} to {}: {}",
m_temp_file->path(),
json_file,
strerror(errno)
),
mamba_error_code::subdirdata_not_loaded
);
}
fs::last_write_time(json_file, fs::now());
fs::last_write_time(json_file, fs::now());

m_metadata.store_file_metadata(json_file);
std::ofstream state_file_stream = open_ofstream(state_file);
m_metadata.serialize_to_stream(state_file_stream);

m_metadata.store_file_metadata(json_file);
std::ofstream state_file_stream = open_ofstream(state_file);
m_metadata.serialize_to_stream(state_file_stream);
}
if (m_progress_bar)
{
m_progress_bar.repr().postfix.set_value("Downloaded").deactivate();
Expand Down
4 changes: 0 additions & 4 deletions libmamba/tests/src/core/test_cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,8 +575,6 @@ namespace mamba
{
TEST_CASE("parse_mod_etag")
{
bool old_value = Context::instance().repodata_use_zst;
Context::instance().repodata_use_zst = true;
fs::u8path cache_folder = fs::u8path{ test_data_dir / "repodata_json_cache" };
auto mq = detail::read_metadata(cache_folder / "test_1.json");
CHECK(mq.has_value());
Expand Down Expand Up @@ -653,8 +651,6 @@ namespace mamba
CHECK_EQ(j.url, "https://conda.anaconda.org/conda-forge/noarch/repodata.json.zst");
CHECK_EQ(j.has_zst.value().value, true);
CHECK_EQ(j.has_zst.value().last_checked, parse_utc_timestamp("2023-01-06T16:33:06Z"));

Context::instance().repodata_use_zst = old_value;
}
}
} // namespace mamba
2 changes: 2 additions & 0 deletions libmambapy/libmambapy/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1434,6 +1434,8 @@ class SubdirData:
) -> None: ...
def cache_path(self) -> str: ...
def create_repo(self, arg0: Pool) -> Repo: ...
def download_and_check_targets(self, arg0: DownloadTargetList) -> bool: ...
def finalize_checks(self) -> None: ...
def loaded(self) -> bool: ...
pass

Expand Down
19 changes: 16 additions & 3 deletions libmambapy/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ PYBIND11_MODULE(bindings, m)
auto pyPackageInfo = py::class_<PackageInfo>(m, "PackageInfo");
auto pyPrefixData = py::class_<PrefixData>(m, "PrefixData");
auto pySolver = py::class_<MSolver>(m, "Solver");
auto pyMultiDownloadTarget = py::class_<MultiDownloadTarget>(m, "DownloadTargetList");
// only used in a return type; does it belong in the module?
auto pyRootRole = py::class_<validation::RootRole>(m, "RootRole");

Expand Down Expand Up @@ -439,13 +440,25 @@ PYBIND11_MODULE(bindings, m)
.def(
"cache_path",
[](const MSubdirData& self) -> std::string { return extract(self.cache_path()); }
);
)
.def(
"download_and_check_targets",
[](MSubdirData& self, MultiDownloadTarget& multi_download) -> bool
{
for (auto& check_target : self.check_targets())
{
multi_download.add(check_target.get());
}
multi_download.download(MAMBA_NO_CLEAR_PROGRESS_BARS);
return self.check_targets().size();
}
)
.def("finalize_checks", &MSubdirData::finalize_checks);

m.def("cache_fn_url", &cache_fn_url);
m.def("create_cache_dir", &create_cache_dir);

py::class_<MultiDownloadTarget>(m, "DownloadTargetList")
.def(py::init<>())
pyMultiDownloadTarget.def(py::init<>())
.def(
"add",
[](MultiDownloadTarget& self, MSubdirData& sub) -> void { self.add(sub.target()); }
Expand Down
17 changes: 15 additions & 2 deletions mamba/mamba/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,23 @@ def fixup_channel_spec(spec):
channel, channel_platform, full_url, pkgs_dirs, repodata_fn
)

needs_finalising = sd.download_and_check_targets(dlist)
index.append(
(sd, {"platform": channel_platform, "url": url, "channel": channel})
(
sd,
{
"platform": channel_platform,
"url": url,
"channel": channel,
"needs_finalising": needs_finalising,
},
)
)
dlist.add(sd)

for (sd, info) in index:
if info["needs_finalising"]:
sd.finalize_checks()
Comment on lines +119 to +120
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not too familiar with this part of the code base, can you explain what is going on here? Are there any alternatives to having this logic here in Python?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is effectively just me mimicking the code that's in channel_loader which atm is duplicated between mamba and micromamba.

Let me know if you have an idea in mind that I could test out to make this nicer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More concretely though, what's happening in the zst-enabled codepaths is that we do a HEAD request first to check if the zst variant exists, and if it does, we add another target that's the actual GET request for the zst variant.

dlist.add(sd)

is_downloaded = dlist.download(api.MAMBA_DOWNLOAD_FAILFAST)

Expand Down