From 69edb6090fea99ea4a207e1829f1afbc2f8d6172 Mon Sep 17 00:00:00 2001
From: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
Date: Tue, 12 Mar 2024 14:05:13 -0700
Subject: [PATCH 01/19] MRG: enable loading lineages from annotated gather with
 match_name instead of name (#3078)

This PR enables loading from gather with-lineages files that contain
'match_name' instead of 'name'. Rationale:
- we generally plan to replace 'name' with 'match_name' in gather output
(https://github.com/sourmash-bio/sourmash/issues/1555)
- branchwater plugin's fastmultigather already uses 'match_name'

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 src/sourmash/tax/tax_utils.py                 | 15 ++++++++---
 .../tax/test1.gather.with-lineages.csv        |  5 ++++
 tests/test_tax_utils.py                       | 25 ++++++++++++++++++-
 3 files changed, 40 insertions(+), 5 deletions(-)
 create mode 100644 tests/test-data/tax/test1.gather.with-lineages.csv

diff --git a/src/sourmash/tax/tax_utils.py b/src/sourmash/tax/tax_utils.py
index 55feed66d2..1615c90d74 100644
--- a/src/sourmash/tax/tax_utils.py
+++ b/src/sourmash/tax/tax_utils.py
@@ -1274,7 +1274,9 @@ def load(
                 elif "accession" in header:
                     identifier = "accession"
                     header = ["ident" if "accession" == x else x for x in header]
-                elif "name" in header and "lineage" in header:
+                elif "lineage" in header and any(
+                    ["name" in header, "match_name" in header]
+                ):
                     return cls.load_from_gather_with_lineages(
                         filename, force=force, lins=lins, ictv=ictv
                     )
@@ -1390,9 +1392,14 @@ def load_from_gather_with_lineages(
             if not header:
                 raise ValueError(f"cannot read taxonomy assignments from {filename}")
 
-            if "name" not in header or "lineage" not in header:
+            ident_col = None
+            if "name" in header:
+                ident_col = "name"
+            elif "match_name" in header:
+                ident_col = "match_name"
+            if "lineage" not in header or ident_col is None:
                 raise ValueError(
-                    "Expected headers 'name' and 'lineage' not found. Is this a with-lineages file?"
+                    "Expected headers 'name'/'match_name' and 'lineage' not found. Is this a with-lineages file?"
                 )
 
             ranks = None
@@ -1405,7 +1412,7 @@ def load_from_gather_with_lineages(
             for n, row in enumerate(r):
                 num_rows += 1
 
-                name = row["name"]
+                name = row[ident_col]
                 ident = get_ident(name)
 
                 if lins:
diff --git a/tests/test-data/tax/test1.gather.with-lineages.csv b/tests/test-data/tax/test1.gather.with-lineages.csv
new file mode 100644
index 0000000000..1c81221737
--- /dev/null
+++ b/tests/test-data/tax/test1.gather.with-lineages.csv
@@ -0,0 +1,5 @@
+intersect_bp,f_orig_query,f_match,f_unique_to_query,f_unique_weighted,average_abund,median_abund,std_abund,name,filename,md5,f_match_orig,unique_intersect_bp,gather_result_rank,remaining_bp,query_name,query_md5,query_filename,query_bp,ksize,scaled,query_n_hashes,lineage
+442000,0.08815317112086159,0.08438335242458954,0.08815317112086159,0.05815279361459521,1.6153846153846154,1.0,1.1059438185997785,"GCF_001881345.1 Escherichia coli strain=SF-596, ASM188134v1",/group/ctbrowngrp/gtdb/databases/ctb/gtdb-rs202.genomic.k31.sbt.zip,683df1ec13872b4b98d59e98b355b52c,0.042779713511420826,442000,0,4572000,test1,md5,test1.sig,5014000,31,1000,2507,Bacteria;Pseudomonadota;Gammaproteobacteria;Enterobacterales;Enterobacteriaceae;Escherichia;Escherichia coli
+390000,0.07778220981252493,0.10416666666666667,0.07778220981252493,0.050496823586903404,1.5897435897435896,1.0,0.8804995294906566,"GCF_009494285.1 Prevotella copri strain=iAK1218, ASM949428v1",/group/ctbrowngrp/gtdb/databases/ctb/gtdb-rs202.genomic.k31.sbt.zip,1266c86141e3a5603da61f57dd863ed0,0.052236806857755155,390000,1,4182000,test1,md5,test1.sig,5014000,31,1000,2507,Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella;Prevotella copri
+138000,0.027522935779816515,0.024722321748477247,0.027522935779816515,0.015637726014008795,1.391304347826087,1.0,0.5702120455914782,"GCF_013368705.1 Bacteroides vulgatus strain=B33, ASM1336870v1",/group/ctbrowngrp/gtdb/databases/ctb/gtdb-rs202.genomic.k31.sbt.zip,7d5f4ba1d01c8c3f7a520d19faded7cb,0.012648945921173235,138000,2,4044000,test1,md5,test1.sig,5014000,31,1000,2507,Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Bacteroidaceae;Phocaeicola;Phocaeicola vulgatus
+338000,0.06741124850418827,0.013789581205311542,0.010769844435580374,0.006515719172503665,1.4814814814814814,1.0,0.738886568268889,"GCF_003471795.1 Prevotella copri strain=AM16-54, ASM347179v1",/group/ctbrowngrp/gtdb/databases/ctb/gtdb-rs202.genomic.k31.sbt.zip,0ebd36ff45fc2810808789667f4aad84,0.04337782340862423,54000,3,3990000,test1,md5,test1.sig,5014000,31,1000,2507,Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella;Prevotella copri
diff --git a/tests/test_tax_utils.py b/tests/test_tax_utils.py
index 192406e251..dfca20628a 100644
--- a/tests/test_tax_utils.py
+++ b/tests/test_tax_utils.py
@@ -1014,7 +1014,7 @@ def test_check_and_load_gather_lineage_csvs_bad_header(runtmp):
     with pytest.raises(ValueError) as exc:
         LineageDB.load_from_gather_with_lineages(g_res)
     assert (
-        "Expected headers 'name' and 'lineage' not found. Is this a with-lineages file?"
+        "Expected headers 'name'/'match_name' and 'lineage' not found. Is this a with-lineages file?"
         in str(exc.value)
     )
 
@@ -1038,6 +1038,29 @@ def test_check_and_load_gather_lineage_csvs_isdir(runtmp):
     assert "is a directory" in str(exc.value)
 
 
+def test_check_and_load_gather_lineage_csvs_name(runtmp):
+    # load a with-lineages file that has a 'name' column, not 'match_name'
+    g_res = utils.get_test_data("tax/test1.gather.with-lineages.csv")
+
+    lins = LineageDB.load_from_gather_with_lineages(g_res)
+    assert len(lins) == 4
+
+
+def test_check_and_load_gather_lineage_csvs_match_name(runtmp):
+    # load a with-lineages file with 'match_name' in place of 'name'; other '*name' columns stay intact
+    g_res = utils.get_test_data("tax/test1.gather.with-lineages.csv")
+    out_lins = runtmp.output("match-name.lineages.csv")
+    with open(g_res) as f_in:
+        header = f_in.readline().rstrip("\n").split(",")
+        header = ["match_name" if col == "name" else col for col in header]
+        with open(out_lins, "w") as f_out:
+            f_out.write(",".join(header) + "\n")
+            f_out.writelines(f_in)
+
+    lins = LineageDB.load_from_gather_with_lineages(out_lins)
+    assert len(lins) == 4
+
+
 def test_check_and_load_gather_csvs_fail_on_missing(runtmp):
     g_csv = utils.get_test_data("tax/test1.gather.csv")
     # make gather results with taxonomy name not in tax_assign

From ac21e1d927697a33479b206c91ff363051a5b2b5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:26:22 -0700
Subject: [PATCH 02/19] Bump pypa/cibuildwheel from 2.16.5 to 2.17.0 (#3084)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from
2.16.5 to 2.17.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/pypa/cibuildwheel/releases">pypa/cibuildwheel's
releases</a>.</em></p>
<blockquote>
<h2>v2.17.0</h2>
<ul>
<li>🌟 Adds the ability to inherit configuration in TOML overrides. This
makes certain configurations much simpler. If you're overriding an
option like <code>before-build</code> or <code>environment</code>, and
you just want to add an extra command or environment variable, you can
just append (or prepend) to the previous config. See <a
href="https://cibuildwheel.pypa.io/en/stable/options/#inherit">the
docs</a> for more information. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1730">#1730</a>)</li>
<li>🌟 Adds official support for native arm64 macOS GitHub runners. To
use them, just specify <code>macos-14</code> as an <code>os</code> of
your job in your workflow file. You can also keep <code>macos-13</code>
in your build matrix to build x86_64. Check out the new <a
href="https://cibuildwheel.pypa.io/en/stable/setup/#github-actions">GitHub
Actions example config</a>.</li>
<li>✨ You no longer need to specify <code>--platform</code> to run
cibuildwheel locally! Instead it will detect your platform
automatically. This was a safety feature, no longer necessary. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1727">#1727</a>)</li>
<li>🛠 Removed setuptools and wheel pinned versions. This only affects
old-style projects without a <code>pyproject.toml</code>, projects with
<code>pyproject.toml</code> are already getting fresh versions of their
<code>build-system.requires</code> installed into an isolated
environment. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1725">#1725</a>)</li>
<li>🛠 Improve how the GitHub Action passes arguments (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1757">#1757</a>)</li>
<li>🛠 Remove a system-wide install of pipx in the GitHub Action (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1745">#1745</a>)</li>
<li>🐛 No longer will cibuildwheel override the PIP_CONSTRAINT
environment variable when using the <code>build</code> frontend. Instead
it will be extended. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1675">#1675</a>)</li>
<li>🐛 Fix a bug where building and testing both x86_64 and arm64 wheels
on the same runner caused the wrong architectures in the test
environment (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1750">#1750</a>)</li>
<li>🐛 Fix a bug that prevented testing a CPython 3.8 wheel targeting
macOS 11+ on x86_64 (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1768">#1768</a>)</li>
<li>📚 Moved the docs onto the official PyPA domain - they're now
available at <a
href="https://cibuildwheel.pypa.io">https://cibuildwheel.pypa.io</a> .
(<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1775">#1775</a>)</li>
<li>📚 Docs and examples improvements (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1762">#1762</a>,
<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1734">#1734</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md">pypa/cibuildwheel's
changelog</a>.</em></p>
<blockquote>
<h3>v2.17.0</h3>
<p><em>11 March 2024</em></p>
<ul>
<li>🌟 Adds the ability to inherit configuration in TOML overrides. This
makes certain configurations much simpler. If you're overriding an
option like <code>before-build</code> or <code>environment</code>, and
you just want to add an extra command or environment variable, you can
just append (or prepend) to the previous config. See <a
href="https://cibuildwheel.pypa.io/en/stable/options/#inherit">the
docs</a> for more information. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1730">#1730</a>)</li>
<li>🌟 Adds official support for native arm64 macOS GitHub runners. To
use them, just specify <code>macos-14</code> as an <code>os</code> of
your job in your workflow file. You can also keep <code>macos-13</code>
in your build matrix to build x86_64. Check out the new <a
href="https://cibuildwheel.pypa.io/en/stable/setup/#github-actions">GitHub
Actions example config</a>.</li>
<li>✨ You no longer need to specify <code>--platform</code> to run
cibuildwheel locally! Instead it will detect your platform
automatically. This was a safety feature, no longer necessary. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1727">#1727</a>)</li>
<li>🛠 Removed setuptools and wheel pinned versions. This only affects
old-style projects without a <code>pyproject.toml</code>, projects with
<code>pyproject.toml</code> are already getting fresh versions of their
<code>build-system.requires</code> installed into an isolated
environment. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1725">#1725</a>)</li>
<li>🛠 Improve how the GitHub Action passes arguments (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1757">#1757</a>)</li>
<li>🛠 Remove a system-wide install of pipx in the GitHub Action (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1745">#1745</a>)</li>
<li>🐛 No longer will cibuildwheel override the PIP_CONSTRAINT
environment variable when using the <code>build</code> frontend. Instead
it will be extended. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1675">#1675</a>)</li>
<li>🐛 Fix a bug where building and testing both x86_64 and arm64 wheels
on the same runner caused the wrong architectures in the test
environment (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1750">#1750</a>)</li>
<li>🐛 Fix a bug that prevented testing a CPython 3.8 wheel targeting
macOS 11+ on x86_64 (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1768">#1768</a>)</li>
<li>📚 Moved the docs onto the official PyPA domain - they're now
available at <a
href="https://cibuildwheel.pypa.io">https://cibuildwheel.pypa.io</a> .
(<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1775">#1775</a>)</li>
<li>📚 Docs and examples improvements (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1762">#1762</a>,
<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1734">#1734</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/8d945475ac4b1aac4ae08b2fd27db9917158b6ce"><code>8d94547</code></a>
Bump version: v2.17.0</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/ca06deb26f92b2b2c6019a3bc223875215fe4cf2"><code>ca06deb</code></a>
Merge pull request <a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1775">#1775</a>
from pypa/doc-domain</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/f7e19222253830775777d4dc7e8cf56aa098d97f"><code>f7e1922</code></a>
CirrusCI fixes (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1786">#1786</a>)</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/0d8e919dfc5b7631e641377671db317556dcc7ef"><code>0d8e919</code></a>
[Bot] Update dependencies (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1784">#1784</a>)</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/022de07dc13bb25455653a082449a0c038632ac0"><code>022de07</code></a>
Merge pull request <a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1785">#1785</a>
from pypa/revert-1783</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/920f574191fe30782d55398b7a0e70d62c999024"><code>920f574</code></a>
Remove manylinux1 docker pin</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/5c06f3c28934b3830d065b17ab853c4465ce6623"><code>5c06f3c</code></a>
docs: Add how to run tests in development (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1698">#1698</a>)</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/e2a0839555d4d2ffd366ac4cd933262f5974fd10"><code>e2a0839</code></a>
fix: set SYSTEM_VERSION_COMPAT=0 during pip install on macos (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1768">#1768</a>)</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/87fff7728267ddada9c54df079e5864e5c5e5dfb"><code>87fff77</code></a>
chore(deps): bump the actions group with 1 update (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1776">#1776</a>)</li>
<li><a
href="https://github.com/pypa/cibuildwheel/commit/8ef9486aab2cc0aea71870a765265e294d84a679"><code>8ef9486</code></a>
Add <code>pedalboard</code> to projects.yml. (<a
href="https://redirect.github.com/pypa/cibuildwheel/issues/1781">#1781</a>)</li>
<li>Additional commits viewable in <a
href="https://github.com/pypa/cibuildwheel/compare/v2.16.5...v2.17.0">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=pypa/cibuildwheel&package-manager=github_actions&previous-version=2.16.5&new-version=2.17.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/build_wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 841c0a03b9..1f85234218 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -46,7 +46,7 @@ jobs:
           python-version: '3.10'
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.16.5
+        uses: pypa/cibuildwheel@v2.17.0
         env:
           CIBW_ENVIRONMENT_MACOS: ${{ matrix.macos_target }}
           CIBW_ARCHS_LINUX: ${{ matrix.arch }}

From d0b3c4737571486d04b659029920cffbfc096ac3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 18 Mar 2024 23:45:38 +0000
Subject: [PATCH 03/19] Bump thiserror from 1.0.57 to 1.0.58 (#3082)

Bumps [thiserror](https://github.com/dtolnay/thiserror) from 1.0.57 to
1.0.58.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/dtolnay/thiserror/releases">thiserror's
releases</a>.</em></p>
<blockquote>
<h2>1.0.58</h2>
<ul>
<li>Make backtrace support available when using -Dwarnings (<a
href="https://redirect.github.com/dtolnay/thiserror/issues/292">#292</a>)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/dtolnay/thiserror/commit/df8bffad18e408e9b88d3360797506fc1282babe"><code>df8bffa</code></a>
Release 1.0.58</li>
<li><a
href="https://github.com/dtolnay/thiserror/commit/14be209a74e075861fc754d41c376d2d5c6da06b"><code>14be209</code></a>
Merge pull request <a
href="https://redirect.github.com/dtolnay/thiserror/issues/292">#292</a>
from dtolnay/deadcode</li>
<li><a
href="https://github.com/dtolnay/thiserror/commit/15a1d647cf5e98835e800e5b6d17ec260dcb89cc"><code>15a1d64</code></a>
Make compatible with -Dwarnings</li>
<li><a
href="https://github.com/dtolnay/thiserror/commit/f55a5d28da7114b44e967451f697d8d0fedb20be"><code>f55a5d2</code></a>
Ignore mixed_attributes_style clippy lint</li>
<li><a
href="https://github.com/dtolnay/thiserror/commit/f770921a4fc7fc972b9fe00a841831b886e83f10"><code>f770921</code></a>
Resolve assigning_clones clippy lint</li>
<li>See full diff in <a
href="https://github.com/dtolnay/thiserror/compare/1.0.57...1.0.58">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=thiserror&package-manager=cargo&previous-version=1.0.57&new-version=1.0.58)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f39774cc5b..2db96670de 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1736,18 +1736,18 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.57"
+version = "1.0.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b"
+checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.57"
+version = "1.0.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81"
+checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
 dependencies = [
  "proc-macro2",
  "quote",

From feaa9b155dd186c0694d9d457762e965d6756bd0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 19 Mar 2024 12:12:32 +0000
Subject: [PATCH 04/19] Bump DeterminateSystems/nix-installer-action from 9 to
 10 (#3083)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[DeterminateSystems/nix-installer-action](https://github.com/determinatesystems/nix-installer-action)
from 9 to 10.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/determinatesystems/nix-installer-action/releases">DeterminateSystems/nix-installer-action's
releases</a>.</em></p>
<blockquote>
<h2>v10</h2>
<h2>What's Changed</h2>
<ul>
<li>action: post-run-job: try clean daemon container, warn on failure by
<a href="https://github.com/colemickens"><code>@​colemickens</code></a>
in <a
href="https://redirect.github.com/DeterminateSystems/nix-installer-action/pull/61">DeterminateSystems/nix-installer-action#61</a></li>
<li>No longer require sudo by <a
href="https://github.com/Hoverbear"><code>@​Hoverbear</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/nix-installer-action/pull/64">DeterminateSystems/nix-installer-action#64</a></li>
<li>Handle docker not existing. by <a
href="https://github.com/Hoverbear"><code>@​Hoverbear</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/nix-installer-action/pull/66">DeterminateSystems/nix-installer-action#66</a></li>
<li>Don't use docker shim if only using a mounted docker.sock instead of
docker-in-docker by <a
href="https://github.com/Hoverbear"><code>@​Hoverbear</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/nix-installer-action/pull/67">DeterminateSystems/nix-installer-action#67</a></li>
<li>DETERMINATE_NIX_KVM fixup, support Magic Nix Cache + FlakeHub Cache
on Namespace runners by <a
href="https://github.com/grahamc"><code>@​grahamc</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/nix-installer-action/pull/72">DeterminateSystems/nix-installer-action#72</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/DeterminateSystems/nix-installer-action/compare/v9...v10">https://github.com/DeterminateSystems/nix-installer-action/compare/v9...v10</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/de22e16c4711fca50c816cc9081563429d1cf563"><code>de22e16</code></a>
DETERMINATE_NIX_KVM fixup, support Magic Nix Cache + FlakeHub Cache on
Namesp...</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/e279ba56d8266c08a0e65738145aabb824f308ed"><code>e279ba5</code></a>
Merge pull request <a
href="https://redirect.github.com/determinatesystems/nix-installer-action/issues/67">#67</a>
from DeterminateSystems/hoverbear/fh-161-after-running...</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/f4a0ffe230179abc19a2f6fd8c9670ff36852318"><code>f4a0ffe</code></a>
Don't use docker shim if only using a mounted docker.sock instead of
docker-i...</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/ffea801f3052344aca70d89bb416dd08acbdd644"><code>ffea801</code></a>
Merge pull request <a
href="https://redirect.github.com/determinatesystems/nix-installer-action/issues/66">#66</a>
from DeterminateSystems/hoverbear/fh-160-action-should...</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/4126bb83b330975ce4a7e3ce3e964cfb4e5be9d1"><code>4126bb8</code></a>
Merge branch 'main' into
hoverbear/fh-160-action-should-work-under-nektosact-...</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/81ee88fd4a96dcf6a6f21c79564f2feb12a145bb"><code>81ee88f</code></a>
Handle docker not existing</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/0f8fa3d242f328af9abef2b9533980ca6b29a34b"><code>0f8fa3d</code></a>
Merge pull request <a
href="https://redirect.github.com/determinatesystems/nix-installer-action/issues/64">#64</a>
from DeterminateSystems/hoverbear/fh-156-installer-act...</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/f576e90e2d0a8dfe7216076d6061efe6258c09df"><code>f576e90</code></a>
Fix logic inversion</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/161c1f6904d53e431892beb03013f0191ce3cfcb"><code>161c1f6</code></a>
Use uid not username</li>
<li><a
href="https://github.com/DeterminateSystems/nix-installer-action/commit/0e5b7249797898c43d377657b98b01885044979f"><code>0e5b724</code></a>
No longer require sudo</li>
<li>Additional commits viewable in <a
href="https://github.com/determinatesystems/nix-installer-action/compare/v9...v10">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=DeterminateSystems/nix-installer-action&package-manager=github_actions&previous-version=9&new-version=10)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/dev_envs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/dev_envs.yml b/.github/workflows/dev_envs.yml
index a34c4e5301..0936086444 100644
--- a/.github/workflows/dev_envs.yml
+++ b/.github/workflows/dev_envs.yml
@@ -15,7 +15,7 @@ jobs:
         fetch-depth: 0
 
     - name: Install Nix
-      uses: DeterminateSystems/nix-installer-action@v9
+      uses: DeterminateSystems/nix-installer-action@v10
     - name: Run the Magic Nix Cache
       uses: DeterminateSystems/magic-nix-cache-action@v3
 

From 571904dfbc9af01a7ad195e01f91acf1feba91a0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 19 Mar 2024 12:49:27 +0000
Subject: [PATCH 05/19] Bump DeterminateSystems/magic-nix-cache-action from 3
 to 4 (#3085)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps
[DeterminateSystems/magic-nix-cache-action](https://github.com/determinatesystems/magic-nix-cache-action)
from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/determinatesystems/magic-nix-cache-action/releases">DeterminateSystems/magic-nix-cache-action's
releases</a>.</em></p>
<blockquote>
<h2>v4</h2>
<h2>What's Changed</h2>
<ul>
<li>Correct privacy policy link by <a
href="https://github.com/grahamc"><code>@​grahamc</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/magic-nix-cache-action/pull/33">DeterminateSystems/magic-nix-cache-action#33</a></li>
<li>Stop setting the default source branch to 'main' by <a
href="https://github.com/grahamc"><code>@​grahamc</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/magic-nix-cache-action/pull/36">DeterminateSystems/magic-nix-cache-action#36</a></li>
<li>Introduce FlakeHub Cache by <a
href="https://github.com/grahamc"><code>@​grahamc</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/magic-nix-cache-action/pull/35">DeterminateSystems/magic-nix-cache-action#35</a></li>
<li>Update readme for flakehub cache by <a
href="https://github.com/grahamc"><code>@​grahamc</code></a> in <a
href="https://redirect.github.com/DeterminateSystems/magic-nix-cache-action/pull/38">DeterminateSystems/magic-nix-cache-action#38</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/compare/v3...v4">https://github.com/DeterminateSystems/magic-nix-cache-action/compare/v3...v4</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/fc6aaceb40b9845a02b91e059ec147e78d1b4e41"><code>fc6aace</code></a>
Merge pull request <a
href="https://redirect.github.com/determinatesystems/magic-nix-cache-action/issues/38">#38</a>
from DeterminateSystems/flakehub-cache-readme</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/f7df6894cf981e094cc90ae2e2c1ce2cb442f512"><code>f7df689</code></a>
Don't publish to ids</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/b505f491cfe78683f3f043778dfefa6825c33fa5"><code>b505f49</code></a>
Update readme for flakehub cache</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/122e91d3464f7a0985c87c9639b08aa5d9160b31"><code>122e91d</code></a>
Merge pull request <a
href="https://redirect.github.com/determinatesystems/magic-nix-cache-action/issues/35">#35</a>
from DeterminateSystems/flakehub-cache</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/2c553b21e6f6d0fdf2d886db4950e41c07b4fa58"><code>2c553b2</code></a>
Add shellcheck checks to CI</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/455b9185656000156842a00cce28404ed9d4a962"><code>455b918</code></a>
Use -closure ids</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/1b7becab50d38fe7f5961bb379c1d68c1ef108bc"><code>1b7beca</code></a>
Merge remote-tracking branch 'origin/main' into flakehub-cache</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/54acdd10fd23a8dff501938381c2303407ef71ff"><code>54acdd1</code></a>
Merge pull request <a
href="https://redirect.github.com/determinatesystems/magic-nix-cache-action/issues/36">#36</a>
from DeterminateSystems/grahamc-patch-1</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/79f590d9a26e96e588b13a36c8cd549cdc5b5ddb"><code>79f590d</code></a>
/latest/ -&gt; /stable/</li>
<li><a
href="https://github.com/DeterminateSystems/magic-nix-cache-action/commit/14fda4e07607666fbdadd43a584ee4bc68398082"><code>14fda4e</code></a>
Stop setting the default source branch to 'main'</li>
<li>Additional commits viewable in <a
href="https://github.com/determinatesystems/magic-nix-cache-action/compare/v3...v4">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=DeterminateSystems/magic-nix-cache-action&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/dev_envs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/dev_envs.yml b/.github/workflows/dev_envs.yml
index 0936086444..8ffb98db64 100644
--- a/.github/workflows/dev_envs.yml
+++ b/.github/workflows/dev_envs.yml
@@ -17,7 +17,7 @@ jobs:
     - name: Install Nix
       uses: DeterminateSystems/nix-installer-action@v10
     - name: Run the Magic Nix Cache
-      uses: DeterminateSystems/magic-nix-cache-action@v3
+      uses: DeterminateSystems/magic-nix-cache-action@v4
 
     - run: nix run .# -- --version
 

From 8d5b6bfbf9438aae90d2e0954133daac42330fd3 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Wed, 20 Mar 2024 14:14:29 -0700
Subject: [PATCH 06/19] MRG: fix clippy beta issues (#3088)

Fixes https://github.com/sourmash-bio/sourmash/issues/3087
---
 src/core/src/collection.rs     |  3 +-
 src/core/src/encodings.rs      |  2 --
 src/core/src/manifest.rs       |  3 +-
 src/core/src/signature.rs      |  3 --
 src/core/src/sketch/minhash.rs | 52 +---------------------------------
 5 files changed, 3 insertions(+), 60 deletions(-)

diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs
index 8cc6129cf4..9f708381ef 100644
--- a/src/core/src/collection.rs
+++ b/src/core/src/collection.rs
@@ -6,8 +6,7 @@ use camino::Utf8PathBuf as PathBuf;
 use crate::encodings::Idx;
 use crate::manifest::{Manifest, Record};
 use crate::prelude::*;
-use crate::signature::Signature;
-use crate::storage::{FSStorage, InnerStorage, MemStorage, SigStore, Storage, ZipStorage};
+use crate::storage::{FSStorage, InnerStorage, MemStorage, SigStore, ZipStorage};
 use crate::{Error, Result};
 
 #[cfg(feature = "parallel")]
diff --git a/src/core/src/encodings.rs b/src/core/src/encodings.rs
index ac69cd58eb..f8934596dc 100644
--- a/src/core/src/encodings.rs
+++ b/src/core/src/encodings.rs
@@ -1,8 +1,6 @@
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
-use std::convert::TryFrom;
 use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher};
-use std::iter::Iterator;
 use std::str;
 
 use nohash_hasher::BuildNoHashHasher;
diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs
index c82ca6ee1e..7441a9b69f 100644
--- a/src/core/src/manifest.rs
+++ b/src/core/src/manifest.rs
@@ -1,4 +1,3 @@
-use std::convert::TryInto;
 use std::fs::File;
 use std::io::{BufRead, BufReader, Read, Write};
 use std::ops::Deref;
@@ -12,7 +11,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::encodings::HashFunctions;
 use crate::prelude::*;
-use crate::signature::{Signature, SigsTrait};
+use crate::signature::SigsTrait;
 use crate::sketch::Sketch;
 use crate::Result;
 
diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs
index da38587dc3..0ab8190f98 100644
--- a/src/core/src/signature.rs
+++ b/src/core/src/signature.rs
@@ -6,7 +6,6 @@ use core::iter::FusedIterator;
 
 use std::fs::File;
 use std::io;
-use std::iter::Iterator;
 use std::path::Path;
 use std::str;
 
@@ -18,7 +17,6 @@ use typed_builder::TypedBuilder;
 
 use crate::encodings::{aa_to_dayhoff, aa_to_hp, revcomp, to_aa, HashFunctions, VALID};
 use crate::prelude::*;
-use crate::selection::{Select, Selection};
 use crate::sketch::minhash::KmerMinHash;
 use crate::sketch::Sketch;
 use crate::Error;
@@ -891,7 +889,6 @@ impl PartialEq for Signature {
 
 #[cfg(test)]
 mod test {
-    use std::convert::TryInto;
     use std::fs::File;
     use std::io::{BufReader, Read};
     use std::path::PathBuf;
diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs
index 24cdc9539f..1ee747745a 100644
--- a/src/core/src/sketch/minhash.rs
+++ b/src/core/src/sketch/minhash.rs
@@ -2,7 +2,7 @@ use std::cmp::Ordering;
 use std::collections::{BTreeMap, BTreeSet};
 use std::f64::consts::PI;
 use std::fmt::Write;
-use std::iter::{Iterator, Peekable};
+use std::iter::Peekable;
 use std::str;
 use std::sync::Mutex;
 
@@ -942,56 +942,6 @@ impl<T: Ord, I: Iterator<Item = T>> Iterator for Intersection<T, I> {
     }
 }
 
-struct Union<T, I: Iterator<Item = T>> {
-    iter: Peekable<I>,
-    other: Peekable<I>,
-}
-
-impl<T: Ord, I: Iterator<Item = T>> Iterator for Union<T, I> {
-    type Item = T;
-
-    fn next(&mut self) -> Option<T> {
-        let res = match (self.iter.peek(), self.other.peek()) {
-            (Some(ref left_key), Some(ref right_key)) => left_key.cmp(right_key),
-            (None, Some(_)) => {
-                return self.other.next();
-            }
-            (Some(_), None) => {
-                return self.iter.next();
-            }
-            _ => return None,
-        };
-
-        match res {
-            Ordering::Less => self.iter.next(),
-            Ordering::Greater => self.other.next(),
-            Ordering::Equal => {
-                self.other.next();
-                self.iter.next()
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::Union;
-
-    #[test]
-    fn test_union() {
-        let v1 = [1u64, 2, 4, 10];
-        let v2 = [1u64, 3, 4, 9];
-
-        let union: Vec<u64> = Union {
-            iter: v1.iter().peekable(),
-            other: v2.iter().peekable(),
-        }
-        .cloned()
-        .collect();
-        assert_eq!(union, [1, 2, 3, 4, 9, 10]);
-    }
-}
-
 //#############
 // A MinHash implementation for low scaled or large cardinalities
 

From cfe6a968aadea555bcbd265047f93b61fcad2b60 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Wed, 20 Mar 2024 15:08:17 -0700
Subject: [PATCH 07/19] MRG: rework the manifest documentation; do misc cleanup
 (#3027)

This PR:
* fixes a minor nit in `sourmash sig collect` output where it said
"loaded 0 signatures"
* updates a lot of the documentation around standalone manifests to
encourage their use
* in tandem, modifies docs to discourage loading from
pathlists/from-files and directory hierarchies

TODO:
- [x] look at TODO item re directories in sig collect
- [x] think about adding
https://github.com/sourmash-bio/sourmash/issues/3023 information into
docs about lazy loading; maybe in the advanced databases document?
- [x] update `sig manifest` docs to point out that they do not generate
standalone manifests
- [x] revisit branchwater plugin documentation, to either make issues
or make changes
- [x] update `sig check` and `sig collect` to tell people to expand
their paths ref https://github.com/sourmash-bio/sourmash/issues/3039
- [x] update docs more to recommend against pathlists and directories
per https://github.com/sourmash-bio/sourmash/issues/3040

Related issues:
* https://github.com/sourmash-bio/sourmash_plugin_branchwater/issues/235
* Fixes https://github.com/sourmash-bio/sourmash/issues/3048
* Fixes https://github.com/sourmash-bio/sourmash/issues/3009 by
recommending `sig collect` and `sig check` instead of `sig manifest` for
making standalone manifests
* https://github.com/sourmash-bio/sourmash/issues/3053
* Fixes https://github.com/sourmash-bio/sourmash/issues/3023
* Fixes https://github.com/sourmash-bio/sourmash/issues/3039
* Fixes https://github.com/sourmash-bio/sourmash/issues/3040

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
---
 doc/command-line.md               | 165 +++++++++++++++++++-----------
 doc/databases-advanced.md         |  82 +++++++++++----
 doc/faq.md                        |   2 +-
 doc/release-notes/sourmash-2.0.md |   2 +-
 doc/sourmash-sketch.md            |   6 +-
 doc/using-sourmash-a-guide.md     |   2 +-
 src/sourmash/sig/__main__.py      |   2 -
 7 files changed, 173 insertions(+), 88 deletions(-)

diff --git a/doc/command-line.md b/doc/command-line.md
index 71173792cf..90633d342e 100644
--- a/doc/command-line.md
+++ b/doc/command-line.md
@@ -1914,7 +1914,10 @@ will continue processing input sequences.
 
 ### `sourmash signature manifest` - output a manifest for a file
 
-Output a manifest for a file, database, or collection.
+Output a manifest for a file, database, or collection.  Note that
+these manifests are not usually suitable for use as standalone
+manifests; the `sourmash sig collect` and `sourmash sig check`
+commands produce standalone manifests.
 
 For example,
 ```
@@ -1942,8 +1945,10 @@ CSV and SQLite manifest files.
 
 ### `sourmash signature check` - compare picklists and manifests
 
-Compare picklists and manifests across databases, and optionally output matches
-and missing items.
+Compare picklists and manifests across databases, and optionally
+output matches and missing items.  In particular, `sig check` can be
+used to create standalone manifests for a subset of a large collection,
+using picklists.
 
 For example,
 ```
@@ -1962,17 +1967,28 @@ collections of signatures and identifiers.
 With `-m/--save-manifest-matching`, `sig check` creates a standalone
 manifest. In these manifests, sourmash v4 will by default write paths
 to the matched elements that are relative to the current working
-directory.  In some cases - when the output manifest is in different
+directory.  In some cases - when the output manifest is in a different
 directory - this will create manifests that do not work properly
 with sourmash.  The `--relpath` argument will rewrite the paths to be
 relative to the manifest, while the `--abspath` argument will rewrite
 paths to be absolute.  The `--relpath` behavior will be the default in
 sourmash v5.
 
+Standalone manifests created with `-m/--save-manifest-matching` will
+use the paths given to `sig check` on the command line; we recommend
+using zip files and sig files, and avoiding directory hierarchies or
+path lists. You can use `--from-file` to pass in long lists of
+filenames via a text file.
+
 ### `sourmash signature collect` - collect manifests across databases
 
 Collect manifests from across (many) files and merge into a single
-standalone manifest.
+standalone manifest. Standalone manifests can be used directly as a
+sourmash database; they support efficient searching and selection of
+sketches, as well as lazy loading of individual sketches from large
+collections.  See
+[advanced usage information on sourmash databases](databases-advanced.md)
+for more information.
 
 For example,
 ```
@@ -1987,20 +2003,30 @@ This manifest file can be loaded directly from the command line by sourmash.
 particularly useful when working with large collections of signatures and
 identifiers, and has command line options for merging and updating manifests.
 
+The standalone manifests created by `sig collect` will reference the
+paths given on the command line; we recommend using zip files and sig
+files, and avoiding directory hierarchies or path lists. You can also
+use `--from-file` to pass in long lists of filenames.
+
+Standalone manifests produced by `sig collect` work most efficiently
+when constructed from many small zip file collections.  
+
 As with `sig check`, the standalone manifests created by `sig collect`
 in sourmash v4 will by default write paths to the matched elements
 relative to the current working directory.  When the output manifest
-is in a different directory, this will create manifests that do not work
-properly with sourmash.  The `--relpath` argument will rewrite the
-paths to be relative to the manifest, while the `--abspath` argument
-will rewrite paths to be absolute.  The `--relpath` behavior will be
-the default in sourmash v5.
+is in a different directory, this will create manifests that do not
+work properly with sourmash.  The `--relpath` argument will rewrite
+the paths to be relative to the manifest, while the `--abspath`
+argument will rewrite paths to be absolute.  The `--relpath` behavior
+will be the default in sourmash v5.
 
 ## Advanced command-line usage
 
 ### Loading signatures and databases
 
-sourmash uses several different command-line styles.
+sourmash uses several different command-line styles.  Most sourmash
+commands can load sketches from any standard collection type; we
+primarily recommend using zipfiles (but read on!)
 
 Briefly,
 
@@ -2011,22 +2037,18 @@ Briefly,
   need to provide a selector (ksize with `-k`, moltype with `--dna` etc,
   or md5sum with `--query-md5`) that picks out a single signature.
 
-* `compare` takes multiple signatures and can load them from files,
-  directories, and indexed databases (SBT or LCA).  It can also take
-  a list of file paths in a text file, using `--from-file` (see below).
+* `compare` takes multiple signatures and can load them from any
+  sourmash collection type.
   
 * the `lca classify` and `lca summarize` commands take multiple
   signatures with `--query`, and multiple LCA databases, with
   `--db`. `sourmash multigather` also uses this style.  This allows these
   commands to specify multiple queries **and** multiple databases without
-  (too much) confusion.  These commands will take files containing
-  signature files using `--query-from-file` (see below).
+  (too much) confusion.  The databases must be LCA databases.
   
 * `index` and `lca index` take a few fixed parameters (database name,
   and for `lca index`, a taxonomy file) and then an arbitrary number of
-  other files that contain signatures, including files, directories,
-  and indexed databases. These commands will also take `--from-file`
-  (see below).
+  other files that contain signatures.
 
 None of these commands currently support searching, comparing, or indexing
 signatures with multiple ksizes or moltypes at the same time; you need
@@ -2092,7 +2114,7 @@ The following `coltype`s are currently supported for picklists:
 * `gather` - use the CSV output of `sourmash gather` as a picklist
 * `prefetch` - use the CSV output of `sourmash prefetch` as a picklist
 * `search` - use the CSV output of `sourmash prefetch` as a picklist
-* `manifest` - use the CSV output of `sourmash sig manifest` as a picklist
+* `manifest` - use CSV manifests produced by `sig manifest` as a picklist
 
 Identifiers are constructed by using the first space delimited word in
 the signature name.
@@ -2101,7 +2123,7 @@ One way to build a picklist is to use `sourmash sig grep <pattern>
 <collection> --csv out.csv` to construct a CSV file containing a list
 of all sketches that match the pattern (which can be a string or
 regexp). The `out.csv` file can be used as a picklist via the picklist
-manifest format with `--picklist out.csv::manifest`.
+manifest CSV format with `--picklist out.csv::manifest`.
 
 You can also use `sourmash sig describe --csv out.csv <signatures>` or
 `sourmash sig manifest -o out.csv <filename_or_db>` to construct an
@@ -2144,7 +2166,9 @@ slow, especially for many (100s or 1000s) of signatures.
 All of the `sourmash` commands support loading collections of
 signatures from zip files.  You can create a compressed collection of
 signatures using `sourmash sig cat *.sig -o collections.zip` and then
-specifying `collections.zip` on the command line in place of `*.sig`.
+specifying `collections.zip` on the command line in place of `*.sig`;
+you can also sketch FASTA/FASTQ files directly into a zip file with
+`-o collections.zip`.
 
 ### Choosing signature output formats
 
@@ -2171,7 +2195,7 @@ to stdout.
 All of these save formats can be loaded by sourmash commands.
 
 **We strongly suggest using .zip files to store signatures: they are fast,
-small, and fully supported by all the sourmash commands.**
+small, and fully supported by all the sourmash commands and API.**
 
 Note that when outputting large collections of signatures, some save
 formats require holding all the sketches in memory until they can be
@@ -2186,19 +2210,6 @@ databases!](databases-advanced.md)
 
 ### Loading many signatures
 
-#### Loading signatures within a directory hierarchy
-
-All of the `sourmash` commands support loading signatures from
-beneath directories; provide the paths on the command line.
-
-#### Passing in lists of files
-
-Most sourmash commands will also take a `--from-file` or
-`--query-from-file`, which will take the location of a text file containing
-a list of file paths. This can be useful for situations where you want
-to specify thousands of queries, or a subset of signatures produced by
-some other command.
-
 #### Indexed databases
 
 Indexed databases can make searching signatures much faster. SBT
@@ -2209,9 +2220,6 @@ SQLite databases (new in sourmash v4.4.0) are typically larger on disk
 than SBTs and LCAs, but in turn are fast to load and support very low
 memory search.
 
-(LCA databases also directly permit taxonomic searches using `sourmash lca`
-functions.)
-
 Commands that take multiple signatures or collections of signatures
 will also work with indexed databases.
 
@@ -2223,9 +2231,9 @@ only at one scaled value. If the database signature type is
 incompatible with the other signatures, sourmash will complain
 appropriately.
 
-In contrast, signature files, zip collections, and directory
-hierarchies can contain many different types of signatures, and
-compatible ones will be selected automatically.
+In contrast, signature files and zip collections can contain many
+different types of signatures, and compatible ones will be selected
+automatically.
 
 Use the `sourmash index` command to create an SBT.
 
@@ -2235,6 +2243,29 @@ database can be saved in JSON or SQL format with `-F json` or `-F sql`.
 Use `sourmash sig cat <list of signatures> -o <output>.sqldb` to create
 a SQLite indexed database.
 
+#### Loading signatures within a directory hierarchy
+
+All of the `sourmash` commands support loading signatures (`.sig` or
+`.sig.gz` files) from within directory hierarchies; you can just
+provide the paths to the top-level directory on the command line.
+
+However, this is no longer recommended because it can be very
+inefficient; we instead suggest passing all of the sketch files in
+the directory into `sig collect` to build a standalone manifest, or
+using `sig cat` on the directory to generate a zip file.
+
+#### Passing in lists of files
+
+sourmash commands support `--from-file` or `--query-from-file`, which
+will take the location of a text file containing a list of file
+paths. This can be useful for situations where you want to specify
+thousands of queries, or a subset of signatures produced by some other
+command.
+
+This is no longer recommended when using large collections; we instead
+suggest using standalone manifests built with `sig collect` and `sig
+check`, which will include extra metadata that supports fast loading.
+
 ### Combining search databases on the command line
 
 All of the commands in sourmash operate in "online" mode, so you can
@@ -2242,7 +2273,7 @@ combine multiple databases and signatures on the command line and get
 the same answer as if you built a single large database from all of
 them.  The only caveat to this rule is that if you have multiple
 identical matches present across the databases, the order in which
-they are found will differ depending on the order that the files are
+they are used may depend on the order that the files are
 passed in on the command line.
 
 ### Using stdin
@@ -2250,11 +2281,12 @@ passed in on the command line.
 Most commands will take signature JSON data via stdin using the usual
 UNIX convention, `-`.  Moreover, `sourmash sketch` and the `sourmash
 sig` commands will output to stdout.  So, for example,
+```
+sourmash sketch ... -o - | sourmash sig describe -
+```
+will describe the signatures that were just created.
 
-`sourmash sketch ... -o - | sourmash sig describe -` will describe the
-signatures that were just created.
-
-### Using manifests to explicitly refer to collections of files
+### Using standalone manifests to explicitly refer to collections of files
 
 (sourmash v4.4 and later)
 
@@ -2264,9 +2296,9 @@ internals to speed up signature selection through picklists and
 pattern matching.
 
 Manifests can _also_ be used externally (via the command-line), and
-may be useful for organizing large collections of signatures. They can
-be generated with the `sig collect`, `sig manifest`, and `sig check`
-subcommands.
+these "standalone manifests" may be useful for organizing large
+collections of signatures. They can be generated with the `sig
+collect`, `sig manifest`, and `sig check` subcommands.
 
 Suppose you have a large collection of signatures (`.sig` or `.sig.gz`
 files) in a location (e.g., under a directory, or in a zip file). You
@@ -2280,21 +2312,32 @@ sourmash sig fileinfo manifest.sqlmf
 ```
 This manifest contains _references_ to the signatures (but not the
 signatures themselves) and can then be used as a database target for most
-sourmash operations - search, gather, etc.
+sourmash operations - search, gather, etc. Manifests support
+fast selection and lazy loading of sketches in many situations.
+
+The `sig check` command can also be used to create standalone manifests
+from collections using a picklist, with the `-m/--save-manifest-matching`
+option. This is useful for commands that don't support picklists natively,
+e.g. plugins and extensions.
 
-Note that `sig collect` will generate manifests containing the
-pathnames given to it - so if you use relative paths, the references
-will be relative to the working directory in which `sig collect` was
+Note that `sig collect` and `sig check` will generate manifests containing the
+pathnames given to them - so if you use relative paths, the references
+will be relative to the working directory in which the command was
 run.  You can use `sig collect --abspath` to rewrite the paths
-into absolute paths.
+into absolute paths, or `sig collect --relpath` to rewrite the paths
+relative to the manifest file.
 
 **Our advice:** We suggest using zip file collections for most
-situations; we primarily recommend using explicit manifests for
-situations where you have a **very large** collection of signatures
-(1000s or more), and don't want to make multiple copies of signatures
-in the collection (as you would have to, with a zipfile). This can be
-useful if you want to refer to different subsets of the collection
-without making multiple copies in a zip file.
+situations; we strongly recommend using standalone manifests for
+situations where you have **very large** sketches or a **very large**
+collection of sketches (1000s or more), and don't want to make
+multiple copies of signatures in the collection (as you would have to,
+with a zipfile). This is particularly useful if you want to refer to different
+subsets of the collection without making multiple copies in a zip
+file.
+
+You can read more about the details of zip files and manifests in
+[the advanced usage information for databases](databases-advanced.md).
 
 ### Using sourmash plugins
 
diff --git a/doc/databases-advanced.md b/doc/databases-advanced.md
index 9e4d1c25d7..2a1f61fd28 100644
--- a/doc/databases-advanced.md
+++ b/doc/databases-advanced.md
@@ -54,39 +54,83 @@ Both SBTs and LCA databases can only store homogenous collections of signature t
 
 We recommend SBT and LCA databases for use only in specific situations - e.g. SBTs are great for single-genome "best match" search for SBTs, and `sourmash lca` commands require LCA databases.
 
-### Manifests
-
-Manifests are catalogs of signature metadata - name, molecule type, k-mer size, and other information - that can be used to select specific signatures for searching or processing. Typically when using manifests the actual signatures themselves are not loaded until they are needed, although the efficiency of this depends on the signature storage mechanism; for example, JSON-format containers (`.sig` and `.lca.json` files) must be entirely loaded before any signature in the file them can be used, unlike zip containers.
-
-As of sourmash 4.4 manifests can be *directly* loaded from the command line as standalone collections. This lets manifests serve as a catalog of signatures stored in many different locations.
-
-Standalone manifests are preferable to both directory storage and pathlists (below), because they support fast selection and direct lazy loading. They are the most effective solution for managing custom collections of thousands to millions of signatures.
-
-Standalone manifests can be created with `sourmash sig collect`
-(sourmash v4.4 and later).
-
-Sourmash supports two manifest file formats - CSV and SQLite. SQLite manifests are much faster and lower-memory than CSV manifests in exchange for consuming some extra disk space.
+### Standalone manifests
+
+Manifests are catalogs of signature metadata - name, molecule type,
+k-mer size, and other information - that can be used to select
+specific signatures for searching or processing. Typically when using
+manifests the actual signatures themselves are not loaded until they
+are needed, although the efficiency of this depends on the signature
+storage mechanism; for example, JSON-format containers (`.sig` and
+`.lca.json` files) must be entirely loaded before any signature in the
+file can be used, unlike zip containers.
+
+As of sourmash 4.4 manifests can be *directly* loaded from the command
+line as standalone collections. This lets manifests serve as a catalog
+of signatures stored in many different locations. Sketches can be
+selected by name, k-mer size, molecule type, and other features
+without loading the actual sketch data.
+
+Standalone manifests are preferable to both directory storage and
+pathlists (below), because they support fast selection and direct lazy
+loading. This means that sourmash operations that support streaming or
+online search (such as `prefetch` and `gather`, among others) can
+avoid loading everything all at once.
+
+Standalone manifests are the most effective solution for managing custom
+collections of thousands to millions of signatures, as well as working
+with multiple large sketches.
+
+They can be created with `sourmash sig collect` and `sourmash sig
+check` (sourmash v4.4 and later).
+
+Sourmash supports two manifest file formats - CSV and SQLite. SQLite
+manifests are much faster and lower-memory than CSV manifests.
 
 ### Directories
 
-Directory hierarchies of signatures are read natively by sourmash, and can be created or extended by specifying `-o dirname/` (with a trailing slash).
+Directory hierarchies of signatures are read natively by sourmash, and
+can be created or extended by specifying `-o dirname/` (with a
+trailing slash).
 
-To read from a directory, specify the directory name on the sourmash command line. When reading from directories, the entire directory hierarchy is traversed and all `.sig` and `.sig.gz` files are loaded as signatures. If `--force` is specified, _all_ files will be read, and failures will be ignored.
+To read from a directory, specify the directory name on the sourmash
+command line. When reading from directories, the entire directory
+hierarchy is traversed and all `.sig` and `.sig.gz` files are loaded
+as signatures. If `--force` is specified, _all_ files will be read,
+and failures will be ignored.
 
-When directories are specified as outputs, the signatures will be saved by their complete md5sum underneath the directory.
+When directories are specified as outputs, the signatures will be
+saved by their complete md5sum underneath the directory.
 
-We don't particularly recommend storing signatures in directory hierarchies, since most of their use cases are now covered by other approaches.
+We don't recommend loading signatures from directory hierarchies,
+since the implementation is not particularly memory efficient and most
+of the use cases for directories are now covered by other approaches -
+in particular, standalone manifests.
 
 ### Pathlists
 
-Pathlists are text files containing paths to one or more sourmash databases; any type of sourmash-readable collection can be listed.
+Pathlists are text files containing paths to one or more sourmash
+databases; any type of sourmash-readable collection can be listed.
 
-The paths in pathlists can be relative or absolute within the file system. If they are relative, they must resolve with respect to the current working directory of the sourmash command.
+The paths in pathlists can be relative or absolute within the file
+system. If they are relative, they must resolve with respect to the
+current working directory of the sourmash command.
 
-We don't recommend using pathlists any more, since the original use cases are now supported with picklists, but they are still supported!
+We don't recommend using pathlists, since the original use cases are
+now supported with picklists and standalone manifests, but they are
+still supported. Loading sketches from pathlists is also not very
+efficient.
 
 Pathlists are not output by any sourmash commands.
 
+Many commands support `--query-from-file` or `--from-file` as a way to
+pass in a file containing many paths to sketches or collections. The
+internal implementation of sourmash simply adds these to the
+command-line arguments, and this is an effective and efficient way to
+provide long lists of files to commands like `sig check` and `sig
+collect` that create standalone manifests to support efficient lazy
+loading.
+
 ## Storing taxonomies
 
 sourmash supports taxonomic information output via the `sourmash lca` and `sourmash tax` subcommands. Both sets of commands rely on the same 7 taxonomic ranks: superkingdom, phylum, class, order, family, genus, and species (with limited support for a 'strain' rank). And both sets of subcommands take lineage spreadsheets that link specific identifiers to taxonomic lineages.
diff --git a/doc/faq.md b/doc/faq.md
index d8d9da0622..227952ff40 100644
--- a/doc/faq.md
+++ b/doc/faq.md
@@ -139,7 +139,7 @@ you use [the precomputed databases](databases.md), you will always end up
 using your query sketches at a minimum scaled of 1000, even if you created
 them with a lower scaled value.
 
-Please also see [What resolution should my signatures be?](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-how-should-i-create-them).
+Please also see [What resolution should my signatures be?](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-and-how-should-i-create-them).
 
 ## What threshold-bp value should I use with `sourmash prefetch` and `sourmash gather`?
 
diff --git a/doc/release-notes/sourmash-2.0.md b/doc/release-notes/sourmash-2.0.md
index c3b8647dd5..fbb541ad49 100644
--- a/doc/release-notes/sourmash-2.0.md
+++ b/doc/release-notes/sourmash-2.0.md
@@ -23,7 +23,7 @@ This is a list of substantial new features and functionality in sourmash 2.0.
 * Created [precomputed databases](../databases.md) for most of GenBank genomes.
 * Added taxonomic reporting functionality in the `sourmash lca` submodule - [see command-line docs](../command-line.md#sourmash-lca-subcommands-for-in-memory-taxonomy-integration).
 * Added signature manipulation utilities in the `sourmash signature` submodule - [see command-line docs](../command-line.md#sourmash-signature-subcommands-for-signature-manipulation)
-* Introduced new modulo hash or "scaled" signatures for containment analysis; see [Using sourmash: a practical guide](../using-sourmash-a-guide.md#what-resolution-should-my-signatures-be--how-should-i-create-them) and [more details in the Python API examples](../api-example.md#advanced-features-of-sourmash-minhash-objects---scaled-and-num).
+* Introduced new modulo hash or "scaled" signatures for containment analysis; see [Using sourmash: a practical guide](../using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-and-how-should-i-create-them) and [more details in the Python API examples](../api-example.md#advanced-features-of-sourmash-minhash-objects---scaled-and-num).
 * Switched to using JSON instead of YAML for signatures.
 * Many performance optimizations!
 * Many more tests!
diff --git a/doc/sourmash-sketch.md b/doc/sourmash-sketch.md
index caba1a19a8..5ad43d266e 100644
--- a/doc/sourmash-sketch.md
+++ b/doc/sourmash-sketch.md
@@ -146,7 +146,7 @@ Some of the key command-line options supported by `fromfile` are:
 * `-o/--output-signatures` will save generated signatures to any of the [standard supported output formats](command-line.md#choosing-signature-output-formats).
 * `-o/--output-csv-info` will save a CSV file of input filenames and parameter strings for use with the `sourmash sketch` command line; this can be used to construct signatures in parallel.
 * `--already-done` will take a list of existing signatures/databases to check against; signatures with matching names and parameter strings will not be rebuilt.
-* `--output-manifest-matching` will output a manifest of already-existing signatures, which can then be used with `sourmash sig cat` to collate signatures across databases; see [using manifests](command-line.md#using-manifests-to-explicitly-refer-to-collections-of-files). (This provides [`sourmash sig check` functionality](command-line.md#sourmash-signature-check---compare-picklists-and-manifests) in `sketch fromfile`.)
+* `--output-manifest-matching` will output a manifest of already-existing signatures, which can then be used with `sourmash sig cat` to collate signatures across databases; see [using manifests](command-line.md#using-standalone-manifests-to-explicitly-refer-to-collections-of-files). (This provides [`sourmash sig check` functionality](command-line.md#sourmash-signature-check---compare-picklists-and-manifests) in `sketch fromfile`.)
 
 If you would like help and advice on constructing large databases, or
 pointers to code for generating the `fromfile` CSV format, please ask
@@ -200,8 +200,8 @@ The `-p` argument to `sourmash sketch` provides parameter strings to sourmash, a
 
 A parameter string is a space-delimited collection that can contain one or more fields, comma-separated.
 * `k=<ksize>` - create a sketch at this k-mer size; can provide more than one time in a parameter string. Typically `ksize` is between 4 and 100.
-* `scaled=<int>` - create a scaled MinHash with k-mers sampled deterministically at 1 per `<scaled>` value. This controls sketch compression rates and resolution; for example, a 5 Mbp genome sketched with a scaled of 1000 would yield approximately 5,000 k-mers. `scaled` is incompatible with `num`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be--how-should-i-create-them) for more information.
-* `num=<int>` - create a standard MinHash with no more than `<num>` k-mers kept. This will produce sketches identical to [mash sketches](https://mash.readthedocs.io/en/latest/). `num` is incompatible with `scaled`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be--how-should-i-create-them) for more information.
+* `scaled=<int>` - create a scaled MinHash with k-mers sampled deterministically at 1 per `<scaled>` value. This controls sketch compression rates and resolution; for example, a 5 Mbp genome sketched with a scaled of 1000 would yield approximately 5,000 k-mers. `scaled` is incompatible with `num`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-and-how-should-i-create-them) for more information.
+* `num=<int>` - create a standard MinHash with no more than `<num>` k-mers kept. This will produce sketches identical to [mash sketches](https://mash.readthedocs.io/en/latest/). `num` is incompatible with `scaled`. See [our guide to signature resolution](using-sourmash-a-guide.md#what-resolution-should-my-signatures-be-and-how-should-i-create-them) for more information.
 * `abund` / `noabund` - create abundance-weighted (or not) sketches. See [Classify signatures: Abundance Weighting](classifying-signatures.md#abundance-weighting) for details of how this works.
 * `dna`, `protein`, `dayhoff`, `hp` - create this kind of sketch. Note that `sourmash sketch dna -p protein` and `sourmash sketch protein -p dna` are invalid; please use `sourmash sketch translate` for the former.
 * `seed=<int>` - set the random number seed used for k-mer hashing. This is for advanced users who want to choose a completely different set of k-mers for sketches! The default is 42.
diff --git a/doc/using-sourmash-a-guide.md b/doc/using-sourmash-a-guide.md
index 29ccc52ec1..a3600c1337 100644
--- a/doc/using-sourmash-a-guide.md
+++ b/doc/using-sourmash-a-guide.md
@@ -41,7 +41,7 @@ however, and it probably doesn't really matter.
 (When we have blog posts or publications providing more formal
 guidance, we'll link to them here!)
 
-## What resolution should my signatures be / how should I create them?
+## What resolution should my signatures be and how should I create them?
 
 sourmash supports two ways of choosing the resolution or size of
 your signatures: using `num` to specify the maximum number of hashes,
diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py
index 94e1928175..0a9cd4bc9e 100644
--- a/src/sourmash/sig/__main__.py
+++ b/src/sourmash/sig/__main__.py
@@ -1550,8 +1550,6 @@ def check(args):
 
 def collect(args):
     "Collect signature metadata across many locations, save to manifest"
-    # TODO:
-    # test what happens with directories :)
     set_quiet(False, args.debug)
 
     if os.path.exists(args.output):

From 24ab89cb1240c125c1cdf0b1517d6e9a1c07b691 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Thu, 21 Mar 2024 09:40:37 -0700
Subject: [PATCH 08/19] MRG: 4.8.7 release branch (#3090)

# Release notes for sourmash v4.8.7

Note: This release changes the way `sourmash multigather` names output
files in some situations. Please see
https://github.com/sourmash-bio/sourmash/pull/2722 for details.

Minor new features:

* support proper manifest creation with `--relpath` for `sig check` and
`sig collect` (#3054)
* fix `multigather` output by adding md5sum along with
`-U/--output-add-query-md5sum` (#2722)
* enable loading lineages from annotated gather with match_name instead
of name (#3078)

Bug fixes:

* fix output for `sketch ... --singleton` (#3066)
* fix `calculate_gather_stats` `threshold=0` bug (#3052)

Cleanup and documentation updates:

* adjust protein ksize for record/manifest (#3019)
* Resolve `sourmash gather --help` issue (#3032)
* rework the manifest documentation; do misc cleanup (#3027)
* add branchwater web to docs (#3018)

Developer updates:

* make core Manifest booleans python compatible (core) (#3007)
* safer ksize selection while still accommodating k=k*3 (#3028)
* fix clippy beta issues (#3088)
* tell dependabot to ignore upgrades to `byteorder`, `chrono`,
`once_cell`, and `wasm-bindgen` (#3065)
* update rust changelog for r0.13.0 in preparation for release (#3033)
* Allow changing storage location for a collection in RevIndex (#3015)
* Fix tox and nix configs so all tox tests execute correctly (#2992)
* Calculate all gather stats in rust; use for rocksdb gather (#2943)
* bump screed req to 1.1.3 (#3067)
* bump to v4.8.7-dev (#2989)

Dependabot updates:

* Bump DeterminateSystems/magic-nix-cache-action from 1 to 3 (#2994)
* Bump DeterminateSystems/magic-nix-cache-action from 3 to 4 (#3085)
* Bump DeterminateSystems/nix-installer-action from 4 to 9 (#2995)
* Bump DeterminateSystems/nix-installer-action from 9 to 10 (#3083)
* Bump chrono from 0.4.33 to 0.4.34 (#3000)
* Bump conda-incubator/setup-miniconda from 3.0.1 to 3.0.2 (#3046)
* Bump conda-incubator/setup-miniconda from 3.0.2 to 3.0.3 (#3057)
* Bump histogram from 0.9.0 to 0.9.1 (#3002)
* Bump itertools from 0.12.0 to 0.12.1 (#3043)
* Bump log from 0.4.20 to 0.4.21 (#3062)
* Bump num-iter from 0.1.43 to 0.1.44 (#2997)
* Bump pypa/cibuildwheel from 2.16.5 to 2.17.0 (#3084)
* Bump rayon from 1.8.1 to 1.9.0 (#3058)
* Bump roaring from 0.10.2 to 0.10.3 (#3014)
* Bump serde from 1.0.196 to 1.0.197 (#3045)
* Bump serde_json from 1.0.113 to 1.0.114 (#3044)
* Bump tempfile from 3.10.0 to 3.10.1 (#3059)
* Bump thiserror from 1.0.56 to 1.0.57 (#2999)
* Bump thiserror from 1.0.57 to 1.0.58 (#3082)
* Bump wasm-bindgen from 0.2.91 to 0.2.92 (#3060)
* Bump wasm-bindgen-test from 0.3.40 to 0.3.41 (#2996)
* Bump wasm-bindgen-test from 0.3.41 to 0.3.42 (#3063)
* Bump web-sys from 0.3.67 to 0.3.68 (#2998)
* Bump web-sys from 0.3.68 to 0.3.69 (#3061)
* Revert "Bump wasm-bindgen from 0.2.91 to 0.2.92 (#3060)" (#3064)
* Update asv to 0.6.2 (#3025)
* Update pytest requirement from <8.1.0,>=6.2.4 to >=6.2.4,<8.2.0
(#3075)
---
 flake.nix      | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/flake.nix b/flake.nix
index 06ecc32fb4..8d4fae898e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -68,7 +68,7 @@
 
           sourmash = python.buildPythonPackage ( commonArgs // rec {
             pname = "sourmash";
-            version = "4.8.6";
+            version = "4.8.7";
             format = "pyproject";
 
             cargoDeps = rustPlatform.importCargoLock {
diff --git a/pyproject.toml b/pyproject.toml
index 291c732093..3e93f49d9c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ build-backend = 'maturin'
 name = "sourmash"
 description = "tools for comparing biological sequences with k-mer sketches"
 readme = "README.md"
-version = "4.8.7-dev"
+version = "4.8.7"
 
 authors = [
   { name="Luiz Irber", orcid="0000-0003-4371-9659" },

From 2b1bf0ddc27b342fdd686a01e7a6a0bdbe0e8585 Mon Sep 17 00:00:00 2001
From: Luiz Irber <luizirber@users.noreply.github.com>
Date: Sat, 23 Mar 2024 20:31:50 +0000
Subject: [PATCH 09/19] Implement file parsing for webassembly (#3047)

Address
https://github.com/sourmash-bio/sourmash/issues/1577#issuecomment-916891602

This PR implements `Read` for `File` in browsers, which allows using
`niffler` + `needletail` to parse FASTA/Q, `.gz`-compressed or not, in
browsers.

I also added error handling, so the browser can print nicer error
messages instead of something cryptic to `console.log`.
---
 Cargo.lock                         |   3 +-
 Makefile                           |   3 +
 flake.nix                          |   1 +
 src/core/CHANGELOG.md              |  30 ++++-
 src/core/Cargo.toml                |  27 ++--
 src/core/src/wasm.rs               | 193 ++++++++++++++++++++++++++---
 src/core/tests/dedicated_worker.rs |   5 +
 src/core/tests/node.rs             |   8 ++
 src/core/tests/service_worker.rs   |   5 +
 src/core/tests/shared_worker.rs    |   5 +
 src/core/tests/web.rs              |   5 +
 11 files changed, 254 insertions(+), 31 deletions(-)
 create mode 100644 src/core/tests/dedicated_worker.rs
 create mode 100644 src/core/tests/node.rs
 create mode 100644 src/core/tests/service_worker.rs
 create mode 100644 src/core/tests/shared_worker.rs
 create mode 100644 src/core/tests/web.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2db96670de..0795b19d9b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1607,7 +1607,7 @@ checksum = "9f1341053f34bb13b5e9590afb7d94b48b48d4b87467ec28e3c238693bb553de"
 
 [[package]]
 name = "sourmash"
-version = "0.13.0"
+version = "0.13.1"
 dependencies = [
  "az",
  "byteorder",
@@ -1624,6 +1624,7 @@ dependencies = [
  "getset",
  "histogram",
  "itertools 0.12.1",
+ "js-sys",
  "log",
  "md5",
  "memmap2",
diff --git a/Makefile b/Makefile
index 9b26d91331..891b710732 100644
--- a/Makefile
+++ b/Makefile
@@ -56,6 +56,9 @@ last-tag:
 wasm:
 	wasm-pack build src/core -d ../../pkg
 
+wasm-test:
+	wasm-pack test --node src/core
+
 wasi:
 	cargo wasi build
 
diff --git a/flake.nix b/flake.nix
index 8d4fae898e..57213ac6aa 100644
--- a/flake.nix
+++ b/flake.nix
@@ -128,6 +128,7 @@
             cargo-outdated
             cargo-udeps
             cargo-deny
+            cargo-wasi
             #cargo-semver-checks
             nixpkgs-fmt
           ];
diff --git a/src/core/CHANGELOG.md b/src/core/CHANGELOG.md
index 67a3134144..ac4d169e80 100644
--- a/src/core/CHANGELOG.md
+++ b/src/core/CHANGELOG.md
@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [unreleased]
+
+## [0.13.1] - 2024-03-23
+
+MSRV: 1.65
+
+Changes/additions:
+
+* Implement file parsing for webassembly (#3047)
+* fix `calculate_gather_stats` `threshold=0` bug (#3052)
+* fix clippy beta issues (#3088)
+
+Updates:
+
+* Bump wasm-bindgen-test from 0.3.41 to 0.3.42 (#3063)
+* Bump web-sys from 0.3.68 to 0.3.69 (#3061)
+* Bump log from 0.4.20 to 0.4.21 (#3062)
+* Bump rayon from 1.8.1 to 1.9.0 (#3058)
+* Bump tempfile from 3.10.0 to 3.10.1 (#3059)
+* Bump serde_json from 1.0.113 to 1.0.114 (#3044)
+* Bump serde from 1.0.196 to 1.0.197 (#3045)
+* Bump itertools from 0.12.0 to 0.12.1 (#3043)
+
 ## [0.13.0] - 2024-02-23
 
 MSRV: 1.65
@@ -17,6 +40,7 @@ Changes/additions:
 * make core Manifest booleans python compatible (core) (#3007)
 
 Updates:
+
 * Bump roaring from 0.10.2 to 0.10.3 (#3014)
 * Bump histogram from 0.9.0 to 0.9.1 (#3002)
 * Bump chrono from 0.4.33 to 0.4.34 (#3000)
@@ -287,7 +311,11 @@ Fixed:
 - Fix mem leak in get_mins (#807)
 - Fixes for WASI and WASM compilation (#771) (#723)
 
-[unreleased]: https://github.com/sourmash-bio/sourmash/compare/r0.11.0...HEAD
+[unreleased]: https://github.com/sourmash-bio/sourmash/compare/r0.13.1...HEAD
+[0.13.1]: https://github.com/sourmash-bio/sourmash/compare/r0.13.0...r0.13.1
+[0.13.0]: https://github.com/sourmash-bio/sourmash/compare/r0.12.1...r0.13.0
+[0.12.1]: https://github.com/sourmash-bio/sourmash/compare/r0.12.0...r0.12.1
+[0.12.0]: https://github.com/sourmash-bio/sourmash/compare/r0.11.0...r0.12.0
 [0.11.0]: https://github.com/sourmash-bio/sourmash/compare/r0.10.0...r0.11.0
 [0.10.0]: https://github.com/sourmash-bio/sourmash/compare/r0.9.0...r0.10.0
 [0.9.0]: https://github.com/sourmash-bio/sourmash/compare/r0.9.0...r0.10.0
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 0f292db6d6..2b4ae08b59 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -1,8 +1,8 @@
 [package]
 name = "sourmash"
-version = "0.13.0"
-authors = ["Luiz Irber <luiz.irber@gmail.com>"]
-description = "MinHash sketches for genomic data"
+version = "0.13.1"
+authors = ["Luiz Irber <luiz@sourmash.bio>", "N. Tessa Pierce-Ward <tessa@sourmash.bio>"]
+description = "tools for comparing biological sequences with k-mer sketches"
 repository = "https://github.com/sourmash-bio/sourmash"
 keywords = ["minhash", "bioinformatics"]
 categories = ["science", "algorithms", "data-structures"]
@@ -43,6 +43,7 @@ log = "0.4.21"
 md5 = "0.7.0"
 memmap2 = "0.9.4"
 murmurhash3 = "0.0.5"
+needletail = { version = "0.5.1", default-features = false }
 niffler = { version = "2.3.1", default-features = false, features = [ "gz" ] }
 nohash-hasher = "0.2.0"
 num-iter = "0.1.44"
@@ -64,8 +65,6 @@ typed-builder = "0.18.0"
 vec-collections = "0.4.3"
 
 [dev-dependencies]
-criterion = "0.5.1"
-needletail = { version = "0.5.1", default-features = false }
 proptest = { version = "1.4.0", default-features = false, features = ["std"]}
 rand = "0.8.2"
 tempfile = "3.10.1"
@@ -95,17 +94,13 @@ skip_feature_sets = [
 
 ## Wasm section. Crates only used for WASM, as well as specific configurations
 
-[target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.wasm-bindgen]
-version = "0.2.89"
-features = ["serde-serialize"]
+[target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies]
+js-sys = "0.3.68"
+web-sys = { version = "0.3.69", features = ["console", "File", "FileReaderSync"] }
+wasm-bindgen = { version = "0.2.89", features = ["serde-serialize"] }
 
-[target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.web-sys]
-version = "0.3.69"
-features = ["console", "File"]
-
-[target.'cfg(all(target_arch = "wasm32"))'.dependencies.chrono]
-version = "0.4.32"
-features = ["wasmbind"]
+[target.'cfg(all(target_arch = "wasm32"))'.dependencies]
+chrono = { version = "0.4.32", features = ["wasmbind"] }
 
 [target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dev-dependencies]
 wasm-bindgen-test = "0.3.42"
@@ -113,3 +108,5 @@ wasm-bindgen-test = "0.3.42"
 ### These crates don't compile on wasm
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
 rocksdb = { version = "0.21.0", optional = true }
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
+criterion = "0.5.1"
diff --git a/src/core/src/wasm.rs b/src/core/src/wasm.rs
index c2a0eb6c30..cd9efec091 100644
--- a/src/core/src/wasm.rs
+++ b/src/core/src/wasm.rs
@@ -4,6 +4,7 @@
 #[global_allocator]
 static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
 
+use needletail::parse_fastx_reader;
 use wasm_bindgen::prelude::*;
 
 use crate::cmd::ComputeParameters as _ComputeParameters;
@@ -57,15 +58,15 @@ impl KmerMinHash {
     }
 
     #[wasm_bindgen]
-    pub fn add_sequence_js(&mut self, buf: &str) {
-        self.0
-            .add_sequence(buf.as_bytes(), true)
-            .expect("Error adding sequence");
+    pub fn add_sequence_js(&mut self, buf: &str) -> Result<(), JsErrors> {
+        self.0.add_sequence(buf.as_bytes(), true)?;
+        Ok(())
     }
 
     #[wasm_bindgen]
-    pub fn to_json(&mut self) -> String {
-        serde_json::to_string(&self.0).unwrap()
+    pub fn to_json(&mut self) -> Result<String, JsErrors> {
+        let json = serde_json::to_string(&self.0)?;
+        Ok(json)
     }
 }
 
@@ -81,6 +82,40 @@ impl ComputeParameters {
     pub fn set_ksizes(&mut self, ksizes: Vec<u32>) {
         self.0.set_ksizes(ksizes);
     }
+
+    #[wasm_bindgen]
+    pub fn set_scaled(&mut self, scaled: u32) {
+        self.0.set_scaled(scaled as u64);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_num(&mut self, num: u32) {
+        self.0.set_num_hashes(num);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_protein(&mut self, is_protein: bool) {
+        self.0.set_protein(is_protein);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_dayhoff(&mut self, dayhoff: bool) {
+        self.0.set_dayhoff(dayhoff);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_hp(&mut self, hp: bool) {
+        self.0.set_hp(hp);
+    }
+
+    #[wasm_bindgen]
+    pub fn set_track_abundance(&mut self, track: bool) {
+        self.0.set_track_abundance(track);
+    }
+    #[wasm_bindgen]
+    pub fn set_seed(&mut self, seed: u32) {
+        self.0.set_seed(seed.into());
+    }
 }
 
 #[wasm_bindgen]
@@ -93,20 +128,39 @@ impl Signature {
     }
 
     #[wasm_bindgen]
-    pub fn add_sequence_js(&mut self, buf: &str) {
-        self.0
-            .add_sequence(buf.as_bytes(), true)
-            .expect("Error adding sequence");
+    pub fn add_sequence_js(&mut self, buf: &str) -> Result<(), JsErrors> {
+        self.0.add_sequence(buf.as_bytes(), true)?;
+
+        Ok(())
     }
 
     #[wasm_bindgen]
-    pub fn add_from_file(&mut self, fp: web_sys::File) {
-        unimplemented!()
+    pub fn add_from_file(
+        &mut self,
+        fp: web_sys::File,
+        callback: Option<js_sys::Function>,
+    ) -> Result<(), JsErrors> {
+        let wf = SyncFile::new(fp, callback);
+
+        let (rdr, _format) = niffler::send::get_reader(Box::new(wf))?;
+
+        let mut parser = parse_fastx_reader(std::io::BufReader::with_capacity(
+            1024 << 14, // 16 MiB
+            rdr,
+        ))?;
+
+        while let Some(record) = parser.next() {
+            let record = record?;
+            self.0.add_sequence(&record.seq(), true)?;
+        }
+
+        Ok(())
     }
 
     #[wasm_bindgen]
-    pub fn to_json(&mut self) -> String {
-        serde_json::to_string(&self.0).unwrap()
+    pub fn to_json(&mut self) -> Result<String, JsErrors> {
+        let json = serde_json::to_string(&self.0)?;
+        Ok(json)
     }
 
     pub fn size(&self) -> usize {
@@ -114,6 +168,28 @@ impl Signature {
     }
 }
 
+#[derive(thiserror::Error, Debug)]
+pub enum JsErrors {
+    #[error(transparent)]
+    SourmashError(#[from] crate::Error),
+
+    #[error(transparent)]
+    SerdeError(#[from] serde_json::error::Error),
+
+    #[error(transparent)]
+    NifflerError(#[from] niffler::Error),
+
+    #[error(transparent)]
+    NeedletailError(#[from] needletail::errors::ParseError),
+}
+
+impl Into<JsValue> for JsErrors {
+    fn into(self) -> JsValue {
+        let error = js_sys::Error::new(&self.to_string());
+        error.into()
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -127,3 +203,92 @@ mod test {
         assert_eq!(sig.size(), 3);
     }
 }
+
+// ==============================
+
+use js_sys::Number;
+use js_sys::Uint8Array;
+use once_cell::sync::Lazy;
+use web_sys::FileReaderSync;
+
+thread_local! {
+    static FILE_READER_SYNC: Lazy<FileReaderSync> = Lazy::new(|| {
+      FileReaderSync::new().expect("Failed to create FileReaderSync. Is it running in a web worker context?")
+    });
+}
+
+/// Wrapper around a `web_sys::File` that implements `Read`.
+pub struct SyncFile {
+    file: web_sys::File,
+    pos: u64,
+    cb: Option<js_sys::Function>,
+}
+
+/// Because this needs to be initialized in a Web Worker, it is safe to make it Send.
+/// (hopefully. I don't think they can be sent across Web Workers, nor accessed from other WW)
+unsafe impl Send for SyncFile {}
+
+impl SyncFile {
+    pub fn new(file: web_sys::File, cb: Option<js_sys::Function>) -> Self {
+        Self { file, pos: 0, cb }
+    }
+
+    /// File size in bytes.
+    pub fn size(&self) -> u64 {
+        let size = self.file.size();
+        if size <= Number::MAX_SAFE_INTEGER {
+            return size as u64;
+        } else {
+            panic!("size is not safe to convert to integer from float")
+        }
+    }
+
+    fn set_pos(&mut self, pos: u64) {
+        self.pos = pos;
+        self.cb.as_ref().map(|f| {
+            let arr = js_sys::Array::new_with_length(1);
+            arr.set(0, self.progress().into());
+            f.apply(&JsValue::null(), &arr)
+                .expect("Error calling progress callback");
+        });
+    }
+
+    /// Current progress on the file
+    pub fn progress(&self) -> f64 {
+        self.pos as f64 / self.file.size()
+    }
+}
+
+impl std::io::Read for SyncFile {
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
+        let current_offset = self.pos;
+        let new_offset_f64 = current_offset as f64;
+        let new_offset_end_f64 = current_offset.saturating_add(
+            u64::try_from(buf.len()).map_err(|_| std::io::Error::other("Can't convert to u64"))?,
+        ) as f64;
+
+        let blob = self
+            .file
+            .slice_with_f64_and_f64(new_offset_f64, new_offset_end_f64)
+            .map_err(|_| std::io::Error::other("failed to slice file"))?;
+        let array_buffer = FILE_READER_SYNC
+            .with(|frs| frs.read_as_array_buffer(&blob))
+            .map_err(|_| std::io::Error::other("failed to read as array buffer"))?;
+
+        let array = Uint8Array::new(&array_buffer);
+        let read_bytes = usize::try_from(array.byte_length())
+            .map_err(|_| std::io::Error::other("read too many bytes at once"))?;
+
+        // Copy to output buffer
+        array.copy_to(&mut buf[..read_bytes]);
+
+        // Update position
+        self.set_pos(
+            current_offset
+                .checked_add(read_bytes as u64)
+                .ok_or_else(|| std::io::Error::other("new position too large"))?,
+        );
+
+        Ok(read_bytes)
+    }
+}
diff --git a/src/core/tests/dedicated_worker.rs b/src/core/tests/dedicated_worker.rs
new file mode 100644
index 0000000000..f7186a003f
--- /dev/null
+++ b/src/core/tests/dedicated_worker.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_dedicated_worker);
diff --git a/src/core/tests/node.rs b/src/core/tests/node.rs
new file mode 100644
index 0000000000..f846433061
--- /dev/null
+++ b/src/core/tests/node.rs
@@ -0,0 +1,8 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::*;
+
+#[wasm_bindgen_test]
+fn pass() {
+    assert_eq!(1, 1); // always true; nothing project-specific is exercised here
+}
diff --git a/src/core/tests/service_worker.rs b/src/core/tests/service_worker.rs
new file mode 100644
index 0000000000..dae9341d9e
--- /dev/null
+++ b/src/core/tests/service_worker.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_service_worker);
diff --git a/src/core/tests/shared_worker.rs b/src/core/tests/shared_worker.rs
new file mode 100644
index 0000000000..8d8bfc7a4f
--- /dev/null
+++ b/src/core/tests/shared_worker.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_shared_worker);
diff --git a/src/core/tests/web.rs b/src/core/tests/web.rs
new file mode 100644
index 0000000000..3bbc3dad61
--- /dev/null
+++ b/src/core/tests/web.rs
@@ -0,0 +1,5 @@
+#![cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+
+use wasm_bindgen_test::wasm_bindgen_test_configure;
+
+wasm_bindgen_test_configure!(run_in_browser);

From 9d9fe98089c708776114dfd5ccb291d15978a9e2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 27 Mar 2024 05:43:40 -0700
Subject: [PATCH 10/19] Bump rayon from 1.9.0 to 1.10.0 (#3098)

Bumps [rayon](https://github.com/rayon-rs/rayon) from 1.9.0 to 1.10.0.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/rayon-rs/rayon/blob/main/RELEASES.md">rayon's
changelog</a>.</em></p>
<blockquote>
<h1>Release rayon 1.10.0 (2024-03-23)</h1>
<ul>
<li>The new methods <code>ParallelSlice::par_chunk_by</code> and
<code>ParallelSliceMut::par_chunk_by_mut</code> work like the slice
methods <code>chunk_by</code>
and <code>chunk_by_mut</code> added in Rust 1.77.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/rayon-rs/rayon/commit/4a6e9bf6f348c213d780c5a0eff000c011ce055e"><code>4a6e9bf</code></a>
Merge <a
href="https://redirect.github.com/rayon-rs/rayon/issues/991">#991</a></li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/b0008f31b168a99e55d224a728ff2a4ddc2fe11a"><code>b0008f3</code></a>
Release rayon 1.6.0 / rayon-core 1.10.0</li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/c2dfa5c8684d88c20b0ba27a8a3bf762cf96af92"><code>c2dfa5c</code></a>
Merge <a
href="https://redirect.github.com/rayon-rs/rayon/issues/990">#990</a></li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/17f5b08bb3d6df7393b4e7eb8fc3b7829e501fb9"><code>17f5b08</code></a>
fix typo</li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/ca9b279d8316285aebef9f736edc35933de3f023"><code>ca9b279</code></a>
Merge <a
href="https://redirect.github.com/rayon-rs/rayon/issues/989">#989</a></li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/a119f2323aca7fbf9e74b4b632e63161026b5b52"><code>a119f23</code></a>
Unify <code>chunks</code>, <code>fold_chunks</code>, and
<code>fold_chunks_with</code></li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/911d6d098c385ed07a66be7402ba3319d119a9c1"><code>911d6d0</code></a>
Merge <a
href="https://redirect.github.com/rayon-rs/rayon/issues/492">#492</a></li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/9ef85cd5d84966bc332eaa408c38be141f52e0d6"><code>9ef85cd</code></a>
Add some documentation about <em>when</em> broadcasts run</li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/bd7b61ca8bf2ec472c74d221adfc4f8b22d2d090"><code>bd7b61c</code></a>
Add more internal enforcement of static/scope lifetimes</li>
<li><a
href="https://github.com/rayon-rs/rayon/commit/812ca025aedddea8a4c7d8477146527b71b33e19"><code>812ca02</code></a>
Simplify calls that use the panic_handler</li>
<li>Additional commits viewable in <a
href="https://github.com/rayon-rs/rayon/compare/rayon-core-v1.9.0...rayon-core-v1.10.0">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rayon&package-manager=cargo&previous-version=1.9.0&new-version=1.10.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock          | 4 ++--
 src/core/Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0795b19d9b..3d3f72ec05 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1347,9 +1347,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
 
 [[package]]
 name = "rayon"
-version = "1.9.0"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
 dependencies = [
  "either",
  "rayon-core",
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 2b4ae08b59..1161e21428 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -51,7 +51,7 @@ once_cell = "1.18.0"
 ouroboros = "0.18.3"
 piz = "0.5.0"
 primal-check = "0.3.1"
-rayon = { version = "1.9.0", optional = true }
+rayon = { version = "1.10.0", optional = true }
 rkyv = { version = "0.7.44", optional = true }
 roaring = "0.10.3"
 roots = "0.0.8"

From e0ed4c3c86046316aa725a0b56ea7e7f0c06d7df Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 27 Mar 2024 13:25:30 +0000
Subject: [PATCH 11/19] Update pytest-cov requirement from <5.0,>=4 to >=4,<6.0
 (#3097)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updates the requirements on
[pytest-cov](https://github.com/pytest-dev/pytest-cov) to permit the
latest version.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/pytest-dev/pytest-cov/blob/master/CHANGELOG.rst">pytest-cov's
changelog</a>.</em></p>
<blockquote>
<h2>5.0.0 (2024-03-24)</h2>
<ul>
<li>Removed support for xdist rsync (now deprecated).
Contributed by Matthias Reichenbach in
<code>[#623](https://github.com/pytest-dev/pytest-cov/issues/623)
&lt;https://github.com/pytest-dev/pytest-cov/pull/623&gt;</code>_.</li>
<li>Switched docs theme to Furo.</li>
<li>Various legacy Python cleanup and CI improvements.
Contributed by Christian Clauss and Hugo van Kemenade in
<code>[#630](https://github.com/pytest-dev/pytest-cov/issues/630)
&lt;https://github.com/pytest-dev/pytest-cov/pull/630&gt;</code><em>,
<code>[#631](https://github.com/pytest-dev/pytest-cov/issues/631)
&lt;https://github.com/pytest-dev/pytest-cov/pull/631&gt;</code></em>,
<code>[#632](https://github.com/pytest-dev/pytest-cov/issues/632)
&lt;https://github.com/pytest-dev/pytest-cov/pull/632&gt;</code>_ and
<code>[#633](https://github.com/pytest-dev/pytest-cov/issues/633)
&lt;https://github.com/pytest-dev/pytest-cov/pull/633&gt;</code>_.</li>
<li>Added a <code>pyproject.toml</code> example in the docs.
Contributed by Dawn James in
<code>[#626](https://github.com/pytest-dev/pytest-cov/issues/626)
&lt;https://github.com/pytest-dev/pytest-cov/pull/626&gt;</code>_.</li>
<li>Modernized project's pre-commit hooks to use ruff. Initial POC
contributed by
Christian Clauss in
<code>[#584](https://github.com/pytest-dev/pytest-cov/issues/584)
&lt;https://github.com/pytest-dev/pytest-cov/pull/584&gt;</code>_.</li>
</ul>
<h2>4.1.0 (2023-05-24)</h2>
<ul>
<li>Updated CI with new Pythons and dependencies.</li>
<li>Removed rsyncdir support. This makes pytest-cov compatible with
xdist 3.0.
Contributed by Sorin Sbarnea in
<code>[#558](https://github.com/pytest-dev/pytest-cov/issues/558)
&lt;https://github.com/pytest-dev/pytest-cov/pull/558&gt;</code>_.</li>
<li>Optimized summary generation to not be performed if no reporting is
active (for example,
when <code>--cov-report=''</code> is used without
<code>--cov-fail-under</code>).
Contributed by Jonathan Stewmon in
<code>[#589](https://github.com/pytest-dev/pytest-cov/issues/589)
&lt;https://github.com/pytest-dev/pytest-cov/pull/589&gt;</code>_.</li>
<li>Added support for JSON reporting.
Contributed by Matthew Gamble in
<code>[#582](https://github.com/pytest-dev/pytest-cov/issues/582)
&lt;https://github.com/pytest-dev/pytest-cov/pull/582&gt;</code>_.</li>
<li>Refactored code to use f-strings.
Contributed by Mark Mayo in
<code>[#572](https://github.com/pytest-dev/pytest-cov/issues/572)
&lt;https://github.com/pytest-dev/pytest-cov/pull/572&gt;</code>_.</li>
<li>Fixed a skip in the test suite for some old xdist.
Contributed by a bunch of people in
<code>[#565](https://github.com/pytest-dev/pytest-cov/issues/565)
&lt;https://github.com/pytest-dev/pytest-cov/pull/565&gt;</code>_.</li>
</ul>
<h2>4.0.0 (2022-09-28)</h2>
<p><strong>Note that this release drops support for
multiprocessing.</strong></p>
<ul>
<li>
<p><code>--cov-fail-under</code> no longer causes <code>pytest
--collect-only</code> to fail
Contributed by Zac Hatfield-Dodds in
<code>[#511](https://github.com/pytest-dev/pytest-cov/issues/511)
&lt;https://github.com/pytest-dev/pytest-cov/pull/511&gt;</code>_.</p>
</li>
<li>
<p>Dropped support for multiprocessing (mostly because <code>issue 82408
&lt;https://github.com/python/cpython/issues/82408&gt;</code>_). This
feature was
mostly working but very broken in certain scenarios and made the test
suite very flaky and slow.</p>
<p>There is builtin multiprocessing support in coverage and you can
migrate to that. All you need is this in your
<code>.coveragerc</code>::</p>
<p>[run]
concurrency = multiprocessing</p>
</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/5295ce01c84262cec88f31255e9ac538718f3047"><code>5295ce0</code></a>
Bump version: 4.1.0 → 5.0.0</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/1181b067972bf94569f8011f3b18f271690f9ab1"><code>1181b06</code></a>
Update changelog.</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/9757222e2e044361e70125ebdd96e5eb87395983"><code>9757222</code></a>
Fix a minor grammar error (<a
href="https://redirect.github.com/pytest-dev/pytest-cov/issues/636">#636</a>)</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/9f5cd81a0dbe3fe41681efdbef516c08988fe8ff"><code>9f5cd81</code></a>
Cleanup releasing instructions. Closes <a
href="https://redirect.github.com/pytest-dev/pytest-cov/issues/616">#616</a>.</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/93b5047ec5050d63c10a6fe16a09b671a7a03df8"><code>93b5047</code></a>
Add test for pyproject.toml loading without explicit --cov-config. Ref
<a
href="https://redirect.github.com/pytest-dev/pytest-cov/issues/508">#508</a>.</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/ff50860d7c67b920503745d92a3f0944cf41f982"><code>ff50860</code></a>
docs: add config instructions for pyproject.toml.</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/4a5a4b5fa4b1c63ddcab5cbc1813798c9b6f1d36"><code>4a5a4b5</code></a>
Keep GitHub Actions up to date with GitHub's Dependabot</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/1d7f55963d5138f41c452a946f7cca7e0b6ee8b2"><code>1d7f559</code></a>
Fix or remove URLs that are causing docs tests to fail</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/6a5af8e85b8242ac815f33e26adf9068f5f0ebc3"><code>6a5af8e</code></a>
Update changelog.</li>
<li><a
href="https://github.com/pytest-dev/pytest-cov/commit/d9fe8dfed15023d3410dd299c5092e755b8981c2"><code>d9fe8df</code></a>
Switch to furo. Closes <a
href="https://redirect.github.com/pytest-dev/pytest-cov/issues/618">#618</a>.</li>
<li>Additional commits viewable in <a
href="https://github.com/pytest-dev/pytest-cov/compare/v4.0.0...v5.0.0">compare
view</a></li>
</ul>
</details>
<br />


Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3e93f49d9c..3a141d4b79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,7 +101,7 @@ license = { text = "BSD 3-Clause License" }
 [project.optional-dependencies]
 test = [
   "pytest>=6.2.4,<8.2.0",
-  "pytest-cov>=4,<5.0",
+  "pytest-cov>=4,<6.0",
   "pytest-xdist>=3.1",
   "pyyaml>=6,<7",
   "diff-cover>=7.3",

From 534a3dbf2aba6b7c1d2c75b60698aba8a1651689 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 1 Apr 2024 22:14:08 +0000
Subject: [PATCH 12/19] Bump serde_json from 1.0.114 to 1.0.115 (#3101)

Bumps [serde_json](https://github.com/serde-rs/json) from 1.0.114 to
1.0.115.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/serde-rs/json/releases">serde_json's
releases</a>.</em></p>
<blockquote>
<h2>v1.0.115</h2>
<ul>
<li>Documentation improvements</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/serde-rs/json/commit/b1ebf3888ed728c66c69581ba8d9c4aa7483f486"><code>b1ebf38</code></a>
Release 1.0.115</li>
<li><a
href="https://github.com/serde-rs/json/commit/c3dc153e0681f32e9c55152cb11cbfa6a7192a7f"><code>c3dc153</code></a>
Merge pull request <a
href="https://redirect.github.com/serde-rs/json/issues/1119">#1119</a>
from titaniumtraveler/pr</li>
<li><a
href="https://github.com/serde-rs/json/commit/218770bb7531b0e491a2883eafc40ade5b1eeaf5"><code>218770b</code></a>
Explicitly install a Rust toolchain for cargo-outdated job</li>
<li><a
href="https://github.com/serde-rs/json/commit/840da8e89241d46482c40038d3acdb6745ed4f05"><code>840da8e</code></a>
Fix missing backticks in doc comments</li>
<li><a
href="https://github.com/serde-rs/json/commit/3a3f61b1c9a2dce973179fad1650f709f63bdaa5"><code>3a3f61b</code></a>
Temporarily disable miri on doctests</li>
<li><a
href="https://github.com/serde-rs/json/commit/4a0be88b5ac6cda971a52df9f027b551fe566347"><code>4a0be88</code></a>
Format regression tests with rustfmt</li>
<li><a
href="https://github.com/serde-rs/json/commit/d2dbbf7055666b42957dee59b6a4ea57413517ff"><code>d2dbbf7</code></a>
Ignore dead code lint in tests</li>
<li><a
href="https://github.com/serde-rs/json/commit/8e7b37bf7e20d3385cf389c630831c3817ef6c79"><code>8e7b37b</code></a>
Merge pull request <a
href="https://redirect.github.com/serde-rs/json/issues/1118">#1118</a>
from serde-rs/transparent</li>
<li><a
href="https://github.com/serde-rs/json/commit/a25f6c6f2af1ac268175e79ad8d537106dbb3a3a"><code>a25f6c6</code></a>
Remove conditional on repr(transparent)</li>
<li><a
href="https://github.com/serde-rs/json/commit/fedf8341eedd6fc0ad5c5336d1747463c7d85ece"><code>fedf834</code></a>
Ignore non_local_definitions false positive in test</li>
<li>See full diff in <a
href="https://github.com/serde-rs/json/compare/v1.0.114...v1.0.115">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=serde_json&package-manager=cargo&previous-version=1.0.114&new-version=1.0.115)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock          | 4 ++--
 src/core/Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3d3f72ec05..a6ea3e12ad 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1559,9 +1559,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.114"
+version = "1.0.115"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
+checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd"
 dependencies = [
  "itoa",
  "ryu",
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 1161e21428..574d38be65 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -56,7 +56,7 @@ rkyv = { version = "0.7.44", optional = true }
 roaring = "0.10.3"
 roots = "0.0.8"
 serde = { version = "1.0.197", features = ["derive"] }
-serde_json = "1.0.114"
+serde_json = "1.0.115"
 statrs = "0.16.0"
 streaming-stats = "0.2.3"
 thiserror = "1.0"

From 0af3cbb828e77676496abd1dfe0196818c54c511 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 1 Apr 2024 16:01:36 -0700
Subject: [PATCH 13/19] Bump enum_dispatch from 0.3.12 to 0.3.13 (#3102)

Bumps [enum_dispatch](https://gitlab.com/antonok/enum_dispatch) from
0.3.12 to 0.3.13.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://gitlab.com/antonok/enum_dispatch/blob/master/CHANGELOG.md">enum_dispatch's
changelog</a>.</em></p>
<blockquote>
<h2>0.3.13</h2>
<ul>
<li>Fix namespace collision with imports named <code>core</code>
(!35)</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://gitlab.com/antonok/enum_dispatch/commit/c20b482de9bcfd320ff5d0ad6a69e8c379094d9f"><code>c20b482</code></a>
v0.3.13</li>
<li><a
href="https://gitlab.com/antonok/enum_dispatch/commit/36b13dc549e11ba74a6719e559c7dfcd7318d6e0"><code>36b13dc</code></a>
add test for <code>::core</code> namespace collision</li>
<li><a
href="https://gitlab.com/antonok/enum_dispatch/commit/e7e6ce528a66fcd83ca38884eab9e697b80f857d"><code>e7e6ce5</code></a>
Merge branch 'master' into 'master'</li>
<li><a
href="https://gitlab.com/antonok/enum_dispatch/commit/9a2e6e081cdaf263ea04cb4257e1ae20d65a0770"><code>9a2e6e0</code></a>
added prefix to specifer to fix naming conflicts</li>
<li><a
href="https://gitlab.com/antonok/enum_dispatch/commit/38d9dd504c5dee5a8df350e969e3dbfca2ea94a1"><code>38d9dd5</code></a>
Merge branch 'include-tests' into 'master'</li>
<li><a
href="https://gitlab.com/antonok/enum_dispatch/commit/523938266976728802a57b211078a55d7e6f6ef2"><code>5239382</code></a>
Cargo.toml: include tests in crate</li>
<li>See full diff in <a
href="https://gitlab.com/antonok/enum_dispatch/compare/v0.3.12...v0.3.13">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=enum_dispatch&package-manager=cargo&previous-version=0.3.12&new-version=0.3.13)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock          | 4 ++--
 src/core/Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a6ea3e12ad..2e9f16247f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -535,9 +535,9 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
 
 [[package]]
 name = "enum_dispatch"
-version = "0.3.12"
+version = "0.3.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f33313078bb8d4d05a2733a94ac4c2d8a0df9a2b84424ebf4f33bfc224a890e"
+checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd"
 dependencies = [
  "once_cell",
  "proc-macro2",
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 574d38be65..23a30b3b57 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -32,7 +32,7 @@ camino = { version = "1.1.6", features = ["serde1"] }
 cfg-if = "1.0"
 counter = "0.5.7"
 csv = "1.3.0"
-enum_dispatch = "0.3.12"
+enum_dispatch = "0.3.13"
 finch = { version = "0.6.0", optional = true }
 fixedbitset = "0.4.0"
 getrandom = { version = "0.2", features = ["js"] }

From 20e61952e1926711d6e9f28c021bc9bcad21271a Mon Sep 17 00:00:00 2001
From: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
Date: Sat, 6 Apr 2024 07:25:51 -0700
Subject: [PATCH 14/19] MRG: add pyopensci review badge (#3105)

Add pyopensci review badge, linking to accepted review:
https://github.com/pyOpenSci/software-submission/issues/129
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index f12d6a65ce..702a729dd9 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,9 @@ Quickly search, compare, and analyze genomic and metagenomic data sets.
 <a href="https://github.com/sourmash-bio/sourmash/blob/latest/LICENSE"><img alt="License: 3-Clause BSD" src="https://img.shields.io/badge/License-BSD%203--Clause-blue.svg"></a>
 [![Documentation](https://readthedocs.org/projects/sourmash/badge/?version=latest)](http://sourmash.readthedocs.io/en/latest/)
 [![Gitter](https://badges.gitter.im/sourmash-bio/community.svg)](https://gitter.im/sourmash-bio/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
+
 [![DOI](http://joss.theoj.org/papers/10.21105/joss.00027/status.svg)](http://joss.theoj.org/papers/10.21105/joss.00027)
+[![pyOpenSci](https://tinyurl.com/y22nb8up)](https://github.com/pyOpenSci/software-submission/issues/129)
 
 [![Bioconda install](https://img.shields.io/conda/dn/bioconda/sourmash.svg?style=flag&label=Bioconda)](https://anaconda.org/bioconda/sourmash)
 <a href="https://pypi.org/project/sourmash/"><img alt="PyPI" src="https://badge.fury.io/py/sourmash.svg"></a>

From 5b5337e14f8e7f08ec7d85eb98e1bd40ffca7464 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 8 Apr 2024 14:42:57 -0700
Subject: [PATCH 15/19] Bump getrandom from 0.2.12 to 0.2.14 (#3108)

Bumps [getrandom](https://github.com/rust-random/getrandom) from 0.2.12
to 0.2.14.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/rust-random/getrandom/blob/master/CHANGELOG.md">getrandom's
changelog</a>.</em></p>
<blockquote>
<h2>[0.2.14] - 2024-04-08</h2>
<h3>Fixed</h3>
<ul>
<li>Enable <code>/dev/urandom</code> fallback for MUSL-based Linux
targets <a
href="https://redirect.github.com/rust-random/getrandom/issues/408">#408</a></li>
</ul>
<p><a
href="https://redirect.github.com/rust-random/getrandom/issues/408">#408</a>:
<a
href="https://redirect.github.com/rust-random/getrandom/pull/408">rust-random/getrandom#408</a></p>
<h2>[0.2.13] - 2024-04-06</h2>
<h3>Added</h3>
<ul>
<li><code>linux_disable_fallback</code> crate feature to disable
<code>/dev/urandom</code>-based fallback on Linux and
Android targets. Enabling this feature bumps minimum supported Linux
kernel version to 3.17 and
Android API level to 23 (Marshmallow). <a
href="https://redirect.github.com/rust-random/getrandom/issues/396">#396</a></li>
</ul>
<h3>Changed</h3>
<ul>
<li>Disable <code>/dev/urandom</code> fallback for Linux targets outside
of the following <code>target_arch</code>es:
<code>aarch64</code>, <code>arm</code>, <code>powerpc</code>,
<code>powerpc64</code>, <code>s390x</code>, <code>x86</code>,
<code>x86_64</code> <a
href="https://redirect.github.com/rust-random/getrandom/issues/396">#396</a></li>
<li>Do not catch <code>EPERM</code> error code on Android while checking
availability of
the <code>getrandom</code> syscall <a
href="https://redirect.github.com/rust-random/getrandom/issues/396">#396</a></li>
</ul>
<p><a
href="https://redirect.github.com/rust-random/getrandom/issues/396">#396</a>:
<a
href="https://redirect.github.com/rust-random/getrandom/pull/396">rust-random/getrandom#396</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="https://github.com/rust-random/getrandom/commit/a39033a34a0b81c5b15ef1fba28696ab93aac9db"><code>a39033a</code></a>
Enable <code>/dev/urandom</code> fallback for MUSL-based Linux targets
(<a
href="https://redirect.github.com/rust-random/getrandom/issues/408">#408</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/968dd484e2209c18330f725b854f8c13a6f4d09e"><code>968dd48</code></a>
Release v0.2.13 (<a
href="https://redirect.github.com/rust-random/getrandom/issues/405">#405</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/8ffd43e62a5076480e53275e93c21fcde4fe9f7f"><code>8ffd43e</code></a>
Conditionally disable file fallback for Android and Linux (<a
href="https://redirect.github.com/rust-random/getrandom/issues/396">#396</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/6b7bcb5991bc92bd9d24648700cb35377bafedce"><code>6b7bcb5</code></a>
Replace man7.org links with manned.org (<a
href="https://redirect.github.com/rust-random/getrandom/issues/404">#404</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/5f0701faba5b83ebf144af9973582904f60849b7"><code>5f0701f</code></a>
CI: Run tests on aarch64-apple-darwin and aarch64-apple-ios-sim. (<a
href="https://redirect.github.com/rust-random/getrandom/issues/398">#398</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/489eeee3ad86ca639a2d5ea069791444776b425c"><code>489eeee</code></a>
Fix nightly build by removing redundant <code>use</code> (<a
href="https://redirect.github.com/rust-random/getrandom/issues/399">#399</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/d102c3655fa589675058f0ba44762b1151f7c753"><code>d102c36</code></a>
Use <code>doc_auto_cfg</code> instead of <code>doc_cfg</code> (<a
href="https://redirect.github.com/rust-random/getrandom/issues/392">#392</a>)</li>
<li><a
href="https://github.com/rust-random/getrandom/commit/2e4bb4d72289a2169ca1eb25e23dcc7a32a4d034"><code>2e4bb4d</code></a>
Correct comments regarding LazyUsize (<a
href="https://redirect.github.com/rust-random/getrandom/issues/391">#391</a>)</li>
<li>See full diff in <a
href="https://github.com/rust-random/getrandom/compare/v0.2.12...v0.2.14">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=getrandom&package-manager=cargo&previous-version=0.2.12&new-version=0.2.14)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2e9f16247f..6743510def 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -604,9 +604,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
 
 [[package]]
 name = "getrandom"
-version = "0.2.12"
+version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
+checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
 dependencies = [
  "cfg-if",
  "js-sys",

From 9d472a1828a7c361929d3731dcf12cc3ac1d840e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 8 Apr 2024 15:56:04 -0700
Subject: [PATCH 16/19] Bump histogram from 0.9.1 to 0.10.0 (#3109)

Bumps [histogram](https://github.com/pelikan-io/rustcommon) from 0.9.1
to 0.10.0.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a
href="https://github.com/pelikan-io/rustcommon/commits">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=histogram&package-manager=cargo&previous-version=0.9.1&new-version=0.10.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock          | 4 ++--
 src/core/Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6743510def..51ef233466 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -662,9 +662,9 @@ checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
 
 [[package]]
 name = "histogram"
-version = "0.9.1"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b634390eb8a63662e127836d4e2f26d7ae930600d4e05ee0fd85a009eeb1175"
+checksum = "f4d3bddd75a32b17e75762f128ffc7a33158b933b6eb27424da9be4a58f30eb9"
 dependencies = [
  "thiserror",
 ]
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 23a30b3b57..7be417cfd6 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -37,7 +37,7 @@ finch = { version = "0.6.0", optional = true }
 fixedbitset = "0.4.0"
 getrandom = { version = "0.2", features = ["js"] }
 getset = "0.1.1"
-histogram = "0.9.1"
+histogram = "0.10.0"
 itertools = "0.12.1"
 log = "0.4.21"
 md5 = "0.7.0"

From a387d222677e81c1736739a24a7a4a57be3c55df Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Tue, 9 Apr 2024 15:05:14 -0700
Subject: [PATCH 17/19] MRG: 4.8.8 release branch (#3110)

Release candidate testing:
- [x] Command line tests pass for a release candidate
- [x] All eight release candidate wheels are built

Releasing to PyPI:

- [ ] RC tag(s) deleted on github
- [ ] Release tag cut
- [ ] Release notes written
- [ ] All eight release wheels built
- [ ] Release wheels uploaded to pypi
- [ ] tar.gz distribution uploaded to pypi

After release to PyPI and conda-forge/bioconda packages built:

- [ ] [PyPI page](https://pypi.org/project/sourmash/) updated
- [ ] Zenodo DOI successfully minted upon new github release - [see
search
results](https://zenodo.org/search?page=1&size=20&q=sourmash&sort=mostrecent)
- [ ] `pip install sourmash` installs the correct version
- [ ] [conda-forge
sourmash-minimal-feedstock](https://github.com/conda-forge/sourmash-minimal-feedstock)
has updated `sourmash-minimal` to the correct version
- [ ] `mamba create -n smash-release -y sourmash` installs the correct
version

Optional but recommended:

- [ ] PR submitted to update pyodide version
- [ ] PR submitted to update spack version

---

## Release notes:

- Bump histogram from 0.9.1 to 0.10.0 (#3109)
- Bump getrandom from 0.2.12 to 0.2.14 (#3108)
- MRG: add pyopensci review badge (#3105)
- Bump enum_dispatch from 0.3.12 to 0.3.13 (#3102)
- Bump serde_json from 1.0.114 to 1.0.115 (#3101)
- Update pytest-cov requirement from <5.0,>=4 to >=4,<6.0 (#3097)
- Bump rayon from 1.9.0 to 1.10.0 (#3098)
- Implement file parsing for webassembly (#3047)
---
 flake.nix      | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/flake.nix b/flake.nix
index 57213ac6aa..5c3795f1d4 100644
--- a/flake.nix
+++ b/flake.nix
@@ -68,7 +68,7 @@
 
           sourmash = python.buildPythonPackage ( commonArgs // rec {
             pname = "sourmash";
-            version = "4.8.7";
+            version = "4.8.8";
             format = "pyproject";
 
             cargoDeps = rustPlatform.importCargoLock {
diff --git a/pyproject.toml b/pyproject.toml
index 3a141d4b79..083016d1c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ build-backend = 'maturin'
 name = "sourmash"
 description = "tools for comparing biological sequences with k-mer sketches"
 readme = "README.md"
-version = "4.8.7"
+version = "4.8.8"
 
 authors = [
   { name="Luiz Irber", orcid="0000-0003-4371-9659" },

From e0d002a55c67dfb62c73ba76b3e01addefa88cf6 Mon Sep 17 00:00:00 2001
From: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
Date: Fri, 12 Apr 2024 10:34:00 -0700
Subject: [PATCH 18/19] MRG: force continue past `tax genome` classification
 errors (#3100)

When we were doing one or a few genome classifications, it made sense to
error out completely if there was an issue. Now that we have
fastmultigather and can do tens of thousands at once, it would be nice to
be able to continue past errors (logging them).

**Changed behavior:**
- If a classification fails, report the error and do not write that
result. Continue classifying the remaining queries.
- Finish classification and write output file, BUT exit with an error
code if there were errors, except if --force is used.
- Remove some previously useful reporting about the classification
ranks, because it's too much output for large-scale classification.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 src/sourmash/tax/__main__.py  |  25 ++++++-
 src/sourmash/tax/tax_utils.py |   3 -
 tests/test_tax.py             | 133 ++++++++++++++++++++++++++++------
 3 files changed, 131 insertions(+), 30 deletions(-)

diff --git a/src/sourmash/tax/__main__.py b/src/sourmash/tax/__main__.py
index 073977cb79..1a5d22940a 100644
--- a/src/sourmash/tax/__main__.py
+++ b/src/sourmash/tax/__main__.py
@@ -313,6 +313,9 @@ def genome(args):
         sys.exit(-1)
 
     # for each queryResult, summarize at rank and classify according to thresholds, reporting any errors that occur.
+    n_total = len(query_gather_results)
+    classified_results = []
+    found_error = False
     for queryResult in query_gather_results:
         try:
             queryResult.build_classification_result(
@@ -322,10 +325,21 @@ def genome(args):
                 lingroup_ranks=lg_ranks,
                 lingroups=all_lgs,
             )
+            classified_results.append(queryResult)
 
         except ValueError as exc:
-            error(f"ERROR: {str(exc)}")
-            sys.exit(-1)
+            found_error = True
+            notify(f"ERROR: {str(exc)}")
+
+    n_classified = len(classified_results)
+    if n_classified == 0:
+        notify("No queries could be classified. Exiting.")
+        sys.exit(-1)
+    else:
+        classif_perc = (float(n_classified) / float(n_total)) * 100
+        notify(
+            f"classified {n_classified}/{n_total} queries ({classif_perc :.2f}%). Writing results"
+        )
 
     # write outputs
     if "csv_summary" in args.output_format:
@@ -334,7 +348,7 @@ def genome(args):
         )
         with FileOutputCSV(summary_outfile) as out_fp:
             tax_utils.write_summary(
-                query_gather_results,
+                classified_results,
                 out_fp,
                 limit_float_decimals=limit_float,
                 classification=True,
@@ -389,6 +403,11 @@ def genome(args):
         with FileOutputCSV(lineage_outfile) as out_fp:
             tax_utils.write_output(header, lineage_results, out_fp)
 
+    # if there was a classification error, exit with err code
+    if found_error:
+        if not args.force:
+            sys.exit(-1)
+
 
 def annotate(args):
     """
diff --git a/src/sourmash/tax/tax_utils.py b/src/sourmash/tax/tax_utils.py
index 1615c90d74..a2fbeb3f30 100644
--- a/src/sourmash/tax/tax_utils.py
+++ b/src/sourmash/tax/tax_utils.py
@@ -2328,9 +2328,6 @@ def summarize_up_ranks(self, single_rank=None, force_resummarize=False):
                     f"Error: rank '{single_rank}' not in available ranks ({', '.join(self.summarized_ranks)})"
                 )
             self.summarized_ranks = [single_rank]
-        notify(
-            f"Starting summarization up rank(s): {', '.join(self.summarized_ranks)} "
-        )
         for taxres in self.raw_taxresults:
             lininfo = taxres.lineageInfo
             if (
diff --git a/tests/test_tax.py b/tests/test_tax.py
index 70b4f14fc0..fc68b6448d 100644
--- a/tests/test_tax.py
+++ b/tests/test_tax.py
@@ -2700,6 +2700,115 @@ def test_genome_gather_two_files_empty_force(runtmp):
     )
 
 
+def test_genome_gather_two_files_one_classif_fail(runtmp):
+    # if one query cant be classified still get classif for second
+    # no --force = fail but still write file
+    c = runtmp
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    g_res = utils.get_test_data("tax/test1.gather.csv")
+
+    # make test2 results (identical to test1 except query_name and filename)
+    g_res2 = runtmp.output("test2.gather.csv")
+    test2_results = [
+        x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines()
+    ]
+    test2_results[1] = test2_results[1].replace(
+        "0.08815317112086159", "1.1"
+    )  # make test2 f_unique_to_query sum to >1
+    for line in test2_results:
+        print(line)
+    with open(g_res2, "w") as fp:
+        fp.writelines(test2_results)
+
+    with pytest.raises(SourmashCommandFailed):
+        c.run_sourmash(
+            "tax",
+            "genome",
+            "-g",
+            g_res,
+            g_res2,
+            "--taxonomy-csv",
+            taxonomy_csv,
+            "--rank",
+            "species",
+            "--containment-threshold",
+            "0",
+        )
+
+    print(c.last_result.status)
+    print(c.last_result.out)
+    print(c.last_result.err)
+
+    assert c.last_result.status == -1
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert "test2" not in c.last_result.out
+    assert (
+        "ERROR: Summarized fraction is > 100% of the query! This should not be possible. Please check that your input files come directly from a single gather run per query."
+        in c.last_result.err
+    )
+
+
+def test_genome_gather_two_files_one_classif(runtmp):
+    # if one query cant be classified, still get classif for second
+    c = runtmp
+    taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
+    g_res = utils.get_test_data("tax/test1.gather.csv")
+
+    # make test2 results (identical to test1 except query_name and filename)
+    g_res2 = runtmp.output("test2.gather.csv")
+    test2_results = [
+        x.replace("test1", "test2") + "\n" for x in Path(g_res).read_text().splitlines()
+    ]
+    test2_results[1] = test2_results[1].replace(
+        "0.08815317112086159", "1.1"
+    )  # make test2 f_unique_to_query sum to >1
+    for line in test2_results:
+        print(line)
+    with open(g_res2, "w") as fp:
+        fp.writelines(test2_results)
+
+    c.run_sourmash(
+        "tax",
+        "genome",
+        "-g",
+        g_res,
+        g_res2,
+        "--taxonomy-csv",
+        taxonomy_csv,
+        "--rank",
+        "species",
+        "--containment-threshold",
+        "0",
+        "--force",
+    )
+
+    print(c.last_result.status)
+    print(c.last_result.out)
+    print(c.last_result.err)
+
+    assert c.last_result.status == 0
+    assert (
+        "query_name,status,rank,fraction,lineage,query_md5,query_filename,f_weighted_at_rank,bp_match_at_rank"
+        in c.last_result.out
+    )
+    assert (
+        "test1,match,species,0.089,d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri,md5,test1.sig,0.057,444000"
+        in c.last_result.out
+    )
+    assert "test2" not in c.last_result.out
+    assert (
+        "ERROR: Summarized fraction is > 100% of the query! This should not be possible. Please check that your input files come directly from a single gather run per query."
+        in c.last_result.err
+    )
+
+
 def test_genome_gather_duplicate_filename(runtmp):
     c = runtmp
     taxonomy_csv = utils.get_test_data("tax/test.taxonomy.csv")
@@ -5936,10 +6045,6 @@ def test_metagenome_LIN_lingroups(runtmp):
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert (
-        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
-        in c.last_result.err
-    )
     assert (
         "Read 5 lingroup rows and found 5 distinct lingroup prefixes."
         in c.last_result.err
@@ -5970,10 +6075,6 @@ def test_metagenome_LIN_human_summary_no_lin_position(runtmp):
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert (
-        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
-        in c.last_result.err
-    )
     assert "sample name    proportion   cANI   lineage" in c.last_result.out
     assert "-----------    ----------   ----   -------" in c.last_result.out
     assert "test1             86.9%     -      unclassified" in c.last_result.out
@@ -6020,10 +6121,6 @@ def test_metagenome_LIN_human_summary_lin_position_5(runtmp):
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert (
-        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
-        in c.last_result.err
-    )
     assert "sample name    proportion   cANI   lineage" in c.last_result.out
     assert "-----------    ----------   ----   -------" in c.last_result.out
     assert "test1             86.9%     -      unclassified" in c.last_result.out
@@ -6058,10 +6155,6 @@ def test_metagenome_LIN_krona_lin_position_5(runtmp):
     print(c.last_result.err)
 
     assert c.last_result.status == 0
-    assert (
-        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
-        in c.last_result.err
-    )
     assert "fraction	0	1	2	3	4	5" in c.last_result.out
     assert "0.08815317112086159	0	0	0	0	0	0" in c.last_result.out
     assert "0.07778220981252493	1	0	0	0	0	0" in c.last_result.out
@@ -6133,10 +6226,6 @@ def test_metagenome_LIN_lingroups_empty_lg_file(runtmp):
     print(c.last_result.err)
 
     assert c.last_result.status != 0
-    assert (
-        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
-        in c.last_result.err
-    )
     assert (
         f"Cannot read lingroups from '{lg_file}'. Is file empty?" in c.last_result.err
     )
@@ -6302,8 +6391,4 @@ def test_metagenome_LIN_lingroups_lg_only_header(runtmp):
     print(c.last_result.err)
 
     assert c.last_result.status != 0
-    assert (
-        "Starting summarization up rank(s): 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0"
-        in c.last_result.err
-    )
     assert f"No lingroups loaded from {lg_file}" in c.last_result.err

From f4e720552c0e6dc22a9670289ef201ad0176225a Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Fri, 12 Apr 2024 11:44:18 -0700
Subject: [PATCH 19/19] MRG: prepare to remove `sourmash compute` for sourmash
 v5.0 (#3103)

This PR refactors code to eliminate any internal dependencies on the
`compute` command or codebase, in preparation for removing `sourmash
compute` in v5.0, per
https://github.com/sourmash-bio/sourmash/issues/1286.

Specifically, this PR:
* shifts common sketching code from `src/sourmash/command_compute.py` to
`src/sourmash/command_sketch.py`;
* refactors three tests that were still using `sourmash compute` to use
`sourmash sketch` instead.

No functionality is altered or adjusted in this PR; it's all just code
refactoring.

The next step for #1286 would maybe be to adjust the code and the tests to
respect the `--v4` and `--v5` flags (as used in
https://github.com/sourmash-bio/sourmash/pull/3072 /
https://github.com/sourmash-bio/sourmash/pull/3074). The actual compute
code and command wouldn't be removed until after a 5.0 release, I think.
---
 src/sourmash/command_compute.py |  12 +-
 src/sourmash/command_sketch.py  | 461 +++++++++++++++++++++++++++++++-
 tests/test_cmd_signature.py     |  34 ++-
 tests/test_sourmash.py          |  38 ++-
 tests/test_sourmash_sketch.py   |   3 +-
 5 files changed, 519 insertions(+), 29 deletions(-)

diff --git a/src/sourmash/command_compute.py b/src/sourmash/command_compute.py
index aac66def13..dbb3c42ad1 100644
--- a/src/sourmash/command_compute.py
+++ b/src/sourmash/command_compute.py
@@ -13,9 +13,15 @@
 from .utils import RustObject
 from ._lowlevel import ffi, lib
 
-DEFAULT_COMPUTE_K = "21,31,51"
-DEFAULT_MMHASH_SEED = 42
-DEFAULT_LINE_COUNT = 1500
+
+from .command_sketch import (
+    _compute_individual,
+    _compute_merged,
+    ComputeParameters,
+    add_seq,
+    set_sig_name,
+    DEFAULT_MMHASH_SEED,
+)
 
 
 def compute(args):
diff --git a/src/sourmash/command_sketch.py b/src/sourmash/command_sketch.py
index 508cac7c01..e98212f8c1 100644
--- a/src/sourmash/command_sketch.py
+++ b/src/sourmash/command_sketch.py
@@ -12,18 +12,14 @@
 import sourmash
 from .signature import SourmashSignature
 from .logging import notify, error, set_quiet, print_results
-from .command_compute import (
-    _compute_individual,
-    _compute_merged,
-    ComputeParameters,
-    add_seq,
-    set_sig_name,
-    DEFAULT_MMHASH_SEED,
-)
 from sourmash import sourmash_args
 from sourmash.sourmash_args import check_scaled_bounds, check_num_bounds
 from sourmash.sig.__main__ import _summarize_manifest, _SketchInfo
 from sourmash.manifest import CollectionManifest
+from .utils import RustObject
+from ._lowlevel import ffi, lib
+
+DEFAULT_MMHASH_SEED = 42
 
 DEFAULTS = dict(
     dna="k=31,scaled=1000,noabund",
@@ -637,3 +633,452 @@ def fromfile(args):
     notify(
         f"** {total_sigs} total requested; output {total_sigs - skipped_sigs}, skipped {skipped_sigs}"
     )
+
+
+class _signatures_for_compute_factory:
+    "Build signatures on demand, based on args input to 'compute'."
+
+    def __init__(self, args):
+        self.args = args
+
+    def __call__(self):
+        args = self.args
+        params = ComputeParameters(
+            ksizes=args.ksizes,
+            seed=args.seed,
+            protein=args.protein,
+            dayhoff=args.dayhoff,
+            hp=args.hp,
+            dna=args.dna,
+            num_hashes=args.num_hashes,
+            track_abundance=args.track_abundance,
+            scaled=args.scaled,
+        )
+        sig = SourmashSignature.from_params(params)
+        return [sig]
+
+
+def _compute_individual(args, signatures_factory):
+    # this is where output signatures will go.
+    save_sigs = None
+
+    # track: is this the first file? in cases where we have empty inputs,
+    # we don't want to open any outputs.
+    first_file_for_output = True
+
+    # if args.output is set, we are aggregating all output to a single file.
+    # do not open a new output file for each input.
+    open_output_each_time = True
+    if args.output:
+        open_output_each_time = False
+
+    for filename in args.filenames:
+        if open_output_each_time:
+            # for each input file, construct output filename
+            sigfile = os.path.basename(filename) + ".sig"
+            if args.output_dir:
+                sigfile = os.path.join(args.output_dir, sigfile)
+
+            # does it already exist? skip if so.
+            if os.path.exists(sigfile) and not args.force:
+                notify("skipping {} - already done", filename)
+                continue  # go on to next file.
+
+            # nope? ok, let's save to it.
+            assert not save_sigs
+            save_sigs = sourmash_args.SaveSignaturesToLocation(sigfile)
+
+        #
+        # calculate signatures!
+        #
+
+        # now, set up to iterate over sequences.
+        with screed.open(filename) as screed_iter:
+            if not screed_iter:
+                notify(f"no sequences found in '{filename}'?!")
+                continue
+
+            # open output for signatures
+            if open_output_each_time:
+                save_sigs.open()
+            # or... is this the first time to write something to args.output?
+            elif first_file_for_output:
+                save_sigs = sourmash_args.SaveSignaturesToLocation(args.output)
+                save_sigs.open()
+                first_file_for_output = False
+
+            # make a new signature for each sequence?
+            if args.singleton:
+                n_calculated = 0
+                for n, record in enumerate(screed_iter):
+                    sigs = signatures_factory()
+                    try:
+                        add_seq(
+                            sigs,
+                            record.sequence,
+                            args.input_is_protein,
+                            args.check_sequence,
+                        )
+                    except ValueError as exc:
+                        error(f"ERROR when reading from '{filename}' - ")
+                        error(str(exc))
+                        sys.exit(-1)
+
+                    n_calculated += len(sigs)
+                    set_sig_name(sigs, filename, name=record.name)
+                    save_sigs_to_location(sigs, save_sigs)
+
+                notify(
+                    "calculated {} signatures for {} sequences in {}",
+                    n_calculated,
+                    n + 1,
+                    filename,
+                )
+
+            # nope; make a single sig for the whole file
+            else:
+                sigs = signatures_factory()
+
+                # consume & calculate signatures
+                notify(f"... reading sequences from {filename}")
+                name = None
+                for n, record in enumerate(screed_iter):
+                    if n % 10000 == 0:
+                        if n:
+                            notify("\r...{} {}", filename, n, end="")
+                        elif args.name_from_first:
+                            name = record.name
+
+                    try:
+                        add_seq(
+                            sigs,
+                            record.sequence,
+                            args.input_is_protein,
+                            args.check_sequence,
+                        )
+                    except ValueError as exc:
+                        error(f"ERROR when reading from '{filename}' - ")
+                        error(str(exc))
+                        sys.exit(-1)
+
+                notify("...{} {} sequences", filename, n, end="")
+
+                set_sig_name(sigs, filename, name)
+                save_sigs_to_location(sigs, save_sigs)
+
+                notify(
+                    f"calculated {len(sigs)} signatures for {n+1} sequences in {filename}"
+                )
+
+        # if not args.output, close output for every input filename.
+        if open_output_each_time:
+            save_sigs.close()
+            notify(
+                f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0."
+            )
+            save_sigs = None
+
+    # if --output-dir specified, all collected signatures => args.output,
+    # and we need to close here.
+    if args.output and save_sigs is not None:
+        save_sigs.close()
+        notify(
+            f"saved {len(save_sigs)} signature(s) to '{save_sigs.location}'. Note: signature license is CC0."
+        )
+
+
+def _compute_merged(args, signatures_factory):
+    # make a signature for the whole file
+    sigs = signatures_factory()
+
+    total_seq = 0
+    for filename in args.filenames:
+        # consume & calculate signatures
+        notify("... reading sequences from {}", filename)
+
+        n = None
+        with screed.open(filename) as f:
+            for n, record in enumerate(f):
+                if n % 10000 == 0 and n:
+                    notify("\r... {} {}", filename, n, end="")
+
+                add_seq(
+                    sigs, record.sequence, args.input_is_protein, args.check_sequence
+                )
+        if n is not None:
+            notify("... {} {} sequences", filename, n + 1)
+            total_seq += n + 1
+        else:
+            notify(f"no sequences found in '{filename}'?!")
+
+    if total_seq:
+        set_sig_name(sigs, filename, name=args.merge)
+        notify(
+            "calculated 1 signature for {} sequences taken from {} files",
+            total_seq,
+            len(args.filenames),
+        )
+
+        # at end, save!
+        save_siglist(sigs, args.output)
+
+
+def add_seq(sigs, seq, input_is_protein, check_sequence):
+    for sig in sigs:
+        if input_is_protein:
+            sig.add_protein(seq)
+        else:
+            sig.add_sequence(seq, not check_sequence)
+
+
+def set_sig_name(sigs, filename, name=None):
+    if filename == "-":  # if stdin, set filename to empty.
+        filename = ""
+    for sig in sigs:
+        if name is not None:
+            sig._name = name
+
+        sig.filename = filename
+
+
+def save_siglist(siglist, sigfile_name):
+    "Save multiple signatures to a filename."
+
+    # save!
+    with sourmash_args.SaveSignaturesToLocation(sigfile_name) as save_sig:
+        for ss in siglist:
+            save_sig.add(ss)
+
+        notify(f"saved {len(save_sig)} signature(s) to '{save_sig.location}'")
+
+
+def save_sigs_to_location(siglist, save_sig):
+    "Save multiple signatures to an already-open location."
+    import sourmash
+
+    for ss in siglist:
+        save_sig.add(ss)
+
+
+class ComputeParameters(RustObject):
+    __dealloc_func__ = lib.computeparams_free
+
+    def __init__(
+        self,
+        *,
+        ksizes=(21, 31, 51),
+        seed=42,
+        protein=False,
+        dayhoff=False,
+        hp=False,
+        dna=True,
+        num_hashes=500,
+        track_abundance=False,
+        scaled=0,
+    ):
+        self._objptr = lib.computeparams_new()
+
+        self.seed = seed
+        self.ksizes = ksizes
+        self.protein = protein
+        self.dayhoff = dayhoff
+        self.hp = hp
+        self.dna = dna
+        self.num_hashes = num_hashes
+        self.track_abundance = track_abundance
+        self.scaled = scaled
+
+    @classmethod
+    def from_manifest_row(cls, row):
+        "convert a CollectionManifest row into a ComputeParameters object"
+        is_dna = is_protein = is_dayhoff = is_hp = False
+        if row["moltype"] == "DNA":
+            is_dna = True
+        elif row["moltype"] == "protein":
+            is_protein = True
+        elif row["moltype"] == "hp":
+            is_hp = True
+        elif row["moltype"] == "dayhoff":
+            is_dayhoff = True
+        else:
+            assert 0
+
+        if is_dna:
+            ksize = row["ksize"]
+        else:
+            ksize = row["ksize"] * 3
+
+        p = cls(
+            ksizes=[ksize],
+            seed=DEFAULT_MMHASH_SEED,
+            protein=is_protein,
+            dayhoff=is_dayhoff,
+            hp=is_hp,
+            dna=is_dna,
+            num_hashes=row["num"],
+            track_abundance=row["with_abundance"],
+            scaled=row["scaled"],
+        )
+
+        return p
+
+    def to_param_str(self):
+        "Convert object to equivalent params str."
+        pi = []
+
+        if self.dna:
+            pi.append("dna")
+        elif self.protein:
+            pi.append("protein")
+        elif self.hp:
+            pi.append("hp")
+        elif self.dayhoff:
+            pi.append("dayhoff")
+        else:
+            assert 0  # must be one of the previous
+
+        if self.dna:
+            kstr = [f"k={k}" for k in self.ksizes]
+        else:
+            # for protein, divide ksize by three.
+            kstr = [f"k={k//3}" for k in self.ksizes]
+        assert kstr
+        pi.extend(kstr)
+
+        if self.num_hashes != 0:
+            pi.append(f"num={self.num_hashes}")
+        elif self.scaled != 0:
+            pi.append(f"scaled={self.scaled}")
+        else:
+            assert 0
+
+        if self.track_abundance:
+            pi.append("abund")
+        # noabund is default
+
+        if self.seed != DEFAULT_MMHASH_SEED:
+            pi.append(f"seed={self.seed}")
+        # self.seed
+
+        return ",".join(pi)
+
+    def __repr__(self):
+        return f"ComputeParameters(ksizes={self.ksizes}, seed={self.seed}, protein={self.protein}, dayhoff={self.dayhoff}, hp={self.hp}, dna={self.dna}, num_hashes={self.num_hashes}, track_abundance={self.track_abundance}, scaled={self.scaled})"
+
+    def __eq__(self, other):
+        return (
+            self.ksizes == other.ksizes
+            and self.seed == other.seed
+            and self.protein == other.protein
+            and self.dayhoff == other.dayhoff
+            and self.hp == other.hp
+            and self.dna == other.dna
+            and self.num_hashes == other.num_hashes
+            and self.track_abundance == other.track_abundance
+            and self.scaled == other.scaled
+        )
+
+    @staticmethod
+    def from_args(args):
+        ptr = lib.computeparams_new()
+        ret = ComputeParameters._from_objptr(ptr)
+
+        for arg, value in vars(args).items():
+            try:
+                getattr(type(ret), arg).fset(ret, value)
+            except AttributeError:
+                pass
+
+        return ret
+
+    @property
+    def seed(self):
+        return self._methodcall(lib.computeparams_seed)
+
+    @seed.setter
+    def seed(self, v):
+        return self._methodcall(lib.computeparams_set_seed, v)
+
+    @property
+    def ksizes(self):
+        size = ffi.new("uintptr_t *")
+        ksizes_ptr = self._methodcall(lib.computeparams_ksizes, size)
+        size = size[0]
+        ksizes = ffi.unpack(ksizes_ptr, size)
+        lib.computeparams_ksizes_free(ksizes_ptr, size)
+        return ksizes
+
+    @ksizes.setter
+    def ksizes(self, v):
+        return self._methodcall(lib.computeparams_set_ksizes, list(v), len(v))
+
+    @property
+    def protein(self):
+        return self._methodcall(lib.computeparams_protein)
+
+    @protein.setter
+    def protein(self, v):
+        return self._methodcall(lib.computeparams_set_protein, v)
+
+    @property
+    def dayhoff(self):
+        return self._methodcall(lib.computeparams_dayhoff)
+
+    @dayhoff.setter
+    def dayhoff(self, v):
+        return self._methodcall(lib.computeparams_set_dayhoff, v)
+
+    @property
+    def hp(self):
+        return self._methodcall(lib.computeparams_hp)
+
+    @hp.setter
+    def hp(self, v):
+        return self._methodcall(lib.computeparams_set_hp, v)
+
+    @property
+    def dna(self):
+        return self._methodcall(lib.computeparams_dna)
+
+    @dna.setter
+    def dna(self, v):
+        return self._methodcall(lib.computeparams_set_dna, v)
+
+    @property
+    def moltype(self):
+        if self.dna:
+            moltype = "DNA"
+        elif self.protein:
+            moltype = "protein"
+        elif self.hp:
+            moltype = "hp"
+        elif self.dayhoff:
+            moltype = "dayhoff"
+        else:
+            assert 0
+
+        return moltype
+
+    @property
+    def num_hashes(self):
+        return self._methodcall(lib.computeparams_num_hashes)
+
+    @num_hashes.setter
+    def num_hashes(self, v):
+        return self._methodcall(lib.computeparams_set_num_hashes, v)
+
+    @property
+    def track_abundance(self):
+        return self._methodcall(lib.computeparams_track_abundance)
+
+    @track_abundance.setter
+    def track_abundance(self, v):
+        return self._methodcall(lib.computeparams_set_track_abundance, v)
+
+    @property
+    def scaled(self):
+        return self._methodcall(lib.computeparams_scaled)
+
+    @scaled.setter
+    def scaled(self, v):
+        return self._methodcall(lib.computeparams_set_scaled, int(v))
diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py
index 9f14b6df58..8dfe8dc74a 100644
--- a/tests/test_cmd_signature.py
+++ b/tests/test_cmd_signature.py
@@ -3355,16 +3355,35 @@ def test_sig_describe_dayhoff(c):
     )
 
 
-@utils.in_tempdir
-def test_sig_describe_1_hp(c):
+def test_sig_describe_1_hp(runtmp):
+    c = runtmp
+
     # get basic info on a signature
     testdata = utils.get_test_data("short.fa")
-    c.run_sourmash(
-        "compute", "-k", "21,30", "--dayhoff", "--hp", "--protein", "--dna", testdata
+
+    # run four separate commands to make 4 different sets of sigs...
+    c.sourmash("sketch", "dna", "-p", "k=21,k=30,num=500", "-o", "out.zip", testdata)
+    c.sourmash(
+        "sketch", "translate", "-p", "k=7,k=10,num=500", "-o", "out.zip", testdata
+    )
+    c.sourmash(
+        "sketch", "translate", "-p", "k=7,k=10,num=500,hp", "-o", "out.zip", testdata
+    )
+    c.sourmash(
+        "sketch",
+        "translate",
+        "-p",
+        "k=7,k=10,num=500,dayhoff",
+        "-o",
+        "out.zip",
+        testdata,
     )
-    # stdout should be new signature
-    computed_sig = os.path.join(c.location, "short.fa.sig")
-    c.run_sourmash("sig", "describe", computed_sig)
+
+    # then combine into one .sig file
+    c.sourmash("sig", "cat", "out.zip", "-o", "short.fa.sig")
+
+    # & run sig describe
+    c.run_sourmash("sig", "describe", "short.fa.sig")
 
     out = c.last_result.out
     print(c.last_result.out)
@@ -3444,7 +3463,6 @@ def test_sig_describe_1_hp(c):
 signature license: CC0
 
 ---
-signature filename: short.fa.sig
 signature: ** no name **
 source file: short.fa
 md5: 71f7c111c01785e5f38efad45b00a0e1
diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py
index fc083a21e5..23647e517b 100644
--- a/tests/test_sourmash.py
+++ b/tests/test_sourmash.py
@@ -1791,26 +1791,48 @@ def test_compare_deduce_molecule(runtmp):
 
 
 def test_compare_choose_molecule_dna(runtmp):
-    # choose molecule type
+    # choose molecule type with --dna, ignoring protein
     testdata1 = utils.get_test_data("short.fa")
     testdata2 = utils.get_test_data("short2.fa")
 
-    runtmp.sourmash("compute", "-k", "30", "--dna", "--protein", testdata1, testdata2)
-
-    runtmp.sourmash("compare", "--dna", "short.fa.sig", "short2.fa.sig")
+    runtmp.sourmash(  # DNA sketches of both inputs, k=30
+        "sketch", "dna", "-p", "k=30,num=500", testdata1, testdata2, "-o", "sigs.zip"
+    )
+    runtmp.sourmash(  # translated sketches of the same inputs, k=10
+        "sketch",
+        "translate",
+        "-p",
+        "k=10,num=500",
+        testdata1,
+        testdata2,
+        "-o",
+        "sigs.zip",
+    )
+    runtmp.sourmash("compare", "--dna", "sigs.zip")  # both sketch runs target sigs.zip
 
     print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err)
     assert "min similarity in matrix: 0.938" in runtmp.last_result.out
 
 
 def test_compare_choose_molecule_protein(runtmp):
-    # choose molecule type
+    # choose molecule type with --protein, ignoring DNA
     testdata1 = utils.get_test_data("short.fa")
     testdata2 = utils.get_test_data("short2.fa")
 
-    runtmp.sourmash("compute", "-k", "30", "--dna", "--protein", testdata1, testdata2)
-
-    runtmp.sourmash("compare", "--protein", "short.fa.sig", "short2.fa.sig")
+    runtmp.sourmash(  # DNA sketches of both inputs, k=30
+        "sketch", "dna", "-p", "k=30,num=500", testdata1, testdata2, "-o", "sigs.zip"
+    )
+    runtmp.sourmash(  # translated sketches of the same inputs, k=10
+        "sketch",
+        "translate",
+        "-p",
+        "k=10,num=500",
+        testdata1,
+        testdata2,
+        "-o",
+        "sigs.zip",
+    )
+    runtmp.sourmash("compare", "--protein", "sigs.zip")  # same zip, protein only
 
     print(runtmp.last_result.status, runtmp.last_result.out, runtmp.last_result.err)
     assert "min similarity in matrix: 0.91" in runtmp.last_result.out
diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py
index 87460dcbcb..98448e4d6b 100644
--- a/tests/test_sourmash_sketch.py
+++ b/tests/test_sourmash_sketch.py
@@ -15,8 +15,7 @@
 from sourmash import MinHash
 from sourmash.sbt import SBT, Node
 from sourmash.sbtmh import SigLeaf, load_sbt_index
-from sourmash.command_compute import ComputeParameters
-from sourmash.cli.compute import subparser
+from sourmash.command_sketch import ComputeParameters
 from sourmash.cli import SourmashParser
 from sourmash import manifest