diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 5b4a924..f32cd71 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -53,8 +53,8 @@ jobs: - name: Run cargo fmt run: cargo fmt --all -- --check --verbose - # - name: rust tests - # run: cargo test --verbose --no-fail-fast +# - name: rust tests +# run: cargo test --verbose --no-fail-fast - name: build shell: bash -l {0} diff --git a/README.md b/README.md index e004d12..20fd991 100644 --- a/README.md +++ b/README.md @@ -153,29 +153,34 @@ options: ``` ## `urlsketch` -download and sketch directly from a url +download and sketch directly from URL(s) + ### Create an input file First, create a file, e.g. `acc-url.csv`, with identifiers, sketch names, and other required info. ``` -accession,name,moltype,md5sum,download_filename,url -GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,dna,47b9fb20c51f0552b87db5d44d5d4566,GCA_000961135.2_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz -GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,protein,fb7920fb8f3cf5d6ab9b6b754a5976a4,GCA_000961135.2_protein.urlsketch.faa.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_protein.faa.gz -GCA_000175535.1,GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz +accession,name,moltype,md5sum,download_filename,url,range +GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,dna,47b9fb20c51f0552b87db5d44d5d4566,GCA_000961135.2_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz, +GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,protein,fb7920fb8f3cf5d6ab9b6b754a5976a4,GCA_000961135.2_protein.urlsketch.faa.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_protein.faa.gz, +GCA_000175535.1,GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz, ``` -> Six columns must be present: +> Seven columns must be present: > - `accession` - an accession or unique identifier. Ideally no spaces. > - `name` - full name for the sketch. > - `moltype` - is the file 'dna' or 'protein'? -> - `md5sum` - expected md5sum (optional, will be checked after download if provided) +> - `md5sum` - expected md5sum(s). Optional; will be checked after download if provided. > - `download_filename` - filename for FASTA download. Required if `--keep-fastas`, but useful for signatures, too (saved in sig data). -> - `url` - direct link for the file +> - `url` - direct link(s) for the file(s) +> - `range` - if desired, include base pair range(s), e.g. 500-10000. This range will be selected from the record(s) and sketched (and/or saved to the download_filename). If there are multiple records in a FASTA file, the range will be applied to each record.
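+ +For example, this row (taken from `tests/test-data/acc-url-range.csv` in this repository) downloads a genome but sketches only its first 50kb; note that the `md5sum` is checked against the full downloaded file, before the range is applied: +``` +accession,name,moltype,md5sum,download_filename,url,range +GCA_000175535.1_first50kb,GCA_000175535.1_first50kb Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic_first50kb.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz,1-50000 +```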
+ +#### Note: Merging files into the same signature +As of v0.5.0, `urlsketch` can download and sketch multiple URLs into a single signature. If you provide multiple URLs for a single accession/name, the `md5sum` and `range` columns must either be empty or contain exactly one entry per URL. In each case, separate the entries with ';' -- e.g. "abc;def" for two md5sums. ### Run: -To run the test accession file at `tests/test-data/acc-url.csv`, run: +To run with the input file created above: ``` -sourmash scripts urlsketch tests/test-data/acc-url.csv -o test-urlsketch.zip -f out_fastas -k --failed test.failed.csv -p dna,k=21,k=31,scaled=1000,abund -p protein,k=10,scaled=100,abund -r 1 +sourmash scripts urlsketch acc-url.csv -o test-urlsketch.zip -f out_fastas -k --failed test.failed.csv -p dna,k=21,k=31,scaled=1000,abund -p protein,k=10,scaled=100,abund -r 1 ``` Full Usage: diff --git a/src/directsketch.rs b/src/directsketch.rs index 56fb5af..a5d0d96 100644 --- a/src/directsketch.rs +++ b/src/directsketch.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, bail, Context, Error, Result}; use async_zip::base::write::ZipFileWriter; use camino::Utf8PathBuf as PathBuf; +use needletail::parser::SequenceRecord; use regex::Regex; use reqwest::Client; use sourmash::collection::Collection; @@ -9,7 +10,7 @@ use std::collections::HashMap; use std::fs::{self, create_dir_all}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; -use tokio::fs::File; +use tokio::fs::{File, OpenOptions}; use tokio::io::{AsyncWriteExt, BufWriter}; use tokio::sync::Semaphore; use tokio_util::compat::Compat; @@ -17,8 +18,8 @@ use pyo3::prelude::*; use crate::utils::{ - load_accession_info, load_gbassembly_info, AccessionData, GBAssemblyData, GenBankFileType, - InputMolType, MultiCollection, + load_accession_info, load_gbassembly_info, AccessionData, FailedChecksum, FailedDownload, + GBAssemblyData, GenBankFileType, InputMolType, MultiCollection, }; use crate::utils::buildutils::{BuildCollection, BuildManifest, MultiSelect, MultiSelection}; @@ -207,26 +208,6 @@ async fn download_with_retry( })) } -pub struct FailedDownload { - accession: String, - name: String, - moltype: String, - md5sum: Option<String>, - download_filename: Option<String>, - url: Option<Url>, -} - -pub struct FailedChecksum { - accession: String, - name: String, - moltype: String, - md5sum_url: Option<Url>, - download_filename: Option<String>, - url: Option<Url>, - expected_md5sum: Option<String>, - reason: String, -} - #[allow(clippy::too_many_arguments)] async fn dl_sketch_assembly_accession( client: &Client, @@ -245,36 +226,38 @@ async fn dl_sketch_assembly_accession( let mut download_failures = Vec::<FailedDownload>::new(); let mut checksum_failures = Vec::<FailedChecksum>::new(); - let name = accinfo.name; - let accession = accinfo.accession; + let name = accinfo.name.clone(); + let accession = accinfo.accession.clone(); // keep track of any accessions for which we fail to find URLs let (base_url, full_name) = - match fetch_genbank_filename(client, accession.as_str(), accinfo.url).await { + match fetch_genbank_filename(client, accession.as_str(), accinfo.url.clone()).await { Ok(result) => result, Err(_err) => { // Add accession to failed downloads with each moltype if !proteomes_only { - let failed_download_dna = FailedDownload { - accession: accession.clone(), - name: name.clone(), - moltype: "dna".to_string(), - md5sum: None, - download_filename: None, - url: None, - }; + let failed_download_dna = FailedDownload::from_gbassembly(
accession.clone(), + name.clone(), + "dna".to_string(), + None, // No MD5 checksum + None, // No Download filename + None, // URL of the file + None, // No range in this case + ); download_failures.push(failed_download_dna); } if !genomes_only { - let failed_download_protein = FailedDownload { - accession: accession.clone(), - name: name.clone(), - moltype: "protein".to_string(), - md5sum: None, - download_filename: None, - url: None, - }; - download_failures.push(failed_download_protein); + let failed_download_protein = FailedDownload::from_gbassembly( + accession.clone(), + name.clone(), + "protein".to_string(), + None, // No MD5 checksum + None, // No Download filename + None, // URL of the file + None, // No range in this case + ); + download_failures.push(failed_download_protein) } return Ok((empty_coll, download_failures, checksum_failures)); @@ -303,16 +286,16 @@ async fn dl_sketch_assembly_accession( // get filename, filetype info to facilitate downstream let url = file_type.url(&base_url, &full_name); let file_name = file_type.filename_to_write(&accession); - let failed_checksum_download: FailedChecksum = FailedChecksum { - accession: accession.clone(), - name: name.clone(), - moltype: file_type.moltype(), - md5sum_url: Some(md5sum_url.clone()), - download_filename: Some(file_name), - url: Some(url), - expected_md5sum: None, - reason: error_message.clone(), // write full error message - }; + let failed_checksum_download: FailedChecksum = FailedChecksum::new( + accession.clone(), + name.clone(), + file_type.moltype(), + Some(md5sum_url.clone()), + Some(file_name), + Some(url), + None, + error_message.clone(), // write full error message + ); checksum_failures.push(failed_checksum_download); } // return early from function b/c we can't check any checksums @@ -334,26 +317,27 @@ async fn dl_sketch_assembly_accession( // did we have a checksum error or a download error? 
// here --> keep track of accession errors + filetype if error_message.contains("MD5 hash does not match") { - let checksum_mismatch: FailedChecksum = FailedChecksum { - accession: accession.clone(), - name: name.clone(), - moltype: file_type.moltype(), - md5sum_url: Some(md5sum_url.clone()), - download_filename: Some(file_name.clone()), - url: Some(url.clone()), - expected_md5sum: expected_md5.cloned(), - reason: error_message.clone(), - }; + let checksum_mismatch: FailedChecksum = FailedChecksum::new( + accession.clone(), + name.clone(), + file_type.moltype(), + Some(md5sum_url.clone()), + Some(file_name.clone()), + Some(url.clone()), + expected_md5.cloned(), + error_message.clone(), + ); checksum_failures.push(checksum_mismatch); } else { - let failed_download = FailedDownload { - accession: accession.clone(), - name: name.clone(), - moltype: file_type.moltype(), - md5sum: expected_md5.map(|x| x.to_string()), - download_filename: Some(file_name), - url: Some(url), - }; + let failed_download = FailedDownload::from_gbassembly( + accession.clone(), + name.clone(), + file_type.moltype(), + expected_md5.map(|x| x.to_string()), // single MD5 checksum + Some(file_name), // intended download filename + Some(url), // URL of the file + None, // No range + ); download_failures.push(failed_download); } continue; @@ -368,10 +352,16 @@ async fn dl_sketch_assembly_accession( // sketch data match file_type { GenBankFileType::Genomic => { - sigs.build_sigs_from_data(data, "DNA", name.clone(), file_name.clone())?; + sigs.build_sigs_from_data(data, "DNA", name.clone(), file_name.clone(), None)?; } GenBankFileType::Protein => { - sigs.build_sigs_from_data(data, "protein", name.clone(), file_name.clone())?; + sigs.build_sigs_from_data( + data, + "protein", + name.clone(), + file_name.clone(), + None, + )?; } _ => {} // Do nothing for other file types }; @@ -381,83 +371,205 @@ async fn dl_sketch_assembly_accession( Ok((sigs, download_failures, checksum_failures)) } +/// Extracts the specified range from sequences in data and writes to the file in FASTA format. +async fn process_and_write_range( + data: &[u8], + file: &mut File, + range: Option<(usize, usize)>, +) -> Result<()> { + let cursor = std::io::Cursor::new(data); + let mut fastx_reader = + needletail::parse_fastx_reader(cursor).context("Failed to parse FASTA/FASTQ data")?; + + while let Some(record) = fastx_reader.next() { + let record = record.context("Failed to read record")?; + let sequence_to_write = extract_range_from_record(&record, range) + .context("Failed to extract range from record")?; + + // Use the `id` and `seq` fields directly to construct the FASTA entry + let fasta_entry = format!( + ">{}\n{}\n", + String::from_utf8_lossy(record.id()), + String::from_utf8_lossy(&sequence_to_write) + ); + + // Write the FASTA entry to the file + file.write_all(fasta_entry.as_bytes()) + .await + .context("Failed to write FASTA entry to file")?; + } + + Ok(()) +} + +/// Extracts a range from a `SequenceRecord`. Returns the specified sequence slice as a `Vec<u8>`.
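+/// Ranges are interpreted as 1-based, inclusive coordinates: `(start, end)` selects bases +/// `start..=end`, so `1-50000` keeps the first 50,000 bases; a `None` range returns the full sequence.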
+fn extract_range_from_record( + record: &SequenceRecord, + range: Option<(usize, usize)>, +) -> Result<Vec<u8>> { + let full_sequence = record.seq(); + if let Some((start, end)) = range { + let adjusted_start = start.saturating_sub(1); // Adjust for 1-based indexing + if adjusted_start >= end || end > full_sequence.len() { + return Err(anyhow::anyhow!( + "Invalid range: start={}, end={}, sequence length={}", + start, + end, + full_sequence.len() + )); + } + Ok(full_sequence[adjusted_start..end].to_vec()) + } else { + Ok(full_sequence.to_vec()) + } +} + +/// Opens a file for writing, creating necessary directories and truncating it if it exists. +/// Returns an `Option<File>` if a filename is provided, or `None` if the filename is `None`. +async fn open_file_for_writing( + location: &PathBuf, + filename: Option<&String>, +) -> Result<Option<File>> { + if let Some(download_filename) = filename { + let path = location.join(download_filename); + + // Create subdirectories if needed + if let Some(parent) = path.parent() { + create_dir_all(parent).with_context(|| { + format!( + "Failed to create directories for download filename path {}", + &path + ) + })?; + } + + // Open the file in write mode (truncate if it exists) + let file = OpenOptions::new() + .create(true) // Create the file if it doesn't exist + .write(true) // Enable write mode + .truncate(true) // Clear existing content + .open(&path) + .await + .with_context(|| format!("Failed to open file at {}", path))?; + Ok(Some(file)) + } else { + Ok(None) + } +} + #[allow(clippy::too_many_arguments)] async fn dl_sketch_url( client: &Client, accinfo: AccessionData, location: &PathBuf, retry: Option<u32>, - _keep_fastas: bool, + keep_fastas: bool, mut sigs: BuildCollection, _genomes_only: bool, _proteomes_only: bool, download_only: bool, + write_checksum_fail: bool, ) -> Result<(BuildCollection, Vec<FailedDownload>, Vec<FailedChecksum>)> { let retry_count = retry.unwrap_or(3); // Default retry count let empty_coll = BuildCollection::new(); let mut download_failures = Vec::<FailedDownload>::new(); let mut checksum_failures = Vec::<FailedChecksum>::new(); - let name = accinfo.name; - let accession = accinfo.accession; - let url = accinfo.url; - let expected_md5 = accinfo.expected_md5sum; - let download_filename = accinfo.download_filename; - let moltype = accinfo.moltype; - - match download_with_retry(client, &url, expected_md5.as_deref(), retry_count).await { - Ok(data) => { - // check keep_fastas instead?? - if let Some(ref download_filename) = download_filename { - let path = location.join(download_filename); - fs::write(path, &data).context("Failed to write data to file")?; - } - if !download_only { - let filename = download_filename.clone().unwrap_or("".to_string()); - // sketch data + let name = accinfo.name.clone(); + let accession = accinfo.accession.clone(); + let download_filename = &accinfo.download_filename; + let filename = download_filename.clone().unwrap_or("".to_string()); + let moltype = &accinfo.moltype; - match moltype { - InputMolType::Dna => { - sigs.build_sigs_from_data(data, "DNA", name.clone(), filename.clone())?; - } - InputMolType::Protein => { - sigs.build_sigs_from_data(data, "protein", name.clone(), filename.clone())?; + let mut file: Option<File> = if keep_fastas { + open_file_for_writing(location, download_filename.as_ref()).await? + } else { + None + }; + + // are we merging files?
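+ // If multiple URLs were provided for this row, each download below is appended to the same + // output file and added to the same BuildCollection, so the resulting signature covers all + // of the URLs together.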
+ let merged_sample: bool = accinfo.url_info.len() > 1; + for uinfo in &accinfo.url_info { + let url = &uinfo.url; + let expected_md5 = &uinfo.md5sum; + let range = uinfo.range; + match download_with_retry(client, url, expected_md5.as_deref(), retry_count).await { + Ok(data) => { + // Write to file if keep_fastas is true and a file is open + // note, if multiple urls are provided, this will append to the same file + if let Some(file) = file.as_mut() { + if range.is_some() { + process_and_write_range(&data, file, range) + .await + .context("Failed to process and write range to file")?; + } else { + // Write the entire data if no range is provided + file.write_all(&data) + .await + .context("Failed to write data to file")?; } - }; + } + + if !download_only { + // sketch data + + match moltype { + InputMolType::Dna => { + sigs.build_sigs_from_data( + data, + "DNA", + name.clone(), + filename.clone(), + range, + )?; + } + InputMolType::Protein => { + sigs.build_sigs_from_data( + data, + "protein", + name.clone(), + filename.clone(), + range, + )?; + } + }; + } } - } - Err(err) => { - let error_message = err.to_string(); - // did we have a checksum error or a download error? - // here --> keep track of accession errors + filetype - if error_message.contains("MD5 hash does not match") { - let checksum_mismatch: FailedChecksum = FailedChecksum { - accession: accession.clone(), - name: name.clone(), - moltype: moltype.to_string(), - md5sum_url: None, - download_filename, - url: Some(url.clone()), - expected_md5sum: expected_md5.clone(), - reason: error_message.clone(), - }; - checksum_failures.push(checksum_mismatch); - sigs = empty_coll; - } else { - let failed_download = FailedDownload { - accession: accession.clone(), - name: name.clone(), - moltype: moltype.to_string(), - md5sum: expected_md5.map(|x| x.to_string()), - download_filename, - url: Some(url), - }; - download_failures.push(failed_download); - sigs = empty_coll; + Err(err) => { + let error_message = err.to_string(); + // did we have a checksum error or a download error? + // here --> keep track of accession errors + filetype + if error_message.contains("MD5 hash does not match") && write_checksum_fail { + let checksum_mismatch: FailedChecksum = FailedChecksum::new( + accession.clone(), + name.clone(), + moltype.to_string(), + None, + download_filename.clone(), + Some(url.clone()), + expected_md5.clone(), + error_message.clone(), + ); + checksum_failures.push(checksum_mismatch); + // if this is a merged sample, the checksum failure is only for one part of it. + // also write a download failure, which is the full entry. + // The checksum failures file is mostly for debugging, while the failure csv + // can be used to re-run urlsketch. 
+ if merged_sample { + download_failures.push(FailedDownload::from_accession_data(&accinfo)); + } + } else { + download_failures.push(FailedDownload::from_accession_data(&accinfo)); + } + // Clear signatures and return immediately on failure + return Ok((empty_coll, download_failures, checksum_failures)); } } } + // Update signature info + sigs.update_info(name, filename); + Ok((sigs, download_failures, checksum_failures)) } @@ -705,43 +817,25 @@ pub fn failures_handle( Ok(file) => { let mut writer = BufWriter::new(file); - // Attempt to write CSV headers + // Write CSV header if let Err(e) = writer - .write_all(b"accession,name,moltype,md5sum,download_filename,url\n") + .write_all(FailedDownload::csv_header().as_bytes()) .await { let error = Error::new(e).context("Failed to write headers"); let _ = error_sender.send(error).await; - return; // Exit the task early after reporting the error + return; } - - while let Some(FailedDownload { - accession, - name, - md5sum, - download_filename, - url, - moltype, - }) = recv_failed.recv().await - { - let record = format!( - "{},{},{},{},{},{}\n", - accession, - name, - moltype, - md5sum.unwrap_or("".to_string()), - download_filename.unwrap_or("".to_string()), - url.map(|u| u.to_string()).unwrap_or("".to_string()) - ); - // Attempt to write each record - if let Err(e) = writer.write_all(record.as_bytes()).await { + while let Some(failed_download) = recv_failed.recv().await { + // Write the FailedDownload to the CSV writer + if let Err(e) = failed_download.to_writer(&mut writer).await { let error = Error::new(e).context("Failed to write record"); let _ = error_sender.send(error).await; - continue; // Optionally continue to try to write next records + continue; } } - // Attempt to flush the writer + // Flush the writer if let Err(e) = writer.flush().await { let error = Error::new(e).context("Failed to flush writer"); let _ = error_sender.send(error).await; @@ -768,41 +862,20 @@ pub fn checksum_failures_handle( // Attempt to write CSV headers if let Err(e) = writer - .write_all(b"accession,name,moltype,md5sum_url,download_filename,url,expected_md5sum,reason\n") + .write_all(FailedChecksum::csv_header().as_bytes()) .await { let error = Error::new(e).context("Failed to write headers"); let _ = error_sender.send(error).await; - return; // Exit the task early after reporting the error + return; } - while let Some(FailedChecksum { - accession, - name, - moltype, - md5sum_url, - download_filename, - url, - expected_md5sum, - reason, - }) = recv_failed.recv().await - { - let record = format!( - "{},{},{},{},{},{},{},{}\n", - accession, - name, - moltype, - md5sum_url.map(|u| u.to_string()).unwrap_or("".to_string()), - download_filename.unwrap_or("".to_string()), - url.map(|u| u.to_string()).unwrap_or("".to_string()), - expected_md5sum.unwrap_or("".to_string()), - reason, - ); - // Attempt to write each record - if let Err(e) = writer.write_all(record.as_bytes()).await { + // Write each failed checksum record + while let Some(failed_checksum) = recv_failed.recv().await { + if let Err(e) = failed_checksum.to_writer(&mut writer).await { let error = Error::new(e).context("Failed to write failed checksum record"); let _ = error_sender.send(error).await; - continue; // continue to try to write next records + continue; } } @@ -1233,6 +1306,16 @@ pub async fn urlsketch( sigs.filter_by_manifest(existing_manifest); } } + // eliminate sigs that won't be added to based on moltype + // this assumes no translation --> modify as needed if adding that. 
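+ // e.g. a 'dna' row can only contribute to DNA-based sketches, so protein param strings are + // dropped here (and vice versa); if nothing is left to build and we are not in download-only + // mode, skip this row entirely.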
+ if accinfo.moltype == InputMolType::Dna { + sigs.select(&dna_multiselection)?; + } else { + sigs.select(&protein_multiselection)?; + } + if sigs.is_empty() && !download_only { + continue; + } // eliminate sigs that won't be added to based on moltype // this assumes no translation --> modify as needed if adding that. @@ -1252,6 +1335,7 @@ pub async fn urlsketch( let checksum_send_failed = send_failed_checksums.clone(); let download_path_clone = download_path.clone(); // Clone the path for each task let send_errors = error_sender.clone(); + let write_checksum_fail = write_failed_checksums; tokio::spawn(async move { let _permit = semaphore_clone.acquire().await; @@ -1276,6 +1360,7 @@ pub async fn urlsketch( genomes_only, proteomes_only, download_only, + write_checksum_fail, ) .await; match result { @@ -1292,28 +1377,11 @@ pub async fn urlsketch( let _ = send_errors.send(e.into()).await; // Send the error through the channel } } - if write_failed_checksums { - for fail in failed_checksums { - if let Err(e) = checksum_send_failed.send(fail).await { - eprintln!("Failed to send failed checksum info: {}", e); - let _ = send_errors.send(e.into()).await; // Send the error through the channel - } - } - } else { - // if we don't have a failed checksum file, convert to failed downloads + write there - for fail in failed_checksums { - let dl_fail: FailedDownload = FailedDownload { - accession: fail.accession, - name: fail.name, - moltype: fail.moltype, - md5sum: fail.expected_md5sum, - download_filename: fail.download_filename, - url: fail.url, - }; - if let Err(e) = send_failed.send(dl_fail).await { - eprintln!("Failed to send failed download info: {}", e); - let _ = send_errors.send(e.into()).await; // Send the error through the channel - } + for fail in failed_checksums { + if let Err(e) = checksum_send_failed.send(fail).await { + eprintln!("Failed to send failed checksum info: {}", e); + let _ = send_errors.send(e.into()).await; // Send the error through the channel } } } diff --git a/src/utils/buildutils.rs b/src/utils/buildutils.rs index c99ed26..595bfd8 100644 --- a/src/utils/buildutils.rs +++ b/src/utils/buildutils.rs @@ -759,14 +759,33 @@ impl BuildCollection { &mut self, input_moltype: &str, record: &SequenceRecord, + range: Option<(usize, usize)>, ) -> Result<()> { + // Get the full sequence and apply the range if provided + let full_sequence = record.seq(); + let sequence_to_process = if let Some((start, end)) = range { + // Adjust for 1-based input: start - 1, end remains unchanged + let adjusted_start = start.saturating_sub(1); // Ensure no underflow + if adjusted_start >= end || end > full_sequence.len() { + return Err(anyhow::anyhow!( + "Invalid range: start={}, end={}, sequence length={}", + start, + end, + full_sequence.len() + )); + } + &full_sequence[adjusted_start..end] + } else { + &full_sequence + }; + // add seq to sigs self.iter_mut().try_for_each(|(rec, sig)| { if input_moltype == "protein" && (rec.moltype() == HashFunctions::Murmur64Protein || rec.moltype() == HashFunctions::Murmur64Dayhoff || rec.moltype() == HashFunctions::Murmur64Hp) { - sig.add_protein(&record.seq()) + sig.add_protein(sequence_to_process) .context("Failed to add protein")?; if !rec.sequence_added { rec.sequence_added = true; @@ -776,7 +795,7 @@ impl BuildCollection { || rec.moltype() == HashFunctions::Murmur64Skipm2n3 || rec.moltype() == HashFunctions::Murmur64Skipm1n3) { - sig.add_sequence(&record.seq(), true) + sig.add_sequence(sequence_to_process, true)
.context("Failed to add sequence")?; if !rec.sequence_added { rec.sequence_added = true; @@ -792,6 +811,7 @@ impl BuildCollection { input_moltype: &str, name: String, filename: String, + range: Option<(usize, usize)>, ) -> Result<()> { let cursor = Cursor::new(data); let mut fastx_reader = @@ -800,7 +820,7 @@ impl BuildCollection { // Iterate over FASTA records and add sequences/proteins to sigs while let Some(record) = fastx_reader.next() { let record = record.context("Failed to read record")?; - self.build_sigs_from_record(input_moltype, &record)?; + self.build_sigs_from_record(input_moltype, &record, range)?; } // After processing sequences, update sig, record information @@ -814,6 +834,7 @@ impl BuildCollection { input_moltype: &str, // "protein" or "DNA" name: String, filename: String, + range: Option<(usize, usize)>, ) -> Result { // Create a FASTX reader from the file or stdin let mut fastx_reader = if filename == "-" { @@ -830,7 +851,7 @@ impl BuildCollection { while let Some(record_result) = fastx_reader.next() { let record = record_result.context("Failed to read a record from input")?; - self.build_sigs_from_record(input_moltype, &record)?; + self.build_sigs_from_record(input_moltype, &record, range)?; record_count += 1; } @@ -847,8 +868,9 @@ impl BuildCollection { record: SequenceRecord, input_moltype: &str, // (protein/dna); todo - use hashfns? filename: String, + range: Option<(usize, usize)>, ) -> Result<()> { - self.build_sigs_from_record(input_moltype, &record)?; + self.build_sigs_from_record(input_moltype, &record, range)?; // After processing sequences, update sig, record information let record_name = std::str::from_utf8(record.id()) .expect("could not get record id") diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 6c7fa18..efa326c 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -3,6 +3,7 @@ use reqwest::Url; use sourmash::collection::Collection; use std::collections::HashMap; use std::fmt; +use tokio::io::AsyncWriteExt; pub mod buildutils; use crate::utils::buildutils::{BuildManifest, BuildRecord}; @@ -85,15 +86,22 @@ impl GenBankFileType { } } } + #[allow(dead_code)] #[derive(Clone)] pub struct AccessionData { pub accession: String, pub name: String, pub moltype: InputMolType, + pub url_info: Vec, + pub download_filename: Option, // Need to require this if --keep-fastas are used +} + +#[derive(Clone)] +pub struct UrlInfo { pub url: reqwest::Url, - pub expected_md5sum: Option, - pub download_filename: Option, // need to require this if --keep-fastas are used + pub md5sum: Option, + pub range: Option<(usize, usize)>, } #[derive(Clone)] @@ -174,6 +182,116 @@ pub fn load_gbassembly_info(input_csv: String) -> Result<(Vec, u Ok((results, row_count)) } +fn parse_urls(url_field: Option<&str>) -> Result, anyhow::Error> { + let url_field = url_field.ok_or_else(|| anyhow!("Missing 'url' field"))?; + + let mut urls = Vec::new(); + + for s in url_field.split(';').map(|s| s.trim()) { + if s.is_empty() { + return Err(anyhow!("Empty URL entry found in 'url' field")); + } + + let parsed_url = + reqwest::Url::parse(s).map_err(|e| anyhow!("Invalid URL '{}': {}", s, e))?; + urls.push(parsed_url); + } + + if urls.is_empty() { + return Err(anyhow!("No valid URLs found in 'url' field")); + } + + Ok(urls) +} + +fn parse_md5sums( + md5sum_field: &str, + expected_num_urls: usize, + accession: &str, +) -> Result<(Vec>, usize), anyhow::Error> { + if md5sum_field.trim().is_empty() { + // Return a vector of None for each expected URL and a count of 0 + return Ok((vec![None; 
expected_num_urls], 0)); + } + + let md5sums: Vec<Option<String>> = md5sum_field + .split(';') + .map(|s| { + let trimmed = s.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } + }) + .collect(); + + // Validate the number of MD5 sums matches the expected number of URLs + if md5sums.len() != expected_num_urls { + return Err(anyhow::anyhow!( + "Number of MD5 sums ({}) does not match the number of URLs ({}) for accession '{}'", + md5sums.len(), + expected_num_urls, + accession + )); + } + + // Count the number of non-None MD5 sums + let count = md5sums.iter().filter(|md5| md5.is_some()).count(); + + Ok((md5sums, count)) +} + +fn parse_ranges( + range_field: &str, + expected_num_ranges: usize, +) -> Result<Vec<Option<(usize, usize)>>, String> { + if range_field.trim().is_empty() { + // Return a vector of None for each expected range + return Ok(vec![None; expected_num_ranges]); + } + + let ranges: Vec<&str> = range_field.split(';').collect(); + + // Check if the number of ranges matches expected_num_ranges + if ranges.len() != expected_num_ranges { + return Err(format!( + "Number of ranges ({}) does not match expected number of ranges ({})", + ranges.len(), + expected_num_ranges + )); + } + + ranges + .into_iter() + .map(|s| { + let s = s.trim(); // Trim whitespace + if s.is_empty() { + return Ok(None); // Treat empty range as None + } + let parts: Vec<&str> = s.split('-').collect(); + if parts.len() == 2 { + let start = parts[0] + .parse::<usize>() + .map_err(|_| format!("Invalid start value in range: {}", s))?; + let end = parts[1] + .parse::<usize>() + .map_err(|_| format!("Invalid end value in range: {}", s))?; + if start < end { + Ok(Some((start, end))) // Return Some for valid ranges + } else { + Err(format!( + "Start value must be less than end value in range: {}", + s + )) + } + } else { + Err(format!("Invalid range format: {}", s)) + } + }) + .collect() +} + pub fn load_accession_info( input_csv: String, keep_fasta: bool, @@ -195,6 +313,7 @@ pub fn load_accession_info( "md5sum", "download_filename", "url", + "range", ]; if header != expected_header { return Err(anyhow!( @@ -227,36 +346,47 @@ pub fn load_accession_info( .ok_or_else(|| anyhow!("Missing 'moltype' field"))? .parse::<InputMolType>() .map_err(|_| anyhow!("Invalid 'moltype' value"))?; - let expected_md5sum = record.get(3).map(|s| s.to_string()); + + // Parse URLs + let url_result = parse_urls(record.get(5)); + let urls = match url_result { + Ok(urls) => { + if urls.is_empty() { + return Err(anyhow!("No valid URLs found in 'url' field")); + } + urls + } + Err(e) => return Err(e), // Propagate the error if parsing fails + }; + + // Parse MD5sums (optional) + let (md5sums, md5sum_count_in_row) = + parse_md5sums(record.get(3).unwrap_or(""), urls.len(), &acc)?; + // Update the overall MD5 sum count + md5sum_count += md5sum_count_in_row; + + // Parse ranges (optional) + let range_field = record.get(6).unwrap_or(""); + let ranges = parse_ranges(range_field, urls.len()).map_err(|e| anyhow!("{}", e))?; + + // Combine URLs, MD5 sums, and ranges into UrlInfo + let url_info: Vec<UrlInfo> = urls + .into_iter() + .zip(md5sums) + .zip(ranges) + .map(|((url, md5sum), range)| UrlInfo { url, md5sum, range }) + .collect(); + let download_filename = record.get(4).map(|s| s.to_string()); if keep_fasta && download_filename.is_none() { return Err(anyhow!("Missing 'download_filename' field")); } - let url = record - .get(5) - .ok_or_else(|| anyhow!("Missing 'url' field"))?
- .split(',') - .filter_map(|s| { - if s.starts_with("http://") || s.starts_with("https://") || s.starts_with("ftp://") - { - reqwest::Url::parse(s).ok() - } else { - None - } - }) - .next() - .ok_or_else(|| anyhow!("Invalid 'url' value"))?; - // count entries with url and md5sum - if expected_md5sum.is_some() { - md5sum_count += 1; - } // store accession data results.push(AccessionData { accession: acc, name, moltype, - url, - expected_md5sum, + url_info, download_filename, }); } @@ -312,3 +442,620 @@ impl MultiCollection { records_map } } +#[derive(Clone)] +pub struct FailedDownload { + accession: String, + name: String, + moltype: String, + md5sum: String, + download_filename: String, + url: String, + range: String, +} + +impl FailedDownload { + /// Build a `FailedDownload` from `GBAssemblyData` with detailed information + pub fn from_gbassembly( + accession: String, + name: String, + moltype: String, + md5sum: Option<String>, // Single MD5 checksum + download_filename: Option<String>, // Download filename + url: Option<Url>, // URL for the file + range: Option<(usize, usize)>, // Optional range for the download + ) -> Self { + Self { + accession, + name, + moltype, + md5sum: md5sum.unwrap_or_default(), + download_filename: download_filename.unwrap_or_default(), + url: url.map(|u| u.to_string()).unwrap_or_default(), + range: range + .map(|(start, end)| format!("{}-{}", start, end)) + .unwrap_or_default(), // Format range or use "" + } + } + + fn parse_to_separated_string<F, T>(url_info: &[UrlInfo], mut extractor: F) -> String + where + F: FnMut(&UrlInfo) -> Option<T>, + T: ToString, + { + let results: Vec<String> = url_info + .iter() + .map(|info| extractor(info).map_or("".to_string(), |v| v.to_string())) // Map `None` to empty string + .collect(); + + if results.iter().all(|entry| entry.is_empty()) { + "".to_string() // If all entries are empty, return `""` + } else { + results.join(";") // Otherwise, join with `;` + } + } + + /// Build a `FailedDownload` from `AccessionData` + pub fn from_accession_data(acc_data: &AccessionData) -> Self { + Self { + accession: acc_data.accession.clone(), + name: acc_data.name.clone(), + moltype: acc_data.moltype.to_string(), + md5sum: Self::parse_to_separated_string(&acc_data.url_info, |info| info.md5sum.clone()), + download_filename: acc_data.download_filename.clone().unwrap_or_default(), + url: Self::parse_to_separated_string(&acc_data.url_info, |info| { + Some(info.url.to_string()) + }), + range: Self::parse_to_separated_string(&acc_data.url_info, |info| { + info.range.map(|(start, end)| format!("{}-{}", start, end)) + }), + } + } + + pub fn to_csv_record(&self) -> String { + format!( + "{},{},{},{},{},{},{}\n", + self.accession, + self.name, + self.moltype, + self.md5sum, + self.download_filename, + self.url, + self.range, + ) + } + + pub fn csv_header() -> &'static str { + "accession,name,moltype,md5sum,download_filename,url,range\n" + } + + /// Write a `FailedDownload` to a CSV writer + pub async fn to_writer<W: AsyncWriteExt + Unpin>( + &self, + writer: &mut W, + ) -> Result<(), std::io::Error> { + writer.write_all(self.to_csv_record().as_bytes()).await + } +} + +pub struct FailedChecksum { + accession: String, + name: String, + moltype: String, + md5sum_url: Option<Url>, + download_filename: Option<String>, + url: Option<Url>, + expected_md5sum: Option<String>, + reason: String, +} + +impl FailedChecksum { + #[allow(clippy::too_many_arguments)] + pub fn new( + accession: String, + name: String, + moltype: String, + md5sum_url: Option<Url>, + download_filename: Option<String>, + url: Option<Url>, + expected_md5sum: Option<String>, + reason: String, + ) ->
Self { + Self { + accession, + name, + moltype, + md5sum_url, + download_filename, + url, + expected_md5sum, + reason, + } + } + + /// Convert a `FailedChecksum` to a CSV-formatted string + pub fn to_csv_record(&self) -> String { + let md5sum_url_str = self + .md5sum_url + .as_ref() + .map(|u| u.to_string()) + .unwrap_or_default(); + + let url_str = self.url.as_ref().map(|u| u.to_string()).unwrap_or_default(); + + format!( + "{},{},{},{},{},{},{},{}\n", + self.accession, + self.name, + self.moltype, + md5sum_url_str, + self.download_filename.clone().unwrap_or_default(), + url_str, + self.expected_md5sum.clone().unwrap_or_default(), + self.reason, + ) + } + + /// Get the CSV header for a `FailedChecksum` + pub fn csv_header() -> &'static str { + "accession,name,moltype,md5sum_url,download_filename,url,expected_md5sum,reason\n" + } + + /// Write a `FailedChecksum` to a CSV writer + pub async fn to_writer<W: AsyncWriteExt + Unpin>( + &self, + writer: &mut W, + ) -> Result<(), std::io::Error> { + writer.write_all(self.to_csv_record().as_bytes()).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use reqwest::Url; + + #[test] + fn test_parse_urls_valid_urls() { + let url_field = Some("http://example.com; https://example.org"); + let result = parse_urls(url_field).unwrap(); + + assert_eq!( + result, + vec![ + Url::parse("http://example.com").unwrap(), + Url::parse("https://example.org").unwrap() + ] + ); + } + + #[test] + fn test_parse_urls_with_whitespace() { + let url_field = Some(" http://example.com ; https://example.org "); + let result = parse_urls(url_field).unwrap(); + + assert_eq!( + result, + vec![ + Url::parse("http://example.com").unwrap(), + Url::parse("https://example.org").unwrap() + ] + ); + } + + #[test] + fn test_parse_urls_with_empty_entries() { + let url_field = Some("http://example.com;;https://example.org"); + let result = parse_urls(url_field); + + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Empty URL entry found in 'url' field" + ); + } + + #[test] + fn test_parse_urls_invalid_url() { + let url_field = Some("http://example.com; invalid-url; https://example.org"); + let result = parse_urls(url_field); + + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid URL 'invalid-url': relative URL without a base" + ); + } + + #[test] + fn test_parse_urls_empty_field() { + let url_field = Some(""); + let result = parse_urls(url_field); + + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Empty URL entry found in 'url' field" + ); + } + + #[test] + fn test_parse_urls_missing_field() { + let url_field = None; + let result = parse_urls(url_field); + + assert!(result.is_err()); + assert_eq!(result.unwrap_err().to_string(), "Missing 'url' field"); + } + + #[test] + fn test_parse_urls_all_invalid() { + let url_field = Some("invalid-url; still-not-a-url"); + let result = parse_urls(url_field); + + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Invalid URL 'invalid-url': relative URL without a base" + ); + } + + #[test] + fn test_parse_ranges_valid() { + let range_field = "1-10;20-30;40-50"; + let expected_num_ranges = 3; + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_ok()); + assert_eq!( + result.unwrap(), + vec![Some((1, 10)), Some((20, 30)), Some((40, 50))] + ); + } + + #[test] + fn test_parse_ranges_empty_field() { + let range_field = " "; + let expected_num_ranges = 3; + let result = parse_ranges(range_field, expected_num_ranges); + 
assert!(result.is_ok()); + assert_eq!(result.unwrap(), vec![None, None, None]); + } + + #[test] + fn test_parse_ranges_neg_start() { + let range_field = "1-10;-20-30"; + let expected_num_ranges = 2; + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid range format: -20-30"); + } + + #[test] + fn test_parse_ranges_invalid_start() { + let range_field = "1-10;bar-30"; + let expected_num_ranges = 2; + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid start value in range: bar-30"); + } + + #[test] + fn test_parse_ranges_invalid_end() { + let range_field = "1-10;20-bar"; + let expected_num_ranges = 2; + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid end value in range: 20-bar"); + } + + #[test] + fn test_parse_ranges_start_not_less_than_end() { + let range_field = "30-10"; + let expected_num_ranges = 1; + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + "Start value must be less than end value in range: 30-10" + ); + } + + #[test] + fn test_parse_ranges_extra_ranges() { + let range_field = "1-10;20-30;40-50"; + let expected_num_ranges = 5; // Expecting more ranges than provided + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_err()); // Now expecting an error + assert_eq!( + result.unwrap_err(), + "Number of ranges (3) does not match expected number of ranges (5)" + ); + } + + #[test] + fn test_parse_ranges_fewer_ranges() { + let range_field = "1-10;20-30"; + let expected_num_ranges = 3; // Expecting more ranges than provided + let result = parse_ranges(range_field, expected_num_ranges); + + assert!(result.is_err()); // Now expecting an error + assert_eq!( + result.unwrap_err(), + "Number of ranges (2) does not match expected number of ranges (3)" + ); + } + + #[test] + fn test_parse_ranges_with_empty_values() { + let range_field = "1-10;;20-30"; + let expected_num_ranges = 3; + let result = parse_ranges(range_field, expected_num_ranges).unwrap(); + + assert_eq!(result, vec![Some((1, 10)), None, Some((20, 30))]); + } + + #[test] + fn test_parse_md5sums_valid() { + let md5sum_field = "abcd1234;efgh5678;ijkl9012"; + let expected_num_urls = 3; + let accession = "ACC123"; + + let result = parse_md5sums(md5sum_field, expected_num_urls, accession).unwrap(); + assert_eq!( + result, + ( + vec![ + Some("abcd1234".to_string()), + Some("efgh5678".to_string()), + Some("ijkl9012".to_string()) + ], + 3 + ) + ); + } + + #[test] + fn test_parse_md5sums_empty_field() { + let md5sum_field = ""; + let expected_num_urls = 2; + let accession = "ACC123"; + + let result = parse_md5sums(md5sum_field, expected_num_urls, accession).unwrap(); + assert_eq!(result, (vec![None, None], 0)); + } + + #[test] + fn test_parse_md5sums_mismatched_count_more_md5s() { + let md5sum_field = "abcd1234;efgh5678;ijkl9012"; + let expected_num_urls = 2; // Fewer URLs than MD5 sums + let accession = "ACC123"; + + let result = parse_md5sums(md5sum_field, expected_num_urls, accession); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Number of MD5 sums (3) does not match the number of URLs (2) for accession 'ACC123'" + ); + } + + #[test] + fn test_parse_md5sums_mismatched_count_fewer_md5s() { + let md5sum_field = "abcd1234;efgh5678"; + let 
expected_num_urls = 3; // More URLs than MD5 sums + let accession = "ACC123"; + + let result = parse_md5sums(md5sum_field, expected_num_urls, accession); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err().to_string(), + "Number of MD5 sums (2) does not match the number of URLs (3) for accession 'ACC123'" + ); + } + + #[test] + fn test_parse_md5sums_with_whitespace() { + let md5sum_field = " abcd1234 ; efgh5678 ; ijkl9012 "; + let expected_num_urls = 3; + let accession = "ACC123"; + + let result = parse_md5sums(md5sum_field, expected_num_urls, accession).unwrap(); + assert_eq!( + result, + ( + vec![ + Some("abcd1234".to_string()), + Some("efgh5678".to_string()), + Some("ijkl9012".to_string()) + ], + 3 + ) + ); + } + + #[test] + fn test_parse_md5sums_some_empty_entries() { + let md5sum_field = "abcd1234;;ijkl9012"; + let expected_num_urls = 3; + let accession = "ACC123"; + + let result = parse_md5sums(md5sum_field, expected_num_urls, accession).unwrap(); + assert_eq!( + result, + ( + vec![ + Some("abcd1234".to_string()), + None, // Empty MD5 sum + Some("ijkl9012".to_string()) + ], + 2 // Only count non-empty Some values + ) + ); + } + + #[test] + fn test_failed_download_from_gbassembly_valid() { + let accession = "ACC123".to_string(); + let name = "Sample Name".to_string(); + let moltype = "DNA".to_string(); + let md5sum = Some("abcd1234".to_string()); + let download_filename = Some("file.fasta".to_string()); + let url = Some(Url::parse("http://example.com/file.fasta").unwrap()); + let range = Some((10, 20)); + + let failed_download = FailedDownload::from_gbassembly( + accession.clone(), + name.clone(), + moltype.clone(), + md5sum.clone(), + download_filename.clone(), + url.clone(), + range.clone(), + ); + + assert_eq!(failed_download.accession, accession); + assert_eq!(failed_download.name, name); + assert_eq!(failed_download.moltype, moltype); + assert_eq!(failed_download.md5sum, "abcd1234"); + assert_eq!(failed_download.download_filename, "file.fasta"); + assert_eq!(failed_download.url, "http://example.com/file.fasta"); + assert_eq!(failed_download.range, "10-20"); + } + + #[test] + fn test_failed_download_from_gbassembly_defaults() { + let accession = "ACC123".to_string(); + let name = "Sample Name".to_string(); + let moltype = "DNA".to_string(); + + let failed_download = FailedDownload::from_gbassembly( + accession.clone(), + name.clone(), + moltype.clone(), + None, // No MD5 checksum + None, // No filename + None, // No URL + None, // No range + ); + + assert_eq!(failed_download.accession, accession); + assert_eq!(failed_download.name, name); + assert_eq!(failed_download.moltype, moltype); + assert_eq!(failed_download.md5sum, ""); + assert_eq!(failed_download.download_filename, ""); + assert_eq!(failed_download.url, ""); + assert_eq!(failed_download.range, ""); + } + + #[test] + fn test_failed_download_from_accession_data() { + let url_info = vec![ + UrlInfo { + url: Url::parse("http://example.com/file1").unwrap(), + md5sum: Some("abcd1234".to_string()), + range: Some((10, 20)), + }, + UrlInfo { + url: Url::parse("http://example.com/file2").unwrap(), + md5sum: None, + range: Some((30, 40)), + }, + ]; + + let acc_data = AccessionData { + accession: "ACC123".to_string(), + name: "Sample Name".to_string(), + moltype: InputMolType::Dna, + url_info, + download_filename: Some("file.fasta".to_string()), + }; + + let failed_download = FailedDownload::from_accession_data(&acc_data); + + assert_eq!(failed_download.accession, "ACC123"); + assert_eq!(failed_download.name, "Sample 
Name"); + assert_eq!(failed_download.moltype, "DNA"); + assert_eq!(failed_download.md5sum, "abcd1234;"); + assert_eq!(failed_download.download_filename, "file.fasta"); + assert_eq!( + failed_download.url, + "http://example.com/file1;http://example.com/file2" + ); + assert_eq!(failed_download.range, "10-20;30-40"); + } + + #[test] + fn test_parse_to_separated_string() { + let url_info = vec![ + UrlInfo { + url: Url::parse("http://example.com/file1").unwrap(), + md5sum: Some("abcd1234".to_string()), + range: Some((10, 20)), + }, + UrlInfo { + url: Url::parse("http://example.com/file2").unwrap(), + md5sum: None, + range: Some((30, 40)), + }, + ]; + + let md5sum_result = + FailedDownload::parse_to_separated_string(&url_info, |info| info.md5sum.clone()); + assert_eq!(md5sum_result, "abcd1234;"); + + let url_result = + FailedDownload::parse_to_separated_string(&url_info, |info| Some(info.url.to_string())); + assert_eq!( + url_result, + "http://example.com/file1;http://example.com/file2" + ); + + let range_result = FailedDownload::parse_to_separated_string(&url_info, |info| { + info.range.map(|(start, end)| format!("{}-{}", start, end)) + }); + assert_eq!(range_result, "10-20;30-40"); + } + + #[test] + fn test_parse_to_separated_string_2() { + let url_info = vec![ + UrlInfo { + url: Url::parse("http://example.com/file1").unwrap(), + md5sum: Some("abcd1234".to_string()), + range: Some((10, 20)), + }, + UrlInfo { + url: Url::parse("http://example.org/file2").unwrap(), + md5sum: Some("efgh5678".to_string()), + range: Some((30, 40)), + }, + UrlInfo { + url: Url::parse("http://example.net/file3").unwrap(), + md5sum: Some("ijkl9012".to_string()), + range: Some((50, 60)), + }, + ]; + + let md5sum_result = + FailedDownload::parse_to_separated_string(&url_info, |info| info.md5sum.clone()); + assert_eq!(md5sum_result, "abcd1234;efgh5678;ijkl9012"); + + let url_result = + FailedDownload::parse_to_separated_string(&url_info, |info| Some(info.url.to_string())); + assert_eq!( + url_result, + "http://example.com/file1;http://example.org/file2;http://example.net/file3" + ); + + let range_result = FailedDownload::parse_to_separated_string(&url_info, |info| { + info.range.map(|(start, end)| format!("{}:{}", start, end)) + }); + assert_eq!(range_result, "10:20;30:40;50:60"); + } +} diff --git a/tests/sourmash_tst_utils.py b/tests/sourmash_tst_utils.py index 4bbc87f..aaaa37d 100644 --- a/tests/sourmash_tst_utils.py +++ b/tests/sourmash_tst_utils.py @@ -7,8 +7,7 @@ import collections import pprint -import pkg_resources -from pkg_resources import Requirement, resource_filename, ResolutionError +import importlib.metadata import traceback from io import open # pylint: disable=redefined-builtin from io import StringIO @@ -43,23 +42,13 @@ def _runscript(scriptname): namespace = {"__name__": "__main__"} namespace['sys'] = globals()['sys'] - try: - pkg_resources.load_entry_point("sourmash", 'console_scripts', scriptname)() - return 0 - except pkg_resources.ResolutionError: - pass - - path = scriptpath() - - scriptfile = os.path.join(path, scriptname) - if os.path.isfile(scriptfile): - if os.path.isfile(scriptfile): - exec( # pylint: disable=exec-used - compile(open(scriptfile).read(), scriptfile, 'exec'), - namespace) - return 0 - - return -1 + entry_points = importlib.metadata.entry_points( + group="console_scripts", name="sourmash" + ) + assert len(entry_points) == 1 + smash_cli = tuple(entry_points)[0].load() + smash_cli() + return 0 ScriptResults = collections.namedtuple('ScriptResults', diff --git 
a/tests/test-data/GCA_000175535.1_ASM17553v1_genomic.1-50000.fna.gz b/tests/test-data/GCA_000175535.1_ASM17553v1_genomic.1-50000.fna.gz new file mode 100644 index 0000000..c693f97 Binary files /dev/null and b/tests/test-data/GCA_000175535.1_ASM17553v1_genomic.1-50000.fna.gz differ diff --git a/tests/test-data/GCA_000175535.1_ASM17553v1_genomic.50000-100000.fna.gz b/tests/test-data/GCA_000175535.1_ASM17553v1_genomic.50000-100000.fna.gz new file mode 100644 index 0000000..a872cef Binary files /dev/null and b/tests/test-data/GCA_000175535.1_ASM17553v1_genomic.50000-100000.fna.gz differ diff --git a/tests/test-data/acc-merged-md5sums.csv b/tests/test-data/acc-merged-md5sums.csv new file mode 100644 index 0000000..b7bd78a --- /dev/null +++ b/tests/test-data/acc-merged-md5sums.csv @@ -0,0 +1,3 @@ +accession,name,moltype,md5sum,download_filename,url,range +both,both name,dna,47b9fb20c51f0552b87db5d44d5d4566;a1a8f1c6dc56999c73fe298871c963d1,both.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz; https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz, + diff --git a/tests/test-data/acc-merged.csv b/tests/test-data/acc-merged.csv new file mode 100644 index 0000000..dd06da1 --- /dev/null +++ b/tests/test-data/acc-merged.csv @@ -0,0 +1,3 @@ +accession,name,moltype,md5sum,download_filename,url,range +both,both name,dna,,both.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz; https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz, + diff --git a/tests/test-data/acc-url-md5sum.csv b/tests/test-data/acc-url-md5sum.csv index 542cc8a..ab39a15 100644 --- a/tests/test-data/acc-url-md5sum.csv +++ b/tests/test-data/acc-url-md5sum.csv @@ -1,3 +1,3 @@ -accession,name,moltype,md5sum,download_filename,url -GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,dna,47b9fb20c51f0552b87db5d44d5d4566,GCA_000961135.2_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz -GCA_000175535.1,GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,b1234567,GCA_000175535.1_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz +accession,name,moltype,md5sum,download_filename,url,range +GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,dna,47b9fb20c51f0552b87db5d44d5d4566,GCA_000961135.2_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz, +GCA_000175535.1,GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,b1234567,GCA_000175535.1_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz, diff --git a/tests/test-data/acc-url-range.csv b/tests/test-data/acc-url-range.csv new file mode 100644 index 0000000..b4069df --- /dev/null +++ b/tests/test-data/acc-url-range.csv @@ -0,0 +1,3 @@ +accession,name,moltype,md5sum,download_filename,url,range 
+GCA_000175535.1_first50kb,GCA_000175535.1_first50kb Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic_first50kb.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz,1-50000 +GCA_000175535.1_second50kb,GCA_000175535.1_second50kb Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic_second50kb.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz,50000-100000 diff --git a/tests/test-data/acc-url.csv b/tests/test-data/acc-url.csv index 8c3a87e..133d968 100644 --- a/tests/test-data/acc-url.csv +++ b/tests/test-data/acc-url.csv @@ -1,4 +1,4 @@ -accession,name,moltype,md5sum,download_filename,url -GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,dna,47b9fb20c51f0552b87db5d44d5d4566,GCA_000961135.2_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz -GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,protein,fb7920fb8f3cf5d6ab9b6b754a5976a4,GCA_000961135.2_protein.urlsketch.faa.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_protein.faa.gz -GCA_000175535.1,GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz +accession,name,moltype,md5sum,download_filename,url,range +GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,dna,47b9fb20c51f0552b87db5d44d5d4566,GCA_000961135.2_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz, +GCA_000961135.2,GCA_000961135.2 Candidatus Aramenus sulfurataquae isolate AZ1-454,protein,fb7920fb8f3cf5d6ab9b6b754a5976a4,GCA_000961135.2_protein.urlsketch.faa.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_protein.faa.gz, +GCA_000175535.1,GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14,dna,a1a8f1c6dc56999c73fe298871c963d1,GCA_000175535.1_genomic.urlsketch.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz, diff --git a/tests/test-data/subseqs.zip b/tests/test-data/subseqs.zip new file mode 100644 index 0000000..56d7c79 Binary files /dev/null and b/tests/test-data/subseqs.zip differ diff --git a/tests/test_gbsketch.py b/tests/test_gbsketch.py index 9f03e7e..895390c 100644 --- a/tests/test_gbsketch.py +++ b/tests/test_gbsketch.py @@ -66,13 +66,14 @@ def test_gbsketch_simple(runtmp, capfd): fail_lines = failF.readlines() print(fail_lines) assert len(fail_lines) == 2 - assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url\n" - acc, name, moltype, md5sum, download_filename, url = fail_lines[1].strip().split(',') + assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url,range\n" + acc, name, moltype, md5sum, 
download_filename, url, range = fail_lines[1].strip().split(',') assert acc == "GCA_000175535.1" assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" assert moltype == "protein" assert download_filename == "GCA_000175535.1_protein.faa.gz" assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz" + assert range == "" def test_gbsketch_manifest(runtmp, capfd): @@ -605,13 +606,14 @@ def test_gbsketch_protein_dayhoff_hp(runtmp): fail_lines = failF.readlines() print(fail_lines) assert len(fail_lines) == 2 - assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url\n" - acc, name, moltype, md5sum, download_filename, url = fail_lines[1].strip().split(',') + assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url,range\n" + acc, name, moltype, md5sum, download_filename, url, range = fail_lines[1].strip().split(',') assert acc == "GCA_000175535.1" assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" assert moltype == "protein" assert download_filename == "GCA_000175535.1_protein.faa.gz" assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz" + assert range == "" def test_gbsketch_simple_batched_single(runtmp, capfd): diff --git a/tests/test_urlsketch.py b/tests/test_urlsketch.py index 4a25557..7e9b704 100644 --- a/tests/test_urlsketch.py +++ b/tests/test_urlsketch.py @@ -3,6 +3,8 @@ """ import os import pytest +import gzip +import screed import csv import sourmash @@ -60,15 +62,16 @@ def test_urlsketch_simple(runtmp): assert os.path.exists(failed) with open(failed, 'r') as failF: header = next(failF).strip() - assert header == "accession,name,moltype,md5sum,download_filename,url" + assert header == "accession,name,moltype,md5sum,download_filename,url,range" for line in failF: print(line) - acc, name, moltype, md5sum, download_filename, url = line.strip().split(',') + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') assert acc == "GCA_000175535.1" assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" assert moltype == "protein" assert download_filename == "GCA_000175535.1_protein.faa.gz" assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz" + assert range == "" def test_urlsketch_manifest(runtmp, capfd): @@ -151,6 +154,46 @@ def test_urlsketch_save_fastas(runtmp): assert sig.md5sum() == ss3.md5sum() +def test_urlsketch_save_fastas_no_append_across_runs(runtmp): + # make sure we overwrite files on subsequent runs (not append to existing) + acc_csv = get_test_data('acc-url.csv') + output = runtmp.output('simple.zip') + failed = runtmp.output('failed.csv') + out_dir = runtmp.output('out_fastas') + + # run once + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', '--fastas', out_dir, '--keep-fasta', + '--param-str', "dna,k=31,scaled=1000", '-p', "protein,k=10,scaled=200") + + # check out fastas exist + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + fa_files = os.listdir(out_dir) + assert set(fa_files) == set(['GCA_000175535.1_genomic.urlsketch.fna.gz', 'GCA_000961135.2_protein.urlsketch.faa.gz', 
'GCA_000961135.2_genomic.urlsketch.fna.gz']) + + # Get the file size for each file + fsizes = set() + for fa_file in fa_files: + file_path = os.path.join(out_dir, fa_file) + file_size = os.path.getsize(file_path) + print(f"File: {fa_file}, Size: {file_size} bytes") + fsizes.add(file_size) + + # run a second time + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', '--fastas', out_dir, '--keep-fasta', + '--param-str', "dna,k=31,scaled=1000", '-p', "protein,k=10,scaled=200") + + fa_files2 = os.listdir(out_dir) + assert set(fa_files2) == set(['GCA_000175535.1_genomic.urlsketch.fna.gz', 'GCA_000961135.2_protein.urlsketch.faa.gz', 'GCA_000961135.2_genomic.urlsketch.fna.gz']) + for fa_file in fa_files2: + file_path = os.path.join(out_dir, fa_file) + file_size = os.path.getsize(file_path) + print(f"File: {fa_file}, Size: {file_size} bytes") + assert file_size in fsizes + + def test_urlsketch_download_only(runtmp, capfd): acc_csv = get_test_data('acc-url.csv') output = runtmp.output('simple.zip') @@ -268,7 +311,7 @@ def test_urlsketch_empty_accfile(runtmp, capfd): captured = capfd.readouterr() print(captured.err) - assert 'Error: Invalid column names in CSV file. Columns should be: ["accession", "name", "moltype", "md5sum", "download_filename", "url"]' in captured.err + assert 'Error: Invalid column names in CSV file. Columns should be: ["accession", "name", "moltype", "md5sum", "download_filename", "url", "range"]' in captured.err def test_urlsketch_bad_acc_fail(runtmp, capfd): @@ -326,13 +369,14 @@ def test_urlsketch_from_gbsketch_failed(runtmp, capfd): with open(failed, 'r') as failF: fail_lines = failF.readlines() assert len(fail_lines) == 2 - assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url\n" - acc, name, moltype, md5sum, download_filename, url = fail_lines[1].strip().split(',') + assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url,range\n" + acc, name, moltype, md5sum, download_filename, url, range = fail_lines[1].strip().split(',') assert acc == "GCA_000175535.1" assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" assert moltype == "protein" assert download_filename == "GCA_000175535.1_protein.faa.gz" assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz" + assert range == "" assert not runtmp.last_result.out # stdout should be empty out2 = runtmp.output('failed-retry.zip') @@ -352,15 +396,16 @@ def test_urlsketch_from_gbsketch_failed(runtmp, capfd): assert os.path.exists(fail2) with open(fail2, 'r') as failF: header = next(failF).strip() - assert header == "accession,name,moltype,md5sum,download_filename,url" + assert header == "accession,name,moltype,md5sum,download_filename,url,range" for line in failF: print(line) - acc, name, moltype, md5sum, download_filename, url = line.strip().split(',') + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') assert acc == "GCA_000175535.1" assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" assert moltype == "protein" assert download_filename == "GCA_000175535.1_protein.faa.gz" assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz" + assert range == "" def test_zip_file_permissions(runtmp): @@ -435,7 +480,7 @@ def 
test_urlsketch_protein_dayhoff_hp(runtmp): fail_lines = failF.readlines() print(fail_lines) assert len(fail_lines) == 1 - assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url\n" + assert fail_lines[0] == "accession,name,moltype,md5sum,download_filename,url,range\n" def test_urlsketch_md5sum_mismatch_checksum_file(runtmp, capfd): @@ -513,16 +558,17 @@ def test_urlsketch_md5sum_mismatch_no_checksum_file(runtmp, capfd): assert os.path.exists(failed) with open(failed, 'r') as failF: header = next(failF).strip() - assert header == "accession,name,moltype,md5sum,download_filename,url" + assert header == "accession,name,moltype,md5sum,download_filename,url,range" for line in failF: print(line) - acc, name, moltype, md5sum, download_filename, url= line.strip().split(',') + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') assert acc == "GCA_000175535.1" assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" assert moltype == "DNA" assert md5sum == "b1234567" assert download_filename == "GCA_000175535.1_genomic.urlsketch.fna.gz" assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert range == "" def test_urlsketch_simple_batched(runtmp, capfd): @@ -534,6 +580,7 @@ def test_urlsketch_simple_batched(runtmp, capfd): out1 = runtmp.output('simple.1.zip') out2 = runtmp.output('simple.2.zip') out3 = runtmp.output('simple.3.zip') + out4 = runtmp.output('simple.4.zip') sig1 = get_test_data('GCA_000175535.1.sig.gz') sig2 = get_test_data('GCA_000961135.2.sig.gz') @@ -550,6 +597,7 @@ def test_urlsketch_simple_batched(runtmp, capfd): assert os.path.exists(out1) assert os.path.exists(out2) assert os.path.exists(out3) + assert not os.path.exists(out4) assert not os.path.exists(output) # for now, orig output file should be empty. captured = capfd.readouterr() print(captured.err) @@ -581,7 +629,7 @@ def test_urlsketch_simple_batch_restart(runtmp, capfd): out1 = runtmp.output('simple.1.zip') out2 = runtmp.output('simple.2.zip') out3 = runtmp.output('simple.3.zip') - + out4 = runtmp.output('simple.4.zip') sig1 = get_test_data('GCA_000175535.1.sig.gz') sig2 = get_test_data('GCA_000961135.2.sig.gz') @@ -592,17 +640,20 @@ def test_urlsketch_simple_batch_restart(runtmp, capfd): ss4 = sourmash.load_one_signature(sig3, ksize=30, select_moltype='protein') # first, cat sig2 into an output file that will trick urlsketch into thinking it's a prior batch + # write it to the batch 1 filename via `sig cat` first, so it matches the sig that would have been written + runtmp.sourmash('sig', 'cat', sig2, '-o', out1) assert os.path.exists(out1) runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, - '--failed', failed, '-r', '1', '--checksum-fail', ch_fail, + '--failed', failed, '-r', '5', '-n', "1", '--checksum-fail', ch_fail, '--param-str', "dna,k=31,scaled=1000,abund", '-p', "protein,k=10,scaled=200", '--batch-size', '1') assert os.path.exists(out1) assert os.path.exists(out2) assert os.path.exists(out3) + assert not os.path.exists(out4) assert not os.path.exists(output) # for now, orig output file should be empty.
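The batched tests above rely on the naming convention for batched zip outputs: requesting `simple.zip` with `--batch-size` produces `simple.1.zip`, `simple.2.zip`, and so on, and the `out4` checks confirm that no extra batch is written. For reference, here is a minimal Python sketch of that convention; `batch_paths` and `expected_batches` are hypothetical names used only for illustration, not part of the plugin:
```
# Sketch: enumerate the batch zipfiles a run like the above is expected to
# produce, following the "<base>.<N>.zip" (1-indexed) naming these tests check.
import os

def batch_paths(base_zip, expected_batches):
    root, ext = os.path.splitext(base_zip)  # ("simple", ".zip")
    return [f"{root}.{n}{ext}" for n in range(1, expected_batches + 1)]

paths = batch_paths("simple.zip", 3)
assert paths == ["simple.1.zip", "simple.2.zip", "simple.3.zip"]
# On restart, batch files already present (and readable as zips) are reused.
missing = [p for p in paths if not os.path.exists(p)]
```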
captured = capfd.readouterr() print(captured.err) @@ -642,14 +693,14 @@ def test_urlsketch_negative_batch_size(runtmp): def test_urlsketch_simple_batch_restart_with_incomplete_zip(runtmp, capfd): # test restart with complete + incomplete zipfile batches acc_csv = get_test_data('acc-url.csv') - output = runtmp.output('simple.zip') + output = runtmp.output('restart.zip') failed = runtmp.output('failed.csv') ch_fail = runtmp.output('checksum_dl_failed.csv') - out1 = runtmp.output('simple.1.zip') - out2 = runtmp.output('simple.2.zip') - out3 = runtmp.output('simple.3.zip') - + out1 = runtmp.output('restart.1.zip') + out2 = runtmp.output('restart.2.zip') + out3 = runtmp.output('restart.3.zip') + out4 = runtmp.output('restart.4.zip') sig1 = get_test_data('GCA_000175535.1.sig.gz') sig2 = get_test_data('GCA_000961135.2.sig.gz') @@ -668,13 +719,14 @@ def test_urlsketch_simple_batch_restart_with_incomplete_zip(runtmp, capfd): f.write(b"This is not a valid zip file!") runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, - '--failed', failed, '-r', '1', '--checksum-fail', ch_fail, + '--failed', failed, '-r', '5', '-n', "1", '--checksum-fail', ch_fail, '--param-str', "dna,k=31,scaled=1000,abund", '-p', "protein,k=10,scaled=200", '--batch-size', '1') assert os.path.exists(out1) assert os.path.exists(out2) assert os.path.exists(out3) + assert not os.path.exists(out4) assert not os.path.exists(output) # for now, orig output file should be empty. captured = capfd.readouterr() print(captured.err) @@ -787,3 +839,618 @@ def test_urlsketch_simple_skipmer(runtmp, capfd): assert ( siginfo["molecule"] == expected["moltype"] ), f"Moltype mismatch: {siginfo['molecule']}" + + +def test_urlsketch_simple_merged(runtmp): + acc_csv = get_test_data('acc-merged.csv') + output = runtmp.output('merged.zip') + failed = runtmp.output('failed.csv') + + sig1 = get_test_data('GCA_000175535.1.sig.gz') + sig2 = get_test_data('GCA_000961135.2.sig.gz') + merged_sig = runtmp.output("sigmerge.zip") + + # create merged signature + runtmp.sourmash("sig", "merge", "-k", "31", sig1, sig2, "--set-name", "both name", '-o', merged_sig) + msigidx = sourmash.load_file_as_index(merged_sig) + msig = list(msigidx.signatures())[0] + print(msig.name) + + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=1000") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + + assert len(sigs) == 1 + sig = sigs[0] + assert sig.name == msig.name == "both name" + print(msig.md5sum()) + assert sig.md5sum() == msig.md5sum() + assert sig.minhash.moltype == msig.minhash.moltype == "DNA" + assert os.path.exists(failed) + + +def test_urlsketch_simple_merged_with_md5sums(runtmp): + acc_csv = get_test_data('acc-merged-md5sums.csv') + output = runtmp.output('merged.zip') + failed = runtmp.output('failed.csv') + + sig1 = get_test_data('GCA_000175535.1.sig.gz') + sig2 = get_test_data('GCA_000961135.2.sig.gz') + merged_sig = runtmp.output("sigmerge.zip") + + # create merged signature + runtmp.sourmash("sig", "merge", "-k", "31", sig1, sig2, "--set-name", "both name", '-o', merged_sig) + msigidx = sourmash.load_file_as_index(merged_sig) + msig = list(msigidx.signatures())[0] + print(msig.name) + + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=1000") + + assert os.path.exists(output) + 
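The merged-signature tests build their expected values with the `sourmash sig merge` CLI. The same expectation can be sketched with the sourmash Python API; this is a minimal sketch assuming flat (non-abundance) scaled DNA sketches at k=31, with placeholder file paths:
```
# Sketch: Python-API analogue of the `sig merge` calls in these tests.
import sourmash
from sourmash import SourmashSignature

def merge_two(path1, path2, name):
    ss1 = sourmash.load_one_signature(path1, ksize=31, select_moltype='DNA')
    ss2 = sourmash.load_one_signature(path2, ksize=31, select_moltype='DNA')
    combined = ss1.minhash + ss2.minhash  # union of hashes; params must match
    return SourmashSignature(combined, name=name)

# merged = merge_two('GCA_000175535.1.sig.gz', 'GCA_000961135.2.sig.gz', 'both name')
# merged.md5sum() should then match the urlsketch-built merged signature.
```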
assert not runtmp.last_result.out # stdout should be empty + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + + assert len(sigs) == 1 + sig = sigs[0] + assert sig.name == msig.name == "both name" + print(msig.md5sum()) + assert sig.md5sum() == msig.md5sum() + assert sig.minhash.moltype == msig.minhash.moltype == "DNA" + assert os.path.exists(failed) + + +def test_urlsketch_simple_merged_keep_fasta(runtmp): + acc_csv = get_test_data('acc-merged.csv') + output = runtmp.output('merged.zip') + failed = runtmp.output('failed.csv') + out_dir = runtmp.output('out_fastas') + + sig1 = get_test_data('GCA_000175535.1.sig.gz') + sig2 = get_test_data('GCA_000961135.2.sig.gz') + merged_sig = runtmp.output("sigmerge.zip") + + # create merged signature + runtmp.sourmash("sig", "merge", "-k", "31", sig1, sig2, "--set-name", "both name", '-o', merged_sig) + msigidx = sourmash.load_file_as_index(merged_sig) + msig = list(msigidx.signatures())[0] + print(msig.name) + + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', '--keep-fasta', + '--fastas', out_dir, + '--param-str', "dna,k=31,scaled=1000") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + # check fasta files are present + fa_files = os.listdir(out_dir) + print(fa_files) + assert fa_files == ['both.urlsketch.fna.gz'] + + # check fasta files have records from both entries + n_expected_records = 104 + n_records = 0 + # check one record from each + expected_names = ["ACUJ01000001.1 Chlamydia muridarum MopnTet14 chromosome, whole genome shotgun sequence", + "JZWS02000016.1 MAG: Candidatus Aramenus sulfurataquae isolate AZ1-454 NODE_87_length_15535_cov_30.701232, whole genome shotgun sequence"] + rec_names = [] + with screed.open(os.path.join(out_dir, fa_files[0])) as inF: + for rec in inF: + n_records +=1 + rec_names.append(rec.name) + + assert n_records == n_expected_records + assert all(n in rec_names for n in expected_names) + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + + assert len(sigs) == 1 + sig = sigs[0] + assert sig.name == msig.name == "both name" + print(msig.md5sum()) + assert sig.md5sum() == msig.md5sum() + assert sig.minhash.moltype == msig.minhash.moltype == "DNA" + assert os.path.exists(failed) + + +def test_urlsketch_simple_merged_keep_fasta_path_in_filename(runtmp): + acc_csv = get_test_data('acc-merged.csv') + mod_csv = runtmp.output('acc-merged-filepath.csv') + output = runtmp.output('merged.zip') + failed = runtmp.output('failed.csv') + out_dir = runtmp.output('out_fastas') + + # open acc-merged.csv and prepend "/unavailable-path/subdir/" to the "download_filename" column + with open(acc_csv, 'r') as infile, open(mod_csv, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + row['download_filename'] = f"unavailable-path/subdir/{row['download_filename']}" + writer.writerow(row) + + sig1 = get_test_data('GCA_000175535.1.sig.gz') + sig2 = get_test_data('GCA_000961135.2.sig.gz') + merged_sig = runtmp.output("sigmerge.zip") + + # create merged signature + runtmp.sourmash("sig", "merge", "-k", "31", sig1, sig2, "--set-name", "both name", '-o', merged_sig) + msigidx = sourmash.load_file_as_index(merged_sig) + msig = list(msigidx.signatures())[0] + print(msig.name) + + runtmp.sourmash('scripts', 'urlsketch', mod_csv, '-o', output, + '--failed', 
failed, '-r', '1', '--keep-fasta', + '--fastas', out_dir, + '--param-str', "dna,k=31,scaled=1000") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + # check fasta files are present + fa_files = [] + for root, dirs, files in os.walk(out_dir): + for file in files: + if file.endswith('fna.gz'): + fa_files.append(os.path.relpath(os.path.join(root, file), out_dir)) + print(fa_files) + assert fa_files == ['unavailable-path/subdir/both.urlsketch.fna.gz'] + + +def test_urlsketch_simple_merged_incorrect_md5sum_checksum_failure(runtmp): + acc_csv = get_test_data('acc-merged-md5sums.csv') + mod_csv = runtmp.output('acc-merged_incorrect_md5.csv') + output = runtmp.output('merged.zip') + failed = runtmp.output('failed.csv') + ch_failed = runtmp.output('ch-failed.csv') + out_dir = runtmp.output('out_fastas') + + # open file and write incorrect md5sum + with open(acc_csv, 'r') as infile, open(mod_csv, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + row['md5sum'] = row['md5sum'][2:] # strip the first two characters from the first md5sum + print(row) + writer.writerow(row) + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'urlsketch', mod_csv, '-o', output, + '--failed', failed, '-r', '1', '--keep-fasta', + '--fastas', out_dir, '--checksum-fail', ch_failed, + '--param-str', "dna,k=31,scaled=1000") + + assert not os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + # check failure file + assert os.path.exists(ch_failed) + with open(ch_failed, 'r') as failF: + header = next(failF).strip() + print(header) + assert header == "accession,name,moltype,md5sum_url,download_filename,url,expected_md5sum,reason" + for line in failF: + print(line) + acc, name, moltype, md5sum_url, download_filename, url, expected_md5sum, reason = line.strip().split(',') + assert acc == "both" + assert name == "both name" + assert moltype == "DNA" + assert download_filename == "both.urlsketch.fna.gz" + assert expected_md5sum == "b9fb20c51f0552b87db5d44d5d4566" + assert reason == "MD5 hash does not match.
Expected: 'b9fb20c51f0552b87db5d44d5d4566'; Found: '47b9fb20c51f0552b87db5d44d5d4566'" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz" + with open(failed, 'r') as fails: + header = next(fails).strip() + print(header) + assert header == "accession,name,moltype,md5sum,download_filename,url,range" + for line in fails: + print(line) + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') + assert acc == "both" + assert name == "both name" + assert moltype == "DNA" + assert md5sum == "b9fb20c51f0552b87db5d44d5d4566;a1a8f1c6dc56999c73fe298871c963d1" + assert download_filename == "both.urlsketch.fna.gz" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/961/135/GCA_000961135.2_ASM96113v2/GCA_000961135.2_ASM96113v2_genomic.fna.gz;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert range == "" + + +def test_urlsketch_with_range(runtmp): + acc_csv = get_test_data('acc-url-range.csv') + subseqs = get_test_data('subseqs.zip') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + + # open subseq sigs + idx = sourmash.load_file_as_index(subseqs) + siglist = list(idx.signatures()) + ss1 = siglist[0] + ss2 = siglist[1] + + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=100") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + + assert len(sigs) == 2 + for sig in sigs: + ident = sig.name.split(' ')[0] + assert ident in ["GCA_000175535.1_first50kb", "GCA_000175535.1_second50kb"] + print(ident) + if ident == "GCA_000175535.1_first50kb": + assert sig.md5sum() == ss1.md5sum() + if ident == "GCA_000175535.1_second50kb": + assert sig.md5sum() == ss2.md5sum() + assert os.path.exists(failed) + + +def test_urlsketch_with_range_keep_fasta(runtmp): + acc_csv = get_test_data('acc-url-range.csv') + subseqs = get_test_data('subseqs.zip') + first50kb = get_test_data('GCA_000175535.1_ASM17553v1_genomic.1-50000.fna.gz') + second50kb = get_test_data('GCA_000175535.1_ASM17553v1_genomic.50000-100000.fna.gz') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + out_dir = runtmp.output('out_fastas') + + # open subseq sigs + idx = sourmash.load_file_as_index(subseqs) + siglist = list(idx.signatures()) + ss1 = siglist[0] + ss2 = siglist[1] + + runtmp.sourmash('scripts', 'urlsketch', acc_csv, '-o', output, + '--failed', failed, '-r', '1', '--keep-fasta', + '--fastas', out_dir, + '--param-str', "dna,k=31,scaled=100") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + # check fasta files are present + fa_files = os.listdir(out_dir) + print(fa_files) + assert set(fa_files) == set(['GCA_000175535.1_genomic_first50kb.urlsketch.fna.gz', 'GCA_000175535.1_genomic_second50kb.urlsketch.fna.gz']) + + # Compare the contents of the generated FASTA files to the expected ones + for generated_file, expected_file in [ + ('GCA_000175535.1_genomic_first50kb.urlsketch.fna.gz', first50kb), + ('GCA_000175535.1_genomic_second50kb.urlsketch.fna.gz', second50kb) + ]: + generated_path = os.path.join(out_dir, generated_file) + + # Read the records from both files using screed + gen_records = set((record.name, record.sequence) for record in 
screed.open(generated_path)) + exp_records = set((record.name, record.sequence) for record in screed.open(expected_file)) + + # Assert that the records are identical + assert gen_records == exp_records + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + + assert len(sigs) == 2 + for sig in sigs: + ident = sig.name.split(' ')[0] + assert ident in ["GCA_000175535.1_first50kb", "GCA_000175535.1_second50kb"] + print(ident) + if ident == "GCA_000175535.1_first50kb": + assert sig.md5sum() == ss1.md5sum() + if ident == "GCA_000175535.1_second50kb": + assert sig.md5sum() == ss2.md5sum() + assert os.path.exists(failed) + + +def test_urlsketch_with_range_improper_range_1(runtmp, capfd): + acc_csv = get_test_data('acc-url-range.csv') + acc_mod = runtmp.output("acc-url-range-mod.csv") + subseqs = get_test_data('subseqs.zip') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + + # Modify the range in the acc_csv file + with open(acc_csv, 'r') as infile, open(acc_mod, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + if row['accession'] == 'GCA_000175535.1_second50kb': + row['range'] = '100000-10000000' + writer.writerow(row) + + # open subseq sigs + idx = sourmash.load_file_as_index(subseqs) + siglist = list(idx.signatures()) + ss1 = siglist[0] + + runtmp.sourmash('scripts', 'urlsketch', acc_mod, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=100") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + assert os.path.exists(failed) + captured = capfd.readouterr() + print(captured.err) + assert "Error: Invalid range: start=100000, end=10000000, sequence length=1088736" in captured.err + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + + assert len(sigs) == 1 + for sig in sigs: + ident = sig.name.split(' ')[0] + assert ident == "GCA_000175535.1_first50kb" + assert sig.md5sum() == ss1.md5sum() + + with open(failed, 'r') as failF: + header = next(failF).strip() + print(header) + assert header == "accession,name,moltype,md5sum,download_filename,url,range" + for line in failF: + print(line) + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') + assert acc == "GCA_000175535.1_second50kb" + assert name == "GCA_000175535.1_second50kb Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" + assert moltype == "DNA" + assert md5sum == "a1a8f1c6dc56999c73fe298871c963d1" + assert download_filename == "GCA_000175535.1_genomic_second50kb.urlsketch.fna.gz" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert range == "100000-10000000" + + +def test_urlsketch_with_range_improper_range_2(runtmp, capfd): + acc_csv = get_test_data('acc-url-range.csv') + acc_mod = runtmp.output("acc-url-range-mod.csv") + subseqs = get_test_data('subseqs.zip') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + + # Modify the range in the acc_csv file + with open(acc_csv, 'r') as infile, open(acc_mod, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + if row['accession'] ==
'GCA_000175535.1_second50kb': + row['range'] = '-1-10000000' + writer.writerow(row) + + # open subseq sigs + idx = sourmash.load_file_as_index(subseqs) + siglist = list(idx.signatures()) + ss1 = siglist[0] + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'urlsketch', acc_mod, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=100") + + captured = capfd.readouterr() + print(captured.err) + assert "Error: Invalid range format: -1-10000000" in captured.err + + +def test_urlsketch_merged_ranged(runtmp): + acc_csv = get_test_data('acc-merged-md5sums.csv') + acc_mod = runtmp.output('acc-merged-md5sums-ranges.csv') + subseqs = get_test_data('subseqs.zip') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + sketch_out = runtmp.output('sketch-subseqs.zip') + merged_out = runtmp.output('merged-subseqs.zip') + f1 = get_test_data("GCA_000175535.1_ASM17553v1_genomic.1-50000.fna.gz") + f2 = get_test_data("GCA_000175535.1_ASM17553v1_genomic.50000-100000.fna.gz") + + # Modify the acc_csv file to add range values + with open(acc_csv, 'r') as infile, open(acc_mod, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + row['url'] = "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + row['md5sum'] = "a1a8f1c6dc56999c73fe298871c963d1;a1a8f1c6dc56999c73fe298871c963d1" + row['range'] = '1-50000;50000-100000' + writer.writerow(row) + print(row) + + # sketch subseq files + runtmp.sourmash('sketch', "dna", f1, f2, '--name', + 'both name', '-o', sketch_out, + '-p', "dna,k=31,scaled=100") + + idx = sourmash.load_file_as_index(sketch_out) + sigs1 = list(idx.signatures()) + assert len(sigs1) == 1 + sketchsig = sigs1[0] + + # merge subset sketches + runtmp.sourmash('sig', "merge", subseqs,'--set-name', + 'both name', '-o', merged_out) + idx = sourmash.load_file_as_index(merged_out) + sigs = list(idx.signatures()) + assert len(sigs) == 1 + mergesig = sigs[0] + + # # run urlsketch + runtmp.sourmash('scripts', 'urlsketch', acc_mod, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=100") + + assert os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + + idx = sourmash.load_file_as_index(output) + sigs = list(idx.signatures()) + assert len(sigs) == 1 + sig = sigs[0] + assert sig.name == "both name" + print(sig.md5sum()) + assert sig.md5sum() == sketchsig.md5sum() == mergesig.md5sum() == "5feeed4c8a75c8b3fe67af1270fa92c4" + + +def test_urlsketch_merged_ranged_md5sum_fail_no_checksum_file(runtmp): + acc_csv = get_test_data('acc-merged-md5sums.csv') + acc_mod = runtmp.output('acc-merged-md5sums-ranges.csv') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + + # Modify the acc_csv file to add range values + with open(acc_csv, 'r') as infile, open(acc_mod, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + row['url'] = 
"https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + row['md5sum'] = "a1a8f1c6dc56999c73fe298871c963d1;b2" # second md5sum is incorrect + row['range'] = '1-50000;50000-100000' + writer.writerow(row) + print(row) + + # # run urlsketch + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'urlsketch', acc_mod, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=100") + + assert not os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + + with open(failed, 'r') as failF: + header = next(failF).strip() + assert header == "accession,name,moltype,md5sum,download_filename,url,range" + for line in failF: + print(line) + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') + assert acc == "both" + assert name == "both name" + assert moltype == "DNA" + assert md5sum == "a1a8f1c6dc56999c73fe298871c963d1;b2" + assert download_filename == "both.urlsketch.fna.gz" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert range == "1-50000;50000-100000" + + +def test_urlsketch_merged_ranged_md5sum_fail_with_checksum_file(runtmp): + acc_csv = get_test_data('acc-merged-md5sums.csv') + acc_mod = runtmp.output('acc-merged-md5sums-ranges.csv') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + ch_fail = runtmp.output('ch_failed.csv') + + # Modify the acc_csv file to add range values + with open(acc_csv, 'r') as infile, open(acc_mod, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + row['url'] = "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + row['md5sum'] = "a1a8f1c6dc56999c73fe298871c963d1;b2" # second md5sum is incorrect + row['range'] = '1-50000;50000-100000' + writer.writerow(row) + print(row) + + # # run urlsketch + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'urlsketch', acc_mod, '-o', output, + '--failed', failed, '-r', '1', '--checksum-fail', ch_fail, + '--param-str', "dna,k=31,scaled=100") + + assert not os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + + # since this is a merged dataset, we write both checksum fail and regular fail. 
+ with open(failed, 'r') as failF: + header = next(failF).strip() + assert header == "accession,name,moltype,md5sum,download_filename,url,range" + for line in failF: + print(line) + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') + assert acc == "both" + assert name == "both name" + assert moltype == "DNA" + assert md5sum == "a1a8f1c6dc56999c73fe298871c963d1;b2" + assert download_filename == "both.urlsketch.fna.gz" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert range == "1-50000;50000-100000" + + assert os.path.exists(ch_fail) + with open(ch_fail, 'r') as failF: + header = next(failF).strip() + assert header == "accession,name,moltype,md5sum_url,download_filename,url,expected_md5sum,reason" + for line in failF: + print(line) + acc, name, moltype, md5sum_url, download_filename, url, expected_md5, reason= line.strip().split(',') + assert acc == "both" + assert name == "both name" + assert moltype == "DNA" + assert md5sum_url == "" + assert download_filename == "both.urlsketch.fna.gz" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert expected_md5 == "b2" + assert reason == "MD5 hash does not match. Expected: 'b2'; Found: 'a1a8f1c6dc56999c73fe298871c963d1'" + + +def test_urlsketch_merged_ranged_fail(runtmp): + acc_csv = get_test_data('acc-merged-md5sums.csv') + acc_mod = runtmp.output('acc-merged-md5sums-ranges.csv') + output = runtmp.output('range.zip') + failed = runtmp.output('failed.csv') + + # Modify the acc_csv file to add range values + with open(acc_csv, 'r') as infile, open(acc_mod, 'w', newline='') as outfile: + reader = csv.DictReader(infile) + fieldnames = reader.fieldnames + writer = csv.DictWriter(outfile, fieldnames=fieldnames) + writer.writeheader() + for row in reader: + # first url is incorrect + row['url'] = "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + row['md5sum'] = "a1a8f1c6dc56999c73fe298871c963d1;" + row['range'] = '1-50000;50000-100000' + writer.writerow(row) + print(row) + + # # run urlsketch + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'urlsketch', acc_mod, '-o', output, + '--failed', failed, '-r', '1', + '--param-str', "dna,k=31,scaled=100") + + assert not os.path.exists(output) + assert not runtmp.last_result.out # stdout should be empty + + with open(failed, 'r') as failF: + header = next(failF).strip() + assert header == "accession,name,moltype,md5sum,download_filename,url,range" + for line in failF: + print(line) + acc, name, moltype, md5sum, download_filename, url, range = line.strip().split(',') + assert acc == "both" + assert name == "both name" + assert moltype == "DNA" + assert md5sum == "a1a8f1c6dc56999c73fe298871c963d1;" + assert download_filename == "both.urlsketch.fna.gz" + assert url == "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1;https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_genomic.fna.gz" + assert range == "1-50000;50000-100000"
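Two closing reference sketches for readers working with these CSVs. First, the two failure reports asserted throughout these tests have different schemas: the `--failed` file keeps the original input columns (including the `range` column and any ';'-separated lists) so it can be fed straight back into `urlsketch`, while the `--checksum-fail` file records one row per failing URL with the expected and found checksums. A minimal reader, with placeholder paths:
```
# Sketch: read both failure reports using the headers asserted in these tests.
import csv

def read_failures(failed_csv, checksum_failed_csv):
    # columns: accession,name,moltype,md5sum,download_filename,url,range
    with open(failed_csv, newline='') as f:
        retryable = list(csv.DictReader(f))
    # columns: accession,name,moltype,md5sum_url,download_filename,url,expected_md5sum,reason
    with open(checksum_failed_csv, newline='') as f:
        checksum = list(csv.DictReader(f))
    return retryable, checksum
```
Second, the `md5sum` column values exercised in these tests are MD5 digests of the downloaded files as served (i.e., the compressed `.fna.gz`/`.faa.gz` bytes). A minimal sketch for computing such an entry when building an input CSV; the helper name is illustrative:
```
# Sketch: compute an md5sum column entry for a downloaded file.
import hashlib

def md5_of_file(path, chunk_size=1 << 20):
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()

# For merged rows, join one digest per URL with ';':
# md5_column = ';'.join(md5_of_file(p) for p in downloaded_files)
```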