Skip to content

Commit

Permalink
Merge pull request #3 from iskold/main
Browse files Browse the repository at this point in the history
Added simple  autocycler_wrapper helper script
  • Loading branch information
rrwick authored Feb 25, 2025
2 parents a6e9cdb + fe9d571 commit d659d7e
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 2 deletions.
70 changes: 70 additions & 0 deletions scripts/autocycler_wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env bash

# Command line wrapper for autocycler.
# Usage:
# autocycler_wrapper.sh <reads> <output_folder> <read_partitions> <threads>
#
# Example usage:
# autocycler_wrapper.sh ont.fastq.gz output_autocycler 4 16
#
# The example runs autocycler on the reads 'ont.fastq.gz' and
# outputs everything into the folder 'output_autocycler'.
# It divides the reads into 4 partitions and uses 16 threads.

# Zero-pads a read-partition count to at least two digits (e.g. 4 -> 04).
# Arguments: $1 - partition count
# Outputs:   the padded count on stdout
pad_subset() {
  local count=$1
  # Decide on the length of the count itself, not of the reads path.
  if [ "${#count}" -lt 2 ]; then
    count="0${count}"
  fi
  printf '%s\n' "$count"
}

reads=$1 # Your read set goes here
output=$2 # Name your output folder here
subset=$(pad_subset "${3:-4}") # Number of read subset partitions (defaults to 4; zero-padded below 10)
threads=$4 # Set as appropriate for your system

# Prints the run parameters, one per line.
# Arguments: $1 - reads file, $2 - output folder,
#            $3 - read partitions, $4 - threads
# Outputs:   the parameter summary on stdout
print_parameters() {
  # printf with quoted arguments keeps spaces and glob characters in the
  # values exactly as the user passed them.
  printf '%s\n' "Running autocycler with following parameters:"
  printf 'Reads: %s\n' "$1"
  printf 'Output folder: %s\n' "$2"
  printf 'Read partitions: %s\n' "$3"
  printf 'Threads: %s\n' "$4"
}

# Input parameter print
print_parameters "$reads" "$output" "$subset" "$threads"
# Quoted so an output folder containing spaces is created as one directory.
mkdir -p -- "$output"

# Estimates the genome size of a read set.
# Uses lrge if it is installed, otherwise the slower bundled Raven estimator.
# Arguments: $1 - reads file, $2 - number of threads
# Outputs:   whatever the chosen estimator prints on stdout
# Returns:   the exit status of the chosen estimator
estimate_genome_size() {
  local reads_file=$1
  local n_threads=$2
  # Redirect stdout first, then stderr, so both streams are silenced.
  if command -v lrge >/dev/null 2>&1; then
    lrge -t "$n_threads" "$reads_file"
  else
    genome_size_raven.sh "$reads_file" "$n_threads"
  fi
}

printf '\n%s\n' "Estimating genome size:"
genome_size=$(estimate_genome_size "$reads" "$threads")

# Prints the subsample ids 01, 02, ... up to the given count, one per line.
# Arguments: $1 - number of subsamples (may be zero-padded, e.g. 04)
# Outputs:   zero-padded ids on stdout; an error on stderr for bad input
# Returns:   0 on success, 1 if the count is not a plain integer
sample_ids() {
  local count=$1
  local i
  # Validate instead of eval-ing the value: the count comes straight from
  # the command line and must never be executed as shell code.
  if ! [[ "$count" =~ ^[0-9]+$ ]]; then
    echo "Error: number of read partitions must be an integer, got '${count}'" >&2
    return 1
  fi
  # 10# forces base 10 so padded values such as 08 are not read as octal.
  for ((i = 1; i <= 10#$count; i++)); do
    # NOTE(review): ids are padded to two digits; assumes the subsampled
    # files are named sample_NN.fastq - confirm for counts of 100 or more.
    printf '%02d\n' "$i"
  done
}

# Step 1: subsample the long-read set into multiple files
autocycler subsample --count "$subset" --reads "$reads" --out_dir "${output}/subsampled_reads" --genome_size "$genome_size"

# Step 2: assemble each subsampled file
mkdir -p -- "${output}/assemblies"
for assembler in canu flye miniasm necat nextdenovo raven; do
  # Word-splitting of the ids is intended; they consist of digits only.
  for i in $(sample_ids "$subset"); do
    "${assembler}.sh" "${output}/subsampled_reads/sample_${i}.fastq" "${output}/assemblies/${assembler}_${i}" "$threads" "$genome_size"
  done
done

# Optional step: remove the subsampled reads to save space.
# The path is quoted so output folders containing spaces work, and
# ${output:?} aborts instead of expanding to /subsampled_reads/*.fastq
# when the output folder is empty or unset.
rm -- "${output:?}"/subsampled_reads/*.fastq

# Runs Autocycler steps 3-7 on the assemblies stored under an output folder.
# Arguments: $1 - output folder containing the 'assemblies' directory
# Outputs:   a progress message on stdout, errors on stderr
# Returns:   the exit status of 'autocycler combine', or 1 when no cluster
#            has a 5_final.gfa to combine
run_autocycler_steps() {
  local out_dir=$1
  local autocycler_dir="${out_dir}/autocycler_out"
  local qc_pass_dir="${autocycler_dir}/clustering/qc_pass"
  local cluster
  local -a final_graphs

  # Step 3: compress the input assemblies into a unitig graph
  autocycler compress -i "${out_dir}/assemblies" -a "$autocycler_dir"

  # Step 4: cluster the input contigs into putative genomic sequences
  autocycler cluster -a "$autocycler_dir"

  # Steps 5 and 6: trim and resolve each QC-pass cluster
  for cluster in "$qc_pass_dir"/cluster_*; do
    # An unmatched glob stays literal; skip it instead of passing it on.
    [[ -e "$cluster" ]] || continue
    autocycler trim -c "$cluster"
    autocycler resolve -c "$cluster"
  done

  # Step 7: combine resolved clusters into a final assembly
  final_graphs=("$qc_pass_dir"/cluster_*/5_final.gfa)
  if [[ ! -e "${final_graphs[0]}" ]]; then
    echo "Error: no 5_final.gfa files found under ${qc_pass_dir}" >&2
    return 1
  fi
  echo Running Autocycler combine
  autocycler combine -a "$autocycler_dir" -i "${final_graphs[@]}"
}

run_autocycler_steps "$output"

4 changes: 2 additions & 2 deletions scripts/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ channels:
- bioconda
dependencies:
- any2fasta>=0.4.2 # https://github.com/tseemann/any2fasta
- canu>=2.2 # https://github.com/marbl/canu
- canu>=2.3 # https://github.com/marbl/canu
- flye>=2.9.5 # https://github.com/mikolmogorov/Flye
- lja>=0.2 # https://github.com/AntonBankevich/LJA
# - lja>=0.2 # https://github.com/AntonBankevich/LJA
- metamdbg>=1.0 # https://github.com/GaetanBenoitDev/metaMDBG
- miniasm>=0.3 # https://github.com/lh3/miniasm
- minimap2>=2.28 # https://github.com/lh3/minimap2
Expand Down

0 comments on commit d659d7e

Please sign in to comment.