Skip to content

Commit

Permalink
Upgrade to version 3.11.4 (#113)
Browse files Browse the repository at this point in the history
This includes a new amrfinder_index program to re-index the AMRFinderPlus database.
It also includes some mostly cosmetic code and error-message cleanup, and minor updates to GitHub Actions.
  • Loading branch information
evolarjun authored Mar 9, 2023
1 parent fa68921 commit f2840d9
Show file tree
Hide file tree
Showing 18 changed files with 597 additions and 394 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/binary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v3
- name: prerequisites
run: |
sudo apt-get update
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ccpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v3
- name: prerequisites
run: |
sudo apt-get update
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dockerhub.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Test docker
run: docker run hello-world
- name: Software and DB version
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/mac_ccpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: macos-latest

steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v3
- name: prerequisites
run: |
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/mac_conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ jobs:
build:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: Install conda because built-in conda is borked
run: |
curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ COMPILE.cpp= $(CXX) $(CPPFLAGS) $(SVNREV) $(DBDIR) $(TEST_UPDATE_DB) -c

.PHONY: all clean install release

BINARIES= amr_report amrfinder amrfinder_update fasta_check fasta_extract fasta2parts gff_check dna_mutation
BINARIES= amr_report amrfinder amrfinder_index amrfinder_update fasta_check fasta_extract fasta2parts gff_check dna_mutation

all: $(BINARIES)

Expand Down Expand Up @@ -114,6 +114,10 @@ amrfinder_update: $(amrfinder_updateOBJS)
fi # make sure the next make command rebuilds amrfinder_update
$(CXX) -o $@ $(amrfinder_updateOBJS) -lcurl

amrfinder_index.o: common.hpp common.inc
amrfinder_indexOBJS=amrfinder_index.o common.o
amrfinder_index: $(amrfinder_indexOBJS)
$(CXX) -o $@ $(amrfinder_indexOBJS)

fasta_check.o: common.hpp common.inc
fasta_checkOBJS=fasta_check.o common.o
Expand Down
52 changes: 0 additions & 52 deletions alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -970,58 +970,6 @@ long Alignment::getGlobalTargetStart () const



#if 0
// Tests whether rightPart can be the right-hand continuation of this alignment
// across a frameshift.
// Input: rightPart: candidate alignment located further along the reference
//        diff_max: maximum allowed gap between the two parts on the target;
//                  the reference gap is limited to diff_max / 3
// Return: true iff all checks below pass
// Requires: !targetProt, refProt, rightPart.refProt (enforced by the ASSERTs)
bool Alignment::getFrameShift_right (const Alignment &rightPart,
size_t diff_max) const
{
ASSERT (! targetProt);
ASSERT (refProt);
ASSERT (rightPart. refProt);

// An alignment cannot be a continuation of itself
if (this == & rightPart)
return false;

// Only non-protein targets are considered for rightPart as well
if (rightPart. targetProt)
return false;

// Both parts must hit the same target and reference on the same strand
if ( targetName != rightPart. targetName
|| refName != rightPart. refName
|| targetStrand != rightPart. targetStrand
)
return false;

// On the reference, rightPart must start and end after this alignment,
// and start no further than diff_max / 3 past this alignment's end
if ( refStart >= rightPart. refStart
|| refEnd >= rightPart. refEnd
|| refEnd + diff_max / 3 < rightPart. refStart
)
return false;

if (targetStrand)
{
// targetStrand is true: rightPart follows this alignment on the target,
// with a gap of at most diff_max
if ( targetStart >= rightPart. targetStart
|| targetEnd >= rightPart. targetEnd
|| targetEnd + diff_max < rightPart. targetStart
)
return false;
}
else
{
// targetStrand is false: the order on the target is reversed, so rightPart
// must precede this alignment, again with a gap of at most diff_max
if ( targetStart <= rightPart. targetStart
|| targetEnd <= rightPart. targetEnd
|| targetStart > rightPart. targetEnd + diff_max
)
return false;
}

// Equal remainders modulo 3 of the target starts are rejected
// (presumably: same reading frame, hence no frameshift - TODO confirm)
if (targetStart % 3 == rightPart. targetStart % 3)
return false;

return true;
}
#endif



} // namespace


15 changes: 0 additions & 15 deletions alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,21 +322,6 @@ struct Alignment : Root
}
long getGlobalTargetStart () const;
// Requires: !targetProt, refProt
#if 0
// Symmetric frameshift test between this alignment and other:
// requires nident of this alignment to be not less than other's, and then
// tries getFrameShift_right() with either alignment in the role of the right part.
bool getFrameShift (const Alignment &other,
size_t diff_max) const
// Return: success
// Input: diff_max: in bp
// Requires: !targetProt, refProt, rightPart.refProt
{ return nident >= other. nident
&& ( getFrameShift_right (other, diff_max)
|| other. getFrameShift_right (*this, diff_max)
);
}
private:
bool getFrameShift_right (const Alignment &rightPart,
size_t diff_max) const;
#endif
};


Expand Down
4 changes: 2 additions & 2 deletions amr_report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1331,7 +1331,7 @@ struct Batch
// Pass 1
{
if (verbose ())
cout << "Reading " << famFName << " Pass 1 ..." << endl;
section ("Reading " + famFName + " Pass 1", true);
LineInput f (famFName);
while (f. nextLine ())
try
Expand Down Expand Up @@ -1381,7 +1381,7 @@ struct Batch
}
{
if (verbose ())
cout << "Reading " << famFName << " Pass 2 ..." << endl;
section ("Reading " + famFName + " Pass 2", true);
LineInput f (famFName);
while (f. nextLine ())
{
Expand Down
26 changes: 16 additions & 10 deletions amrfinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
* Dependencies: NCBI BLAST, HMMer
*
* Release changes:
* 03/01/2023 PD-3597 amrfinder_index
* 02/27/2023 section()
* 3.11.4 01/24/2023 GPipe organism string in taxgroup.tab is a comma-separated list of GPipe organisms
* 3.11.3 12/27/2022 "No valid AMRFinder database is found.\nThis directory (or symbolic link to directory) is not found: " + db
* 3.11.2 12/13/2022 PD-4427 a database of the older software minor is loaded for a new software minor version
* 12/05/2022 detect reference frameshifted proteins
* 3.11.1 11/23/2022 PD-4414 modified reference proteins can have unequal lengths of reference and allele sequences
Expand Down Expand Up @@ -610,8 +614,8 @@ struct ThisApplication : ShellApplication
const string downloadLatestInstr ("\nTo download the latest version to the default directory run: amrfinder -u");

if (! directoryExists (db)) // PD-2447
throw runtime_error ("No valid AMRFinder database found: " + db + ifS (! update, downloadLatestInstr));
//throw runtime_error ("No valid AMRFinder database found.\nSymbolic link is not found: " + db + ifS (! update, downloadLatestInstr));
//throw runtime_error ("No valid AMRFinder database found: " + db + ifS (! update, downloadLatestInstr));
throw runtime_error ("No valid AMRFinder database is found.\nThis directory (or symbolic link to directory) is not found: " + db + ifS (! update, downloadLatestInstr));
if (database_version)
cout << "Database directory: " << shellQuote (path2canonical (db)) << endl;
else
Expand Down Expand Up @@ -736,13 +740,15 @@ struct ThisApplication : ShellApplication
if (isLeft (f. line, "#"))
continue;
iss. reset (f. line);
string org, gpipeOrg;
string org, gpipeOrgs;
int num = -1;
iss >> org >> gpipeOrg >> num;
iss >> org >> gpipeOrgs >> num;
QC_ASSERT (! org. empty ());
QC_ASSERT (num >= 0);
QC_ASSERT (iss. eof ());
if (organism1 == gpipeOrg)
const StringVector gpipeOrgVec (gpipeOrgs, ',', true);
QC_ASSERT (gpipeOrgVec. size () >= 1);
if (gpipeOrgVec. contains (organism1))
{
organism1 = org;
found = true;
Expand Down Expand Up @@ -906,15 +912,15 @@ struct ThisApplication : ShellApplication
}
}

stderr << "Running blastp...\n";
stderr. section ("Running blastp");
{
const Chronometer_OnePass cop ("blastp", cerr, false, qc_on && ! quiet);
// " -task blastp-fast -word_size 6 -threshold 21 " // PD-2303
exec (fullProg ("blastp") + " -query " + prot1 + " -db " + tmp + "/db/AMRProt" + " "
+ blastp_par + get_num_threads_param ("blastp", min (nProt, protLen_total / 10000)) + " " BLAST_FMT " -out " + tmp + "/blastp > /dev/null 2> " + tmp + "/blastp-err", tmp + "/blastp-err");
}

stderr << "Running hmmsearch...\n";
stderr. section ("Running hmmsearch");
{
const Chronometer_OnePass cop ("hmmsearch", cerr, false, qc_on && ! quiet);
ASSERT (threads_max >= 1);
Expand Down Expand Up @@ -962,7 +968,7 @@ struct ThisApplication : ShellApplication
EXEC_ASSERT (fastaCheck (dna, false, qcS, logFName, nDna, dnaLen_max, dnaLen_total));
const string blastx (dnaLen_max > 100000 ? "tblastn" : "blastx"); // PAR

stderr << "Running " << blastx << "...\n";
stderr. section ("Running " + blastx);
findProg (blastx);
{
const Chronometer_OnePass cop (blastx, cerr, false, qc_on && ! quiet);
Expand Down Expand Up @@ -1001,7 +1007,7 @@ struct ThisApplication : ShellApplication
if (blastn)
{
findProg ("blastn");
stderr << "Running blastn...\n";
stderr. section ("Running blastn");
const Chronometer_OnePass cop ("blastn", cerr, false, qc_on && ! quiet);
exec (fullProg ("blastn") + " -query " + dna + " -db " + tmp + "/db/AMR_DNA-" + organism1 + " -evalue 1e-20 -dust no -max_target_seqs 10000 "
+ get_num_threads_param ("blastn", min (nDna, dnaLen_total / 2500000)) + " " BLAST_FMT " -out " + tmp + "/blastn > " + logFName + " 2> " + tmp + "/blastn-err", tmp + "/blastn-err");
Expand Down Expand Up @@ -1050,7 +1056,7 @@ struct ThisApplication : ShellApplication


// tmp + "/amr", tmp + "/mutation_all"
stderr << "Making report...\n";
stderr. section ("Making report");
const string printNode (print_node ? " -print_node" : "");
const string nameS (emptyArg (input_name) ? "" : " -name " + input_name);
{
Expand Down
140 changes: 140 additions & 0 deletions amrfinder_index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// amrfinder_index.cpp

/*===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Vyacheslav Brover
*
* File Description:
* Indexing of AMRFinder data
*
* Dependencies: NCBI BLAST, HMMer
*
* Release changes: see amrfinder.cpp
*
*/




#ifdef _MSC_VER
#error "UNIX is required"
#endif

#undef NDEBUG
#include "common.inc"
#include "common.hpp"
using namespace Common_sp;




namespace
{



// ThisApplication

struct ThisApplication : ShellApplication
{
ThisApplication ()
: ShellApplication ("Index the database for AMRFinder", true, true, true)
{
addPositional ("DATABASE", "Directory with AMRFinder database");
addKey ("blast_bin", "Directory for BLAST ending with '/'", "", '\0', "BLAST_DIR");
addFlag ("quiet", "Suppress messages to STDERR", 'q');
version = SVN_REV;
}



void shellBody () const final
{
const string dbDir = getArg ("DATABASE");
const string blast_bin = getArg ("blast_bin");
const bool quiet = getFlag ("quiet");


Stderr stderr (quiet);
stderr << "Running: "<< getCommandLine () << '\n';
const Verbose vrb (qc_on);


if (! directoryExists (dbDir))
throw runtime_error ("Database directory " + dbDir + " does not exist");

const Dir dir (dbDir);
const string dbDirS (dir. get () + "/");

if (! blast_bin. empty ())
prog2dir ["makeblastdb"] = blast_bin;
findProg ("makeblastdb");
findProg ("hmmpress");


// Cf. amrfinder_update.cpp
StringVector dnaPointMuts;
{
LineInput f (dbDirS + "taxgroup.tab");
while (f. nextLine ())
{
if (isLeft (f. line, "#"))
continue;
string taxgroup, gpipe;
int n = -1;
istringstream iss (f. line);
iss >> taxgroup >> gpipe >> n;
QC_ASSERT (n >= 0);
if (n)
dnaPointMuts << taxgroup;
}
}


stderr << "Indexing" << "\n";
exec (fullProg ("hmmpress") + " -f " + shellQuote (dbDirS + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err");
setSymlink (dbDirS, tmp + "/db", true);
exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMRProt" + " -dbtype prot -logfile " + tmp + "/makeblastdb.AMRProt", tmp + "/makeblastdb.AMRProt");
exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_CDS" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.AMR_CDS", tmp + "/makeblastdb.AMR_CDS");
for (const string& dnaPointMut : dnaPointMuts)
exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_DNA-" + dnaPointMut + " -dbtype nucl -logfile " + tmp + "/makeblastdb.AMR_DNA-" + dnaPointMut, tmp + "/makeblastdb.AMR_DNA-" + dnaPointMut);
}
};



} // namespace



int main (int argc,
const char* argv[])
{
ThisApplication app;
return app. run (argc, argv);
}



Loading

0 comments on commit f2840d9

Please sign in to comment.