Skip to content

Commit

Permalink
PD-4566,PD-4548 --hmmer_bin; fasta_check.cpp prohibits '\t' (not any …
Browse files Browse the repository at this point in the history
…'\'), and all restrictions are only for nucleotide sequences
  • Loading branch information
Vyacheslav Brover committed Apr 13, 2023
1 parent 43aedab commit fa70613
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 25 deletions.
16 changes: 14 additions & 2 deletions amrfinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
* Dependencies: NCBI BLAST, HMMer
*
* Release changes:
* PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences
* 3.11.11 04/13/2023 PD-4566 --hmmer_bin
* 3.11.10 04/12/2023 PD-4548 fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier
* PD-4548 fasta_check.cpp prohibits: ',,' and '\\' in all positions, '?' in initial position, and ',' in the last position of a sequence identifier
* 3.11.9 04/11/2023 PD-4560 BLAST -mt_mode is used on Mac only for BLAST version >= 2.13.0
Expand Down Expand Up @@ -306,7 +308,7 @@ struct ThisApplication : ShellApplication
addFlag ("report_common", "Report proteins common to a taxonomy group"); // PD-2756
addKey ("mutation_all", "File to report all mutations", "", '\0', "MUT_ALL_FILE");
addKey ("blast_bin", "Directory for BLAST. Deafult: $BLAST_BIN", "", '\0', "BLAST_DIR");
//addKey ("hmmer_bin" ??
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");
addFlag ("report_all_equal", "Report all equally-scoring BLAST and HMM matches"); // PD-3772
addFlag ("print_node", "print hierarchy node (family)"); // PD-4394
addKey ("name", "Text to be added as the first column \"name\" to all rows of the report, for example it can be an assembly name", "", '\0', "NAME");
Expand Down Expand Up @@ -496,6 +498,7 @@ struct ThisApplication : ShellApplication
const bool report_common = getFlag ("report_common");
const string mutation_all = getArg ("mutation_all");
string blast_bin = getArg ("blast_bin");
string hmmer_bin = getArg ("hmmer_bin");
const bool equidistant = getFlag ("report_all_equal");
const bool print_node = getFlag ("print_node");
const string input_name = shellQuote (getArg ("name"));
Expand Down Expand Up @@ -613,6 +616,12 @@ struct ThisApplication : ShellApplication
prog2dir ["blastn"] = blast_bin;
prog2dir ["makeblastdb"] = blast_bin;
}

if (! hmmer_bin. empty ())
{
addDirSlash (hmmer_bin);
prog2dir ["hmmsearch"] = hmmer_bin;
}


if (update)
Expand All @@ -635,7 +644,10 @@ struct ThisApplication : ShellApplication
string blast_bin_par;
if (! blast_bin. empty ())
blast_bin_par = " --blast_bin " + shellQuote (blast_bin);
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par
string hmmer_bin_par;
if (! hmmer_bin. empty ())
hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin);
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par
+ ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName);
}
else
Expand Down
8 changes: 7 additions & 1 deletion amrfinder_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct ThisApplication : ShellApplication
{
addPositional ("DATABASE", "Directory with AMRFinder database");
addKey ("blast_bin", "Directory for BLAST", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");
addFlag ("quiet", "Suppress messages to STDERR", 'q');
version = SVN_REV;
}
Expand All @@ -74,11 +75,13 @@ struct ThisApplication : ShellApplication
{
string dbDir = getArg ("DATABASE");
string blast_bin = getArg ("blast_bin");
string hmmer_bin = getArg ("hmmer_bin");
const bool quiet = getFlag ("quiet");


addDirSlash (dbDir);
addDirSlash (blast_bin);
addDirSlash (hmmer_bin);


Stderr stderr (quiet);
Expand All @@ -90,8 +93,11 @@ struct ThisApplication : ShellApplication
throw runtime_error ("Database directory " + dbDir + " does not exist");

if (! blast_bin. empty ())
prog2dir ["makeblastdb"] = blast_bin;
prog2dir ["makeblastdb"] = blast_bin;
findProg ("makeblastdb");

if (! hmmer_bin. empty ())
prog2dir ["hmmpress"] = hmmer_bin;
findProg ("hmmpress");


Expand Down
16 changes: 7 additions & 9 deletions amrfinder_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ Requirement: the database directory contains subdirectories named by database ve
{
addKey ("database", "Directory for all versions of AMRFinder databases", "$BASE/data", 'd', "DATABASE_DIR");
addKey ("blast_bin", "Directory for BLAST", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");
addFlag ("force_update", "Force updating the AMRFinder database"); // PD-3469
addFlag ("quiet", "Suppress messages to STDERR", 'q');
version = SVN_REV;
Expand Down Expand Up @@ -345,11 +346,13 @@ Requirement: the database directory contains subdirectories named by database ve
{
const string mainDirOrig = getArg ("database");
string blast_bin = getArg ("blast_bin");
string hmmer_bin = getArg ("hmmer_bin");
const bool force_update = getFlag ("force_update");
const bool quiet = getFlag ("quiet");


addDirSlash (blast_bin);
addDirSlash (hmmer_bin);


Stderr stderr (quiet);
Expand Down Expand Up @@ -400,14 +403,6 @@ Requirement: the database directory contains subdirectories named by database ve
ASSERT (! load_data_version. empty ());


#if 0
if (! blast_bin. empty ())
prog2dir ["makeblastdb"] = blast_bin;
findProg ("makeblastdb");
findProg ("hmmpress");
#endif


// User's files
string mainDirS;
{
Expand Down Expand Up @@ -491,7 +486,10 @@ Requirement: the database directory contains subdirectories named by database ve
string blast_bin_par;
if (! blast_bin. empty ())
blast_bin_par = " --blast_bin " + shellQuote (blast_bin);
exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err");
string hmmer_bin_par;
if (! hmmer_bin. empty ())
hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin);
exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + hmmer_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err");
#else
stderr << "Indexing" << "\n";
exec (fullProg ("hmmpress") + " -f " + shellQuote (latestDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err");
Expand Down
22 changes: 12 additions & 10 deletions fasta_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,18 @@ struct ThisApplication : Application
if (! printable (c))
throw runtime_error (errorS + "Non-printable character in the sequence identifier: " + to_string ((int) c));
// BLAST: PD-4548
if (id. front () == '?')
throw runtime_error (errorS + "Sequence identifier starts with '?'");
for (const char c : {',', ';', '.', '~'})
if (id. back () == c)
throw runtime_error (errorS + "Sequence identifier ends with " + strQuote (string (1, c)));
if (contains (id, '\\'))
throw runtime_error (errorS + "Sequence identifier contains '\\'");
if (contains (id, ",,"))
throw runtime_error (errorS + "Sequence identifier contains ',,'");
//
if (! aa)
{
if (id. front () == '?')
throw runtime_error (errorS + "Sequence identifier starts with '?'");
for (const char c : {',', ';', '.', '~'})
if (id. back () == c)
throw runtime_error (errorS + "Sequence identifier ends with " + strQuote (string (1, c)));
if (contains (id, "\\t"))
throw runtime_error (errorS + "Sequence identifier contains '\\t'");
if (contains (id, ",,"))
throw runtime_error (errorS + "Sequence identifier contains ',,'");
}
if (! first && seqSize == 0)
throw runtime_error (errorS + "Empty sequence");
if (lenF. get () && ! ids. empty ())
Expand Down
4 changes: 2 additions & 2 deletions gff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ Annot::Annot (const string &fName,
throw runtime_error ("empty sequence indentifier");
for (const char c : contig)
if (! printable (c))
throw runtime_error ("Non-printable character in the sequence identifier: " + c);
throw runtime_error ("Non-printable character in the sequence identifier: " + to_string (c));

if ( type != "CDS"
&& type != "gene"
Expand Down Expand Up @@ -385,7 +385,7 @@ Annot::Annot (const string &fName)

for (const char c : contig)
if (! printable (c))
throw runtime_error (errorS + "Non-printable character in the sequence identifier: " + c);
throw runtime_error (errorS + "Non-printable character in the sequence identifier: " + to_string (c));

if (start >= stop)
throw runtime_error (errorS + "start should be less than stop");
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.11.10
3.11.11

0 comments on commit fa70613

Please sign in to comment.