Skip to content

Commit

Permalink
PD-4706 protein match overrides a nucleotide match for point mutations
Browse files Browse the repository at this point in the history
  • Loading branch information
Vyacheslav Brover committed Aug 4, 2023
1 parent c6859b0 commit e352ffc
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 81 deletions.
1 change: 0 additions & 1 deletion alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ namespace Alignment_sp

// AmrMutation


AmrMutation::AmrMutation (size_t pos_arg,
const string &geneMutation_arg,
const string &class_arg,
Expand Down
15 changes: 9 additions & 6 deletions amr_report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ struct BlastAlignment : Alignment
const size_t unionStart = min (protStart, dnaStart);
const size_t unionStop = max (protStop, dnaStop);
if ( ( intersectionStart < intersectionStop
&& double (intersectionStop - intersectionStart) / double (protStop - protStart) > 0.75 // PAR
&& double (intersectionStop - intersectionStart) / double (protStop - protStart) > 0.75 // PAR, PD-2320
)
|| (intersectionStart - unionStart) + (unionStop - intersectionStop) <= 60 * 3 // PAR, PD-4169
|| ( protStart <= dnaStart + mismatchTail_aa * 3
Expand All @@ -1061,14 +1061,14 @@ struct BlastAlignment : Alignment
// Reflexive
{ if (this == & other)
return true;
if ( ! refMutation. empty ()
|| ! other. refMutation. empty ()
)
return false;
if (targetProt == other. targetProt)
{
if (targetName != other. targetName)
return false;
if ( ! refMutation. empty ()
|| ! other. refMutation. empty ()
)
return false;
// PD-807, PD-4277
if ( ! other. insideEq (*this)
&& ! insideEq (other)
Expand Down Expand Up @@ -1097,7 +1097,10 @@ struct BlastAlignment : Alignment
}
}
else
{ // PD-1902, PD-2139, PD-2313, PD-2320
{
if (refMutation. empty () != other. refMutation. empty ()) // PD-4698
return false;
// PD-1902, PD-2139, PD-2313, PD-2320
if (targetProt && ! matchesCds (other))
return false;
if (! targetProt && ! other. matchesCds (*this))
Expand Down
31 changes: 22 additions & 9 deletions amrfinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
* gunzip (optional)
*
* Release changes:
* 08/04/2023 PD-4698 protein match overrides a nucleotide match for point mutations
* 3.11.18 07/25/2023 parameter order in instruction; "can be gzipped" is added to instruction
* 3.11.17 07/19/2023 PD-4687 distinct overlapping hits are not reported separately for protein targets for the same alleles or gene symbols
* 3.11.16 07/18/2023 PD-4687 distinct overlapping hits are not reported separately for protein targets (because the start/stop are not reported)
* 3.11.15 05/23/2023 PD-4629 "amrfinder_update -d DIR" will create DIR if DIR is missing
Expand Down Expand Up @@ -298,38 +300,49 @@ struct ThisApplication : ShellApplication
{
addFlag ("update", "Update the AMRFinder database", 'u'); // PD-2379
addFlag ("force_update", "Force updating the AMRFinder database", 'U'); // PD-3469

#ifdef DIR
addKey ("dir", "Common directory of the --protein, --nucleotide and --gff files", "", '\0', "DIRECTORY");
#endif
addKey ("protein", "Input protein FASTA file", "", 'p', "PROT_FASTA");
addKey ("nucleotide", "Input nucleotide FASTA file", "", 'n', "NUC_FASTA");
addKey ("gff", "GFF file for protein locations. Protein id should be in the attribute 'Name=<id>' (9th field) of the rows with type 'CDS' or 'gene' (3rd field).", "", 'g', "GFF_FILE");
addFlag ("pgap", "Input files PROT_FASTA, NUC_FASTA and GFF_FILE are created by the NCBI PGAP"); // = --annotation_format pgap
addKey ("protein", "Input protein FASTA file (can be gzipped)", "", 'p', "PROT_FASTA");
addKey ("nucleotide", "Input nucleotide FASTA file (can be gzipped)", "", 'n', "NUC_FASTA");
addKey ("gff", "GFF file for protein locations (can be gzipped). Protein id should be in the attribute 'Name=<id>' (9th field) of the rows with type 'CDS' or 'gene' (3rd field).", "", 'g', "GFF_FILE");
addKey ("annotation_format", "Type of GFF file: " + Gff::names. toString (", "), "genbank", 'a', "ANNOTATION_FORMAT");

addKey ("database", "Alternative directory with AMRFinder database. Default: $AMRFINDER_DB", "", 'd', "DATABASE_DIR");
addFlag ("database_version", "Print database version", 'V');

addKey ("ident_min", "Minimum proportion of identical amino acids in alignment for hit (0..1). -1 means use a curated threshold if it exists and " + toString (ident_min_def) + " otherwise", "-1", 'i', "MIN_IDENT");
// "PD-3482
addKey ("coverage_min", "Minimum coverage of the reference protein (0..1)", toString (partial_coverage_min_def), 'c', "MIN_COV");

addKey ("organism", "Taxonomy group. To see all possible taxonomy groups use the --list_organisms flag", "", 'O', "ORGANISM");
addFlag ("list_organisms", "Print the list of all possible taxonomy groups for mutations identification and exit", 'l');
addKey ("translation_table", "NCBI genetic code for translated BLAST", "11", 't', "TRANSLATION_TABLE");

addFlag ("plus", "Add the plus genes to the report"); // PD-2789
addFlag ("report_common", "Report proteins common to a taxonomy group"); // PD-2756
addKey ("mutation_all", "File to report all mutations", "", '\0', "MUT_ALL_FILE");
addKey ("blast_bin", "Directory for BLAST. Deafult: $BLAST_BIN", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");
addFlag ("report_all_equal", "Report all equally-scoring BLAST and HMM matches"); // PD-3772
addFlag ("print_node", "print hierarchy node (family)"); // PD-4394
addKey ("name", "Text to be added as the first column \"name\" to all rows of the report, for example it can be an assembly name", "", '\0', "NAME");
addFlag ("print_node", "print hierarchy node (family)"); // PD-4394
addKey ("mutation_all", "File to report all mutations", "", '\0', "MUT_ALL_FILE");

addKey ("output", "Write output to OUTPUT_FILE instead of STDOUT", "", 'o', "OUTPUT_FILE");
addKey ("protein_output", "Output protein FASTA file of reported proteins", "", '\0', "PROT_FASTA_OUT");
addKey ("nucleotide_output", "Output nucleotide FASTA file of reported nucleotide sequences", "", '\0', "NUC_FASTA_OUT");
addKey ("nucleotide_flank5_output", "Output nucleotide FASTA file of reported nucleotide sequences with 5' flanking sequences", "", '\0', "NUC_FLANK5_FASTA_OUT");
addKey ("nucleotide_flank5_size", "5' flanking sequence size for NUC_FLANK5_FASTA_OUT", "0", '\0', "NUC_FLANK5_SIZE");

addKey ("blast_bin", "Directory for BLAST. Deafult: $BLAST_BIN", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");

addFlag ("quiet", "Suppress messages to STDERR", 'q');

addFlag ("pgap", "Input files PROT_FASTA, NUC_FASTA and GFF_FILE are created by the NCBI PGAP"); // = --annotation_format pgap
addFlag ("gpipe_org", "NCBI internal GPipe organism names");

addKey ("parm", "amr_report parameters for testing: -nosame -noblast -skip_hmm_check -bed", "", '\0', "PARM");
addFlag ("database_version", "Print database version", 'V');

version = SVN_REV;
}

Expand Down
119 changes: 82 additions & 37 deletions common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,42 @@ string getStack ()

bool Chronometer::enabled = false;



void Chronometer::start ()
{
if (! on ())
return;
if (startTime != noclock)
throw runtime_error (FUNC "Chronometer \"" + name + "\" is not stopped");
startTime = clock ();
}



void Chronometer::stop ()
{
if (! on ())
return;
if (startTime == noclock)
throw runtime_error (FUNC "Chronometer \"" + name + "\" is not started");
time += clock () - startTime;
startTime = noclock;
}


void Chronometer::print (ostream &os) const
{
if (! on ())
return;
os << "CHRON: " << name << ": ";
const ONumber onm (os, 2, false);
os << (double) time / CLOCKS_PER_SEC << " sec." << endl;
}




// Chronometer_OnePass

Chronometer_OnePass::Chronometer_OnePass (const string &name_arg,
Expand Down Expand Up @@ -439,6 +473,26 @@ const string noString;



string nonPrintable2str (char c)
{
string s;
switch (c)
{
case 7: s = "BEL"; break;
case 8: s = "BS"; break;
case 9: s = "TAB"; break;
case 10: s = "LF"; break;
case 12: s = "FF"; break;
case 13: s = "CR"; break;
case 27: s = "ESC"; break;
case 32: s = "SPACE"; break;
default: s = "x" + uchar2hex ((uchar) c);
}
return "<" + s + ">";
}



bool isRight (const string &s,
const string &right)
{
Expand Down Expand Up @@ -2458,18 +2512,9 @@ Token TokenInput::getXmlText ()
}

// t.name
if (n >= 0 && n < ' ')
{
string s;
switch (n)
{
case 9: s = "TAB"; break;
case 10: s = "LF"; break;
case 13: s = "CR"; break;
default: s = "x" + uchar2hex ((uchar) n);
}
t. name += "<" + s + ">";
}
QC_ASSERT (n > 0);
if (nonPrintable (n))
t. name += nonPrintable2str ((char) n);
else
{
if (isChar (n))
Expand Down Expand Up @@ -3363,10 +3408,10 @@ void Application::addKey (const string &name,
ASSERT (! contains (name2arg, name));
if (acronym && contains (char2arg, acronym))
throw logic_error ("Duplicate option " + strQuote (string (1, acronym)));
keys << Key (*this, name, argDescription, defaultValue, acronym, var);
name2arg [name] = & keys. back ();
keyArgs << Key (*this, name, argDescription, defaultValue, acronym, var);
name2arg [name] = & keyArgs. back ();
if (acronym)
char2arg [acronym] = & keys. back ();
char2arg [acronym] = & keyArgs. back ();
}


Expand All @@ -3379,10 +3424,10 @@ void Application::addFlag (const string &name,
ASSERT (! contains (name2arg, name));
if (acronym && contains (char2arg, acronym))
throw logic_error ("Duplicate option " + strQuote (string (1, acronym)));
keys << Key (*this, name, argDescription, acronym);
name2arg [name] = & keys. back ();
keyArgs << Key (*this, name, argDescription, acronym);
name2arg [name] = & keyArgs. back ();
if (acronym)
char2arg [acronym] = & keys. back ();
char2arg [acronym] = & keyArgs. back ();
}


Expand All @@ -3392,8 +3437,8 @@ void Application::addPositional (const string &name,
{
ASSERT (! contains (name2arg, name));
IMPLY (gnu, isUpper (name));
positionals << Positional (name, argDescription);
name2arg [name] = & positionals. back ();
positionalArgs << Positional (name, argDescription);
name2arg [name] = & positionalArgs. back ();
}


Expand All @@ -3417,7 +3462,7 @@ Application::Key* Application::getKey (const string &keyName) const
void Application::setPositional (List<Positional>::iterator &posIt,
const string &value)
{
if (posIt == positionals. end ())
if (posIt == positionalArgs. end ())
{
if (isLeft (value, "-"))
errorExitStr (strQuote (value) + " is not a valid option\n\n" + getInstruction ());
Expand Down Expand Up @@ -3472,14 +3517,14 @@ void Application::qc () const

QC_ASSERT (! description. empty ());
QC_ASSERT (! version. empty ());
QC_IMPLY (! needsArg, positionals. empty ());
QC_IMPLY (! needsArg, positionalArgs. empty ());

for (const Positional& p : positionals)
for (const Positional& p : positionalArgs)
p. qc ();

map<string,size_t> requiredGroups;
string requiredGroup_prev;
for (const Key& key : keys)
for (const Key& key : keyArgs)
{
key. qc ();
if (! key. requiredGroup. empty () && key. requiredGroup != requiredGroup_prev)
Expand Down Expand Up @@ -3534,11 +3579,11 @@ string Application::getInstruction () const

instr += "\n\nUSAGE: " + programName;

for (const Positional& p : positionals)
for (const Positional& p : positionalArgs)
instr += " " + p. str ();

string requiredGroup_prev;
for (const Key& key : keys)
for (const Key& key : keyArgs)
{
if (key. requiredGroup == requiredGroup_prev)
if (key. requiredGroup. empty ())
Expand Down Expand Up @@ -3580,17 +3625,17 @@ string Application::getHelp () const

const string par ("\n ");

if (! positionals. empty ())
if (! positionalArgs. empty ())
{
instr += "\n\nPOSITIONAL PARAMETERS:";
for (const Positional& p : positionals)
for (const Positional& p : positionalArgs)
instr += "\n" + p. str () + par + p. description;
}

if (! keys. empty ())
if (! keyArgs. empty ())
{
instr += "\n\nNAMED PARAMETERS:";
for (const Key& key : keys)
for (const Key& key : keyArgs)
{
instr += "\n" + key. getShortHelp () + par + key. description;
if (gnu && ! key. defaultValue. empty ())
Expand Down Expand Up @@ -3644,11 +3689,11 @@ int Application::run (int argc,
initEnvironment ();


// positionals, keys
// positionalArgs, keyArgs
Set<Key*> keysRead;
{
bool first = true;
List<Positional>::iterator posIt = positionals. begin ();
List<Positional>::iterator posIt = positionalArgs. begin ();
Key* key = nullptr;
for (string s : programArgs)
{
Expand Down Expand Up @@ -3733,27 +3778,27 @@ int Application::run (int argc,
errorExitStr ("Key with no value: " + key->name + "\n\n" + getInstruction ());


if (programArgs. size () == 1 && (! positionals. empty () || needsArg))
if (programArgs. size () == 1 && (! positionalArgs. empty () || needsArg))
{
cout << getInstruction () << endl;
return 1;
}


if (posIt != positionals. end ())
if (posIt != positionalArgs. end ())
errorExitStr ("Too few positional parameters\n\n" + getInstruction ());
}


for (Key& key : keys)
for (Key& key : keyArgs)
if (! keysRead. contains (& key))
key. value = key. defaultValue;


{
map<string,StringVector> requiredGroup2names;
map<string,StringVector> requiredGroup2given;
for (const Key& key : keys)
for (const Key& key : keyArgs)
if (! key. requiredGroup. empty ())
{
if (! key. value. empty ())
Expand Down Expand Up @@ -3885,7 +3930,7 @@ void ShellApplication::initEnvironment ()

string execDir_ (execDir);
trimSuffix (execDir_, "/");
for (Key& key : keys)
for (Key& key : keyArgs)
if (! key. flag)
replaceStr (key. defaultValue, "$BASE", execDir_);
}
Expand Down
Loading

0 comments on commit e352ffc

Please sign in to comment.