nextstrain · victorlin · Jan 14, 2025 · Jan 7, 2025 · Jan 7, 2025 · Jan 11, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -10,13 +10,16 @@
 
 * ancestral, refine: Explicitly specify how the root and ambiguous states are handled during sequence reconstruction and mutation counting. [#1690][] (@rneher)
 * titers: Fix type errors in code associated with cross-validation of models. [#1688][] (@huddlej)
-* Add help text to clarify difference in behavior between options that override defaults (e.g. `--metadata-delimiters`) vs. options that extend existing defaults (e.g. `--expected-date-formats`). [#1705][] (@victorlin)
 * export: The help text for `--lat-longs` has been improved with a link to the defaults and specifics around the overriding behavior. [#1715][] (@victorlin)
 * augur.io.read_metadata: Pandas versions <1.4.0 prevented this function from properly setting the index column's data type. Support for those older versions has been dropped. [#1716][] (@victorlin)
+* In version 24.4.0, one of the new features was that all options that take multiple values could be repeated. Unfortunately, it overlooked a few that have been fixed in this version. [#1707][] (@victorlin)
+    * `augur curate rename --field-map`
+    * `augur curate transform-strain-name --backup-fields`
+* `augur curate format-dates --expected-date-formats` help text has been improved with clarifications regarding how values provided interact with builtin formats. [#1707][] (@victorlin)
 
 [#1688]: https://github.com/nextstrain/augur/pull/1688
 [#1690]: https://github.com/nextstrain/augur/pull/1690
-[#1705]: https://github.com/nextstrain/augur/pull/1705
+[#1707]: https://github.com/nextstrain/augur/issues/1707
 [#1715]: https://github.com/nextstrain/augur/pull/1715
 [#1716]: https://github.com/nextstrain/augur/pull/1716
 

diff --git a/LICENSE.nextstrain-cli b/LICENSE.nextstrain-cli
@@ -1,8 +1,11 @@
-This license applies to the original copy of resource functions from the
-Nextstrain CLI project into this project, incorporated in
-"augur/data/__init__.py". Any subsequent modifications to this project's copy of
-those functions are licensed under the license of this project, not of
-Nextstrain CLI.
+This license applies to the original copy of functions from the Nextstrain CLI
+project into this project, namely
+
+- resource functions incorporated as "augur/data/__init__.py"
+- walk_commands() function incorporated into "augur/argparse_.py"
+
+Any subsequent modifications to this project's copy of these functions are
+licensed under the license of this project, not of Nextstrain CLI.
 
 MIT License
 

diff --git a/augur/align.py b/augur/align.py
@@ -6,6 +6,7 @@
 from shutil import copyfile
 import numpy as np
 from Bio import AlignIO, SeqIO, Seq, Align
+from .argparse_ import ExtendOverwriteDefault
 from .io.file import open_file
 from .io.shell_command_runner import run_shell_command
 from .io.vcf import shquote
@@ -24,7 +25,7 @@ def register_arguments(parser):
     Kept as a separate function than `register_parser` to continue to support
     unit tests that use this function to create argparser.
     """
-    parser.add_argument('--sequences', '-s', required=True, nargs="+", action="extend", metavar="FASTA", help="sequences to align")
+    parser.add_argument('--sequences', '-s', required=True, nargs="+", action=ExtendOverwriteDefault, metavar="FASTA", help="sequences to align")
     parser.add_argument('--output', '-o', default="alignment.fasta", help="output file (default: %(default)s)")
     parser.add_argument('--nthreads', type=nthreads_value, default=1,
                                 help="number of threads to use; specifying the value 'auto' will cause the number of available CPU cores on your system, if determinable, to be used")

diff --git a/augur/ancestral.py b/augur/ancestral.py
@@ -22,6 +22,7 @@
 
     The mutation positions in the node-data JSON are one-based.
 """
+from augur.argparse_ import ExtendOverwriteDefault
 from augur.errors import AugurError
 import sys
 import numpy as np
@@ -317,7 +318,7 @@ def register_parser(parent_subparsers):
     )
     amino_acid_options_group.add_argument('--annotation',
                         help='GenBank or GFF file containing the annotation')
-    amino_acid_options_group.add_argument('--genes', nargs='+', action='extend', help="genes to translate (list or file containing list)")
+    amino_acid_options_group.add_argument('--genes', nargs='+', action=ExtendOverwriteDefault, help="genes to translate (list or file containing list)")
     amino_acid_options_group.add_argument('--translations', type=str, help="translated alignments for each CDS/Gene. "
                            "Currently only supported for FASTA-input. Specify the file name via a "
                            "template like 'aa_sequences_%%GENE.fasta' where %%GENE will be replaced "

diff --git a/augur/argparse_.py b/augur/argparse_.py
@@ -1,47 +1,26 @@
 """
 Custom helpers for the argparse standard library.
 """
-from argparse import Action, ArgumentParser, _ArgumentGroup, HelpFormatter, SUPPRESS, OPTIONAL, ZERO_OR_MORE, _ExtendAction
-from typing import Union
+from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser, _ArgumentGroup, _SubParsersAction
+from itertools import chain
+from typing import Iterable, Optional, Tuple, Union
 from .types import ValidationMode
 
 
 # Include this in an argument help string to suppress the automatic appending
-# of the default value by CustomHelpFormatter.  This works because the
-# automatic appending is conditional on the presence of %(default), so we
-# include it but then format it as a zero-length string .0s.  🙃
+# of the default value by argparse.ArgumentDefaultsHelpFormatter.  This works
+# because the automatic appending is conditional on the presence of %(default),
+# so we include it but then format it as a zero-length string .0s.  🙃
 #
 # Another solution would be to add an extra attribute to the argument (the
-# argparse.Action instance) and then modify CustomHelpFormatter to condition
-# on that new attribute, but that seems more brittle.
+# argparse.Action instance) and then subclass ArgumentDefaultsHelpFormatter to
+# condition on that new attribute, but that seems more brittle.
 #
-# Initially copied from the Nextstrain CLI repo
+# Copied from the Nextstrain CLI repo
 # https://github.com/nextstrain/cli/blob/017c53805e8317951327d24c04184615cc400b09/nextstrain/cli/argparse.py#L13-L21
 SKIP_AUTO_DEFAULT_IN_HELP = "%(default).0s"
 
 
-class CustomHelpFormatter(HelpFormatter):
-    """Customize help text.
-
-    Initially copied from argparse.ArgumentDefaultsHelpFormatter.
-    """
-    def _get_help_string(self, action: Action):
-        help = action.help
-
-        if action.default is not None and action.default != []:
-            if isinstance(action, ExtendOverwriteDefault):
-                help += ' Specified values will override the default list.'
-            if isinstance(action, _ExtendAction):
-                help += ' Specified values will extend the default list.'
-
-        if '%(default)' not in action.help:
-            if action.default is not SUPPRESS:
-                defaulting_nargs = [OPTIONAL, ZERO_OR_MORE]
-                if action.option_strings or action.nargs in defaulting_nargs:
-                    help += ' (default: %(default)s)'
-        return help
-
-
 def add_default_command(parser):
     """
     Sets the default command to run when none is provided.
@@ -83,7 +62,7 @@ def add_command_subparsers(subparsers, commands, command_attribute='__command__'
 
         # Use the same formatting class for every command for consistency.
         # Set here to avoid repeating it in every command's register_parser().
-        subparser.formatter_class = CustomHelpFormatter
+        subparser.formatter_class = ArgumentDefaultsHelpFormatter
 
         if not subparser.description and command.__doc__:
             subparser.description = command.__doc__
@@ -148,3 +127,28 @@ def add_validation_arguments(parser: Union[ArgumentParser, _ArgumentGroup]):
         action="store_const",
         const=ValidationMode.SKIP,
         help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!")
+
+
+# Originally copied from nextstrain/cli/argparse.py in the Nextstrain CLI project¹.
+#
+# ¹ <https://github.com/nextstrain/cli/blob/4a00d7100eff811eab6df34db73c7f6d4196e22b/nextstrain/cli/argparse.py#L252-L271>
+def walk_commands(parser: ArgumentParser, command: Optional[Tuple[str, ...]] = None) -> Iterable[Tuple[Tuple[str, ...], ArgumentParser]]:
+    if command is None:
+        command = (parser.prog,)
+
+    yield command, parser
+
+    subparsers = chain.from_iterable(
+        action.choices.items()
+            for action in parser._actions
+             if isinstance(action, _SubParsersAction))
+
+    visited = set()
+
+    for subname, subparser in subparsers:
+        if subparser in visited:
+            continue
+
+        visited.add(subparser)
+
+        yield from walk_commands(subparser, (*command, subname))
diff --git a/augur/clades.py b/augur/clades.py
@@ -18,6 +18,7 @@
 from collections import defaultdict
 import networkx as nx
 from itertools import islice
+from .argparse_ import ExtendOverwriteDefault
 from .errors import AugurError
 from .io.file import PANDAS_READ_CSV_OPTIONS
 from argparse import SUPPRESS
@@ -342,8 +343,8 @@ def parse_nodes(tree_file, node_data_files, validation_mode):
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("clades", help=__doc__)
     parser.add_argument('--tree', required=True, help="prebuilt Newick -- no tree will be built if provided")
-    parser.add_argument('--mutations', required=True, metavar="NODE_DATA_JSON", nargs='+', action='extend', help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
-    parser.add_argument('--reference', nargs='+', action='extend', help=SUPPRESS)
+    parser.add_argument('--mutations', required=True, metavar="NODE_DATA_JSON", nargs='+', action=ExtendOverwriteDefault, help='JSON(s) containing ancestral and tip nucleotide and/or amino-acid mutations ')
+    parser.add_argument('--reference', nargs='+', action=ExtendOverwriteDefault, help=SUPPRESS)
     parser.add_argument('--clades', required=True, metavar="TSV", type=str, help='TSV file containing clade definitions by amino-acid')
     parser.add_argument('--output-node-data', type=str,  metavar="NODE_DATA_JSON", help='name of JSON file to save clade assignments to')
     parser.add_argument('--membership-name', type=str, default="clade_membership", help='Key to store clade membership under; use "None" to not export this')

diff --git a/augur/curate/format_dates.py b/augur/curate/format_dates.py
@@ -7,17 +7,18 @@
 """
 import re
 from datetime import datetime
+from textwrap import dedent
 
-from augur.argparse_ import SKIP_AUTO_DEFAULT_IN_HELP
+from augur.argparse_ import ExtendOverwriteDefault, SKIP_AUTO_DEFAULT_IN_HELP
 from augur.errors import AugurError
 from augur.io.print import print_err
 from augur.types import DataErrorMethod
 from .format_dates_directives import YEAR_DIRECTIVES, YEAR_MONTH_DIRECTIVES, YEAR_MONTH_DAY_DIRECTIVES
 
 
-# Default date formats that this command should parse
+# Builtin date formats that this command should parse
 # without additional input from the user.
-DEFAULT_EXPECTED_DATE_FORMATS = [
+BUILTIN_DATE_FORMATS = [
     '%Y-%m-%d',
     '%Y-%m-XX',
     '%Y-XX-XX',
@@ -31,16 +32,19 @@ def register_parser(parent_subparsers):
         help=__doc__)
 
     required = parser.add_argument_group(title="REQUIRED")
-    required.add_argument("--date-fields", nargs="+", action="extend",
+    required.add_argument("--date-fields", nargs="+", action=ExtendOverwriteDefault,
         help="List of date field names in the record that need to be standardized.")
 
     optional = parser.add_argument_group(title="OPTIONAL")
-    optional.add_argument("--expected-date-formats", nargs="+", action="extend",
-        default=DEFAULT_EXPECTED_DATE_FORMATS,
-        help="Expected date formats that are currently in the provided date fields, " +
-             "defined by standard format codes as listed at " +
-             "https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes. " +
-             "If a date string matches multiple formats, it will be parsed as the first matched format in the provided order.")
+    optional.add_argument("--expected-date-formats", nargs="+", action=ExtendOverwriteDefault,
+        help=dedent(f"""\
+            Custom date formats for values in the provided date fields, defined by standard
+            format codes available at
+            <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>.
+            If a value matches multiple formats, it will be parsed using the first match.
+            The following formats are builtin and automatically used:
+            {", ".join(repr(x).replace("%", "%%") for x in BUILTIN_DATE_FORMATS)}.
+            User-provided values are considered after the builtin formats."""))
     optional.add_argument("--failure-reporting",
         type=DataErrorMethod.argtype,
         choices=list(DataErrorMethod),
@@ -182,10 +186,14 @@ def format_date(date_string, expected_formats):
 
 
 def run(args, records):
+    expected_date_formats = BUILTIN_DATE_FORMATS
+    if args.expected_date_formats:
+        expected_date_formats.extend(args.expected_date_formats)
+
     failures = []
     failure_reporting = args.failure_reporting
     failure_suggestion = (
-        f"\nCurrent expected date formats are {args.expected_date_formats!r}. " +
+        f"\nCurrent expected date formats are {expected_date_formats!r}. " +
         "This can be updated with --expected-date-formats."
     )
     for index, record in enumerate(records):
@@ -198,7 +206,7 @@ def run(args, records):
             if date_string is None:
                 raise AugurError(f"Expected date field {field!r} not found in record {record_id!r}.")
 
-            formatted_date_string = format_date(date_string, args.expected_date_formats)
+            formatted_date_string = format_date(date_string, expected_date_formats)
             if formatted_date_string is None:
                 # Mask failed date formatting before processing error methods
                 # to ensure failures are masked even when failures are "silent"

diff --git a/augur/curate/rename.py b/augur/curate/rename.py
@@ -4,6 +4,7 @@
 
 from typing import Iterable, Literal, Union, List, Tuple
 import argparse
+from augur.argparse_ import ExtendOverwriteDefault
 from augur.io.print import print_err
 from augur.errors import AugurError
 
@@ -13,7 +14,7 @@ def register_parser(parent_subparsers):
     help = __doc__)
 
     required = parser.add_argument_group(title="REQUIRED")
-    required.add_argument("--field-map", nargs="+", required=True,
+    required.add_argument("--field-map", nargs="+", action=ExtendOverwriteDefault, required=True,
         help="Rename fields/columns via '{old_field_name}={new_field_name}'. " +
              "If the new field already exists, then the renaming of the old field will be skipped. " +
              "Multiple entries with the same '{old_field_name}' will duplicate the field/column. " +

diff --git a/augur/curate/titlecase.py b/augur/curate/titlecase.py
@@ -4,6 +4,7 @@
 import re
 from typing import Optional, Set, Union
 
+from augur.argparse_ import ExtendOverwriteDefault
 from augur.errors import AugurError
 from augur.io.print import print_err
 from augur.types import DataErrorMethod
@@ -14,13 +15,13 @@ def register_parser(parent_subparsers):
     help = __doc__)
 
     required = parser.add_argument_group(title="REQUIRED")
-    required.add_argument("--titlecase-fields", nargs="*", action="extend",
+    required.add_argument("--titlecase-fields", nargs="*", action=ExtendOverwriteDefault,
         help="List of fields to convert to titlecase.", required=True)
 
     optional = parser.add_argument_group(title="OPTIONAL")
-    optional.add_argument("--articles", nargs="*", action="extend",
+    optional.add_argument("--articles", nargs="*", action=ExtendOverwriteDefault,
         help="List of articles that should not be converted to titlecase.")
-    optional.add_argument("--abbreviations", nargs="*", action="extend",
+    optional.add_argument("--abbreviations", nargs="*", action=ExtendOverwriteDefault,
         help="List of abbreviations that should not be converted to titlecase, keeps uppercase.")
 
     optional.add_argument("--failure-reporting",

diff --git a/augur/curate/transform_strain_name.py b/augur/curate/transform_strain_name.py
@@ -7,6 +7,7 @@
 import argparse
 import re
 from typing import Generator, List
+from augur.argparse_ import ExtendOverwriteDefault
 from augur.io.print import print_err
 from augur.utils import first_line
 
@@ -55,6 +56,7 @@ def register_parser(
     parser.add_argument(
         "--backup-fields",
         nargs="*",
+        action=ExtendOverwriteDefault,
         default=[],
         help="List of backup fields to use as strain name if the value in 'strain' "
         + "does not match the strain regex pattern. "