From 3d8bed9885feae7d55851c72fd15269b5af95567 Mon Sep 17 00:00:00 2001 From: Trevor Bedford Date: Fri, 26 Jul 2024 13:15:23 -0700 Subject: [PATCH] Allow build-specific --narrow-bandwidth param in frequencies There was a bug in the current behavior where the rule `tip_frequencies` was calling `config["frequencies"]["narrow_bandwidth"]`. This resulted in --narrow-bandwidth being fixed to whatever was specified in parameters.yaml. However, we were relying on build-specific settings to differentiate behavior in the all-time vs 6m vs 2m vs 1m builds, e.g.: ``` frequencies: global_1m: narrow_bandwidth: 0.019 ``` This commit fixes this issue and allows overriding of narrow_bandwidth in parameters.yaml by build-specific settings. It also provides a genuine default (equal to augur default) so that parameters.yaml doesn't have to always specify narrow_bandwidth. --- .../nextstrain-gisaid/builds.yaml | 29 ------------------- workflow/snakemake_rules/common.smk | 11 +++++++ workflow/snakemake_rules/main_workflow.smk | 2 +- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index ab07dcc63..cdb06d629 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -890,116 +890,87 @@ frequencies: reference: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 global_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_6m: min_date: "6M" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 global_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_6m: min_date: "6M" narrow_bandwidth: 0.038 - 
recent_days_to_censor: 7 africa_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_6m: min_date: "6M" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 asia_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_6m: min_date: "6M" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 europe_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_6m: min_date: "6M" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 north-america_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_6m: min_date: "6M" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 oceania_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_6m: min_date: "6M" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 south-america_all-time: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 diff --git a/workflow/snakemake_rules/common.smk b/workflow/snakemake_rules/common.smk index 0c2713950..022bdd8a7 100644 --- a/workflow/snakemake_rules/common.smk +++ b/workflow/snakemake_rules/common.smk @@ -207,6 
def _get_narrow_bandwidth_for_wildcards(wildcards):
    """Return the ``--narrow-bandwidth`` value to use for the current build.

    Lookup order (first match wins):

    1. ``config["frequencies"][<build_name>]["narrow_bandwidth"]`` -- the
       build-specific override.
    2. ``config["frequencies"]["narrow_bandwidth"]`` -- the workflow-wide
       setting.
    3. ``0.0833`` -- the fallback, intended to match the default of
       ``augur frequencies``.

    Parameters
    ----------
    wildcards
        Object exposing a ``build_name`` attribute (the Snakemake wildcards
        of the calling rule).

    Returns
    -------
    The configured narrow bandwidth, or ``0.0833`` if none is configured.
    """
    # A missing or empty "frequencies" section must fall through to the
    # default rather than raise, so never index into config unguarded.
    frequencies = config.get("frequencies") or {}

    # The build entry may be absent, empty (None) or a scalar that merely
    # shares its name with the build; only a mapping can hold an override.
    build_settings = frequencies.get(wildcards.build_name)
    if isinstance(build_settings, dict) and "narrow_bandwidth" in build_settings:
        return build_settings["narrow_bandwidth"]

    if "narrow_bandwidth" in frequencies:
        return frequencies["narrow_bandwidth"]

    # Neither level configures a value: use the augur frequencies default.
    return 0.0833