diff --git a/CHANGELOG.md b/CHANGELOG.md index a5832ce4..5f56d988 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#251](https://github.com/nf-core/funcscan/pull/251) Added annotation tool: Pyrodigal. (by @jasmezz) - [#252](https://github.com/nf-core/funcscan/pull/252) Added a new parameter `-arg_rgi_savejson` that saves the file `.json` in the RGI directory. The default ouput for RGI is now only `.txt`. (by @darcy220606) - [#253](https://github.com/nf-core/funcscan/pull/253) Updated Prodigal to have compressed output files. (by @jasmezz) -- [#258](https://github.com/nf-core/funcscan/pull/258) Added comBGC function to screen whole directory of antiSMASH output (one subfolder per sample). (by @jasmezz) ### `Fixed` diff --git a/bin/comBGC.py b/bin/comBGC.py index 53cd64a6..ee703619 100755 --- a/bin/comBGC.py +++ b/bin/comBGC.py @@ -32,7 +32,7 @@ SOFTWARE. """ -tool_version = "0.6.0" +tool_version = "0.5" welcome = """\ ........................ * comBGC v.{version} * @@ -61,9 +61,7 @@ these can be: - antiSMASH: .gbk and (optional) knownclusterblast/ directory - DeepBGC: .bgc.tsv -- GECCO: .clusters.tsv -Note: Please provide files from a single sample only. If you would like to -summarize multiple samples, please see the --antismash_multiple_samples flag.""", +- GECCO: .clusters.tsv""", ) parser.add_argument( "-o", @@ -75,16 +73,6 @@ type=str, default=".", ) -parser.add_argument( - "-a", - "--antismash_multiple_samples", - metavar="PATH", - dest="antismash_multiple_samples", - nargs="?", - help="""directory of antiSMASH output. Should contain subfolders (one per -sample). Can only be used if --input is not specified.""", - type=str, -) parser.add_argument("-vv", "--verbose", help="increase output verbosity", action="store_true") parser.add_argument("-v", "--version", help="show version number and exit", action="store_true") @@ -93,7 +81,6 @@ # Assign input arguments to variables input = args.input -dir_antismash = args.antismash_multiple_samples outdir = args.outdir verbose = args.verbose version = args.version @@ -124,13 +111,8 @@ elif path.endswith("knownclusterblast/"): input_antismash.append(path) -if input and dir_antismash: - exit( - "The flags --input and --antismash_multiple_samples are mutually exclusive.\nPlease use only one of them (or see --help for how to use)." - ) - # Make sure that at least one input argument is given -if not (input_antismash or input_gecco or input_deepbgc or dir_antismash): +if not (input_antismash or input_gecco or input_deepbgc): exit("Please specify at least one input file (i.e. output from antismash, deepbgc, or gecco) or see --help") ######################## @@ -138,24 +120,6 @@ ######################## -def prepare_multisample_input_antismash(antismash_dir): - """ - Prepare string of input paths of a given antiSMASH output folder (with sample subdirectories) - """ - sample_paths = [] - for root, subdirs, files in os.walk(antismash_dir): - antismash_file = "/".join([root, "index.html"]) - if os.path.exists(antismash_file): - sample = root.split("/")[-1] - gbk_path = "/".join([root, sample]) + ".gbk" - kkb_path = "/".join([root, "knownclusterblast"]) - if os.path.exists(kkb_path): - sample_paths.append([gbk_path, kkb_path]) - else: - sample_paths.append([gbk_path]) - return sample_paths - - def parse_knownclusterblast(kcb_file_path): """ Extract MIBiG IDs from knownclusterblast TXT file. @@ -184,6 +148,9 @@ def antismash_workflow(antismash_paths): - Return data frame with aggregated info. """ + if verbose: + print("\nParsing antiSMASH files\n... ", end="") + antismash_sum_cols = [ "Sample_ID", "Prediction_tool", @@ -219,9 +186,6 @@ def antismash_workflow(antismash_paths): # Aggregate information Sample_ID = gbk_path.split("/")[-1].split(".gbk")[-2] # Assuming file name equals sample name - if verbose: - print("\nParsing antiSMASH file(s): " + Sample_ID + "\n... ", end="") - with open(gbk_path) as gbk: for record in SeqIO.parse(gbk, "genbank"): # GBK records are contigs in this case # Initiate variables per contig @@ -550,13 +514,7 @@ def gecco_workflow(gecco_paths): ######################## if __name__ == "__main__": - if input_antismash: - tools = {"antiSMASH": input_antismash, "deepBGC": input_deepbgc, "GECCO": input_gecco} - elif dir_antismash: - tools = {"antiSMASH": dir_antismash} - else: - tools = {"deepBGC": input_deepbgc, "GECCO": input_gecco} - + tools = {"antiSMASH": input_antismash, "deepBGC": input_deepbgc, "GECCO": input_gecco} tools_provided = {} for tool in tools.keys(): @@ -574,13 +532,7 @@ def gecco_workflow(gecco_paths): for tool in tools_provided.keys(): if tool == "antiSMASH": - if dir_antismash: - antismash_paths = prepare_multisample_input_antismash(dir_antismash) - for input_antismash in antismash_paths: - summary_antismash_temp = antismash_workflow(input_antismash) - summary_antismash = pd.concat([summary_antismash, summary_antismash_temp]) - else: - summary_antismash = antismash_workflow(input_antismash) + summary_antismash = antismash_workflow(input_antismash) elif tool == "deepBGC": summary_deepbgc = deepbgc_workflow(input_deepbgc) elif tool == "GECCO":