diff --git a/README.md b/README.md
index 8cca3546..c4a71956 100644
--- a/README.md
+++ b/README.md
@@ -732,7 +732,7 @@ It accepts the results of a former annotation process in JSON format and allows
### Usage
```bash
-usage: bakta_plot [--config CONFIG] [--output OUTPUT] [--prefix PREFIX] [--sequences SEQUENCES] [--type {features,cog}] [--help] [--verbose] [--debug] [--tmp-dir TMP_DIR] [--version]
+usage: bakta_plot [--config CONFIG] [--output OUTPUT] [--prefix PREFIX] [--sequences SEQUENCES] [--type {features,cog}] [--label LABEL] [--help] [--verbose] [--debug] [--tmp-dir TMP_DIR] [--version]
Rapid & standardized annotation of bacterial genomes, MAGs & plasmids
@@ -752,6 +752,7 @@ Plotting:
Sequences to plot: comma separated number or name (default = all, numbers one-based)
--type {features,cog}
Plot type: feature/cog (default = features)
+ --label LABEL Plot center label (for line breaks use '|')
General:
--help, -h Show this help message and exit
@@ -778,6 +779,12 @@ In the `cog` mode, all protein-coding genes (CDS) are colored due to assigned CO
In addition, both plot types share two innermost GC content and GC skew rings. The first ring represents the GC content per sliding window over the entire sequence(s) in green (`#33a02c`) and red `#e31a1c` representing GC above and below average, respectively. The 2nd ring represents the GC skew in orange (`#fdbf6f`) and blue (`#1f78b4`). The GC skew gives hints on a replicon's replication bubble and hence, on the completeness of the assembly. On a complete & circular bacterial chromosome, you normally see two inflection points at the origin of replication and at its opposite region -> [Wikipedia](https://en.wikipedia.org/wiki/GC_skew)
+Custom plot labels (text in the center) can be provided via `--label`; use `|` to insert line breaks:
+
+```bash
+bakta_plot --sequences 2 --label="line 1|line 2|line 3" input.json
+```
+
## Auxiliary scripts
Often, the usage of Bakta is a necessary upfront task followed by deeper analyses implemented in custom scripts. In [scripts](scripts) we'd like to collect & offer a pool of scripts addressing common tasks:
diff --git a/bakta/plot.py b/bakta/plot.py
index 816b662a..0875c53b 100644
--- a/bakta/plot.py
+++ b/bakta/plot.py
@@ -89,6 +89,7 @@ def main():
arg_group_plot = parser.add_argument_group('Plotting')
arg_group_plot.add_argument('--sequences', action='store', default='all', help='Sequences to plot: comma separated number or name (default = all, numbers one-based)')
arg_group_plot.add_argument('--type', action='store', type=str, default=bc.PLOT_FEATURES, choices=[bc.PLOT_FEATURES, bc.PLOT_COG], help=f'Plot type (default = {bc.PLOT_FEATURES})')
+ arg_group_plot.add_argument('--label', action='store', type=str, default=None, help=f"Plot center label (for line breaks use '|')")
arg_group_general = parser.add_argument_group('General')
arg_group_general.add_argument('--help', '-h', action='help', help='Show this help message and exit')
@@ -198,7 +199,7 @@ def main():
print('Draw plots...')
if args.sequences == 'all': # write whole genome plot
print(f'\tdraw circular genome plot (type={plot_type}) containing all sequences...')
- write(data, features, output_path, colors, plot_type=plot_type)
+ write(data, features, output_path, colors, plot_type=plot_type, plot_label=args.label)
else: # write genome plot containing provided sequences only
plot_sequences = []
sequence_identifiers = []
@@ -217,10 +218,10 @@ def main():
plot_sequence_ids = [seq['id'] for seq in plot_sequences]
data['features'] = [feat for feat in features if feat['sequence'] in plot_sequence_ids] # reduce feature list in data object
data['sequences'] = [seq for seq in sequences if seq['id'] in plot_sequence_ids] # reduce sequence list in data object
- write(data, features, output_path, colors, plot_name_suffix=plot_name_suffix, plot_type=plot_type)
+ write(data, features, output_path, colors, plot_name_suffix=plot_name_suffix, plot_type=plot_type, plot_label=args.label)
-def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plot_type=bc.PLOT_FEATURES):
+def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plot_type=bc.PLOT_FEATURES, plot_label=None):
sequence_list = insdc.build_biopython_sequence_list(data, features)
for seq in sequence_list: # fix edge features because PyCirclize cannot handle them correctly
seq.features = [feat for feat in seq.features if feat.type != 'gene' and feat.type != 'source']
@@ -239,13 +240,13 @@ def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plo
feat.location = FeatureLocation(feat_loc.start, int(str(feat_loc.end)[1:]), strand=feat.strand)
- # build lable
+ # build label
- plot_lable = build_label(data)
+ plot_label = build_label(data) if plot_label is None else plot_label.replace('|', '\n')
# select style
if plot_type == bc.PLOT_COG:
- plot = build_features_type_cog(data, sequence_list, plot_lable, colors)
+ plot = build_features_type_cog(data, sequence_list, plot_label, colors)
else:
- plot = build_features_type_feature(data, sequence_list, plot_lable, colors)
+ plot = build_features_type_feature(data, sequence_list, plot_label, colors)
file_name = cfg.prefix if plot_name_suffix is None else f'{cfg.prefix}_{plot_name_suffix}'
for file_type in ['png', 'svg']:
file_path = output_path.joinpath(f'{file_name}.{file_type}')