Skip to content

Commit

Permalink
add support for custom plot labels
Browse files Browse the repository at this point in the history
  • Loading branch information
oschwengers committed Nov 11, 2024
1 parent 9da23ba commit 01de8ea
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ It accepts the results of a former annotation process in JSON format and allows
### Usage
```bash
usage: bakta_plot [--config CONFIG] [--output OUTPUT] [--prefix PREFIX] [--sequences SEQUENCES] [--type {features,cog}] [--help] [--verbose] [--debug] [--tmp-dir TMP_DIR] [--version] <input>
usage: bakta_plot [--config CONFIG] [--output OUTPUT] [--prefix PREFIX] [--sequences SEQUENCES] [--type {features,cog}] [--label LABEL] [--help] [--verbose] [--debug] [--tmp-dir TMP_DIR] [--version] <input>
Rapid & standardized annotation of bacterial genomes, MAGs & plasmids
Expand All @@ -752,6 +752,7 @@ Plotting:
Sequences to plot: comma separated number or name (default = all, numbers one-based)
--type {features,cog}
Plot type: feature/cog (default = features)
--label LABEL Plot center label (for line breaks use '|')
General:
--help, -h Show this help message and exit
Expand All @@ -778,6 +779,12 @@ In the `cog` mode, all protein-coding genes (CDS) are colored due to assigned CO

In addition, both plot types share two innermost GC content and GC skew rings. The first ring represents the GC content per sliding window over the entire sequence(s) in green (`#33a02c`) and red (`#e31a1c`), representing GC above and below average, respectively. The second ring represents the GC skew in orange (`#fdbf6f`) and blue (`#1f78b4`). The GC skew gives hints on a replicon's replication bubble and hence, on the completeness of the assembly. On a complete & circular bacterial chromosome, you normally see two inflection points at the origin of replication and at its opposite region -> [Wikipedia](https://en.wikipedia.org/wiki/GC_skew)
Custom plot labels (text in the center) can be provided via `--label`:
```bash
bakta_plot --sequences 2 --label="line 1|line 2|line 3" input.json
```
## Auxiliary scripts
Often, the usage of Bakta is a necessary upfront task followed by deeper analyses implemented in custom scripts. In [scripts](scripts) we'd like to collect & offer a pool of scripts addressing common tasks:
Expand Down
13 changes: 7 additions & 6 deletions bakta/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def main():
arg_group_plot = parser.add_argument_group('Plotting')
arg_group_plot.add_argument('--sequences', action='store', default='all', help='Sequences to plot: comma separated number or name (default = all, numbers one-based)')
arg_group_plot.add_argument('--type', action='store', type=str, default=bc.PLOT_FEATURES, choices=[bc.PLOT_FEATURES, bc.PLOT_COG], help=f'Plot type (default = {bc.PLOT_FEATURES})')
arg_group_plot.add_argument('--label', action='store', type=str, default=None, help=f"Plot center label (for line breaks use '|')")

arg_group_general = parser.add_argument_group('General')
arg_group_general.add_argument('--help', '-h', action='help', help='Show this help message and exit')
Expand Down Expand Up @@ -198,7 +199,7 @@ def main():
print('Draw plots...')
if args.sequences == 'all': # write whole genome plot
print(f'\tdraw circular genome plot (type={plot_type}) containing all sequences...')
write(data, features, output_path, colors, plot_type=plot_type)
write(data, features, output_path, colors, plot_type=plot_type, plot_label=args.label)
else: # write genome plot containing provided sequences only
plot_sequences = []
sequence_identifiers = []
Expand All @@ -217,10 +218,10 @@ def main():
plot_sequence_ids = [seq['id'] for seq in plot_sequences]
data['features'] = [feat for feat in features if feat['sequence'] in plot_sequence_ids] # reduce feature list in data object
data['sequences'] = [seq for seq in sequences if seq['id'] in plot_sequence_ids] # reduce sequence list in data object
write(data, features, output_path, colors, plot_name_suffix=plot_name_suffix, plot_type=plot_type)
write(data, features, output_path, colors, plot_name_suffix=plot_name_suffix, plot_type=plot_type, plot_label=args.label)


def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plot_type=bc.PLOT_FEATURES):
def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plot_type=bc.PLOT_FEATURES, plot_label=None):
sequence_list = insdc.build_biopython_sequence_list(data, features)
for seq in sequence_list: # fix edge features because PyCirclize cannot handle them correctly
seq.features = [feat for feat in seq.features if feat.type != 'gene' and feat.type != 'source']
Expand All @@ -239,13 +240,13 @@ def write(data, features, output_path, colors=COLORS, plot_name_suffix=None, plo
feat.location = FeatureLocation(feat_loc.start, int(str(feat_loc.end)[1:]), strand=feat.strand)

# build label
plot_lable = build_label(data)
plot_label = build_label(data) if plot_label is None else plot_label.replace('|', '\n')

# select style
if plot_type == bc.PLOT_COG:
plot = build_features_type_cog(data, sequence_list, plot_lable, colors)
plot = build_features_type_cog(data, sequence_list, plot_label, colors)
else:
plot = build_features_type_feature(data, sequence_list, plot_lable, colors)
plot = build_features_type_feature(data, sequence_list, plot_label, colors)
file_name = cfg.prefix if plot_name_suffix is None else f'{cfg.prefix}_{plot_name_suffix}'
for file_type in ['png', 'svg']:
file_path = output_path.joinpath(f'{file_name}.{file_type}')
Expand Down

0 comments on commit 01de8ea

Please sign in to comment.