Skip to content

Commit

Permalink
Closes #71 MSA is no longer based on TransposonLayout. Removed 'trans…
Browse files Browse the repository at this point in the history
…poson' fluentdna.layout option. Code cleanup. Ran full test suite.
  • Loading branch information
josiahseaman committed Dec 10, 2018
1 parent 97ab818 commit 07e78e9
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 30 deletions.
5 changes: 0 additions & 5 deletions DDV/AnnotationAlignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,9 @@
filter_repeats_by_chromosome_and_family
from DDV.Span import alignment_chopping_index, AlignedSpans, Span
from DDV.TransposonLayout import TransposonLayout

from DDV.DDVUtils import make_output_dir_with_suffix


# class AnnotationAlignment(object):
# def __init__(self):


def create_aligned_annotation_fragments(alignment, repeat_entries):
aligned_repeats = [] # list of AlignedSpans
consensus_width = max_consensus_width(repeat_entries)
Expand Down
18 changes: 9 additions & 9 deletions DDV/MultipleAlignmentLayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
from PIL import Image, ImageDraw

import math
from DDV.TileLayout import hex_to_rgb
from DDV.TileLayout import hex_to_rgb, TileLayout
from natsort import natsorted

from DDV.TransposonLayout import TransposonLayout
from Layouts import level_layout_factory


Expand All @@ -24,13 +23,14 @@ def fastas_in_folder(input_fasta_folder):



class MultipleAlignmentLayout(TransposonLayout):
class MultipleAlignmentLayout(TileLayout):
def __init__(self, sort_contigs=False, **kwargs):
kwargs['low_contrast'] = True
kwargs['sort_contigs'] = True
super(MultipleAlignmentLayout, self).__init__(**kwargs)
self.all_contents = {} # (filename: contigs) output_fasta() determines order of fasta_sources
self.using_mixed_widths = True # we are processing all repeat types with different widths
self.current_column_height = 20
self.next_origin = [self.border_width, 30] # margin for titles, incremented each MSA
self.protein_palette = True
self.sort_contigs = sort_contigs
self.title_height_px = 10
Expand Down Expand Up @@ -90,7 +90,6 @@ def __init__(self, sort_contigs=False, **kwargs):


def process_all_alignments(self, input_fasta_folder, output_folder, output_file_name):
self.using_mixed_widths = True # we are processing all repeat types with different widths
start_time = datetime.now()
self.preview_all_files(input_fasta_folder)
self.calculate_mixed_layout()
Expand All @@ -115,11 +114,11 @@ def process_all_alignments(self, input_fasta_folder, output_folder, output_file_
print("Output Image in:", datetime.now() - start_time)


def draw_nucleotides(self):
def draw_nucleotides(self, verbose=False):
"""Layout a whole set of different repeat types with different widths. Column height is fixed,
but column width varies constantly. Wrapping to the next row is determined by hitting the
edge of the allocated image."""
super(TransposonLayout, self).draw_nucleotides()
super(MultipleAlignmentLayout, self).draw_nucleotides(verbose)


def calc_all_padding(self):
Expand Down Expand Up @@ -152,8 +151,9 @@ def prepare_image(self, image_length, width=None, height=None):
self.pixels = self.image.load()

def initialize_image_by_sequence_dimensions(self):
max_w = max([x.consensus_width for source in self.all_contents.values() for x in source])
max_h = max([len(source) for source in self.all_contents.values()])
margin = self.border_width*2
max_w = max([x.consensus_width for source in self.all_contents.values() for x in source]) + margin
max_h = max([len(source) + self.title_height_px for source in self.all_contents.values()]) + margin
#TODO check if max_h is too large and needs to be interrupted by layout: one full row
areas = []
for source in self.all_contents.values():
Expand Down
6 changes: 3 additions & 3 deletions DDV/TileLayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def draw_extras(self):
"""Placeholder method for child classes"""
pass

def draw_nucleotides(self):
def draw_nucleotides(self, verbose=True):
total_progress = 0
# Layout contigs one at a time
for contig_index, contig in enumerate(self.contigs):
Expand All @@ -239,9 +239,9 @@ def draw_nucleotides(self):
# if nuc != gap_char:
self.draw_pixel(nuc, x + i, y)
except IndexError:
print("Cursor fell off the image at", x,y)
print("Cursor fell off the image at", (x,y))
total_progress += contig.tail_padding # add trailing white space after the contig sequence body
if len(self.contigs) < 100 or contig_index % (len(self.contigs) // 100) == 0:
if verbose and (len(self.contigs) < 100 or contig_index % (len(self.contigs) // 100) == 0):
print(str(total_progress / self.image_length * 100)[:4], '% done:', contig.name,
flush=True) # pseudo progress bar
print('')
Expand Down
17 changes: 13 additions & 4 deletions DDV/TransposonLayout.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
""" DEPRECATION WARNING:
This file is currently unused by FluentDNA. Its original purpose was to show
a variety of MSAs pulled from RepeatMasker annotations across the genome.
The intent was to visualize the diversity and abundance at each site.
All the reusable functionality of TransposonLayout has been moved to MultipleAlignmentLayout.
A fairly small script could process the RepeatMasker annotation file into a folder of fasta MSAs
that could be visualized with MultipleAlignmentLayout.
The crucial values are the within-repeat rep_end coordinates. I found experimentally
that rooting the MSA on the last nucleotide of each line (not the start) gave the most
coherent MSA."""

from __future__ import print_function, division, absolute_import, \
with_statement, generators, nested_scopes
import math
import traceback

import sys
from DNASkittleUtils.DDVUtils import editable_str
from collections import defaultdict
from datetime import datetime
Expand All @@ -12,15 +22,14 @@
from DNASkittleUtils.DDVUtils import rev_comp
from DDV.RepeatAnnotations import read_repeatmasker_csv, max_consensus_width, blank_line_array
from DDV.TileLayout import TileLayout
from DDV.Layouts import LayoutLevel, level_layout_factory
from DDV import gap_char


class TransposonLayout(TileLayout):
def __init__(self, **kwargs):
# print("Warning: Transposon Layout is an experimental feature not currently supported.",
# file=sys.stderr)
kwargs.update({'sort_contigs': True}) # important for mega row heigh handling
kwargs.update({'sort_contigs': True}) # important for mega row height handling
super(TransposonLayout, self).__init__(**kwargs)
self.repeat_entries = None
self.current_column_height = 20
Expand Down Expand Up @@ -102,7 +111,7 @@ def filter_simple_repeats(self, return_only_simple_repeats=False):



def draw_nucleotides(self):
def draw_nucleotides(self, verbose=True):
processed_contigs = self.create_repeat_fasta_contigs()
print("Finished creating contigs")
self.contigs = processed_contigs # TODO: overwriting self.contigs isn't really great data management
Expand Down
9 changes: 0 additions & 9 deletions DDV/fluentdna.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
from DDV.UniqueOnlyChainParser import UniqueOnlyChainParser
from DDV.AnnotatedAlignment import AnnotatedAlignment
from DDV.TileLayout import TileLayout
from DDV.TransposonLayout import TransposonLayout
from DDV.MultipleAlignmentLayout import MultipleAlignmentLayout
from DNASkittleUtils.Contigs import write_contigs_to_file, read_contigs

Expand Down Expand Up @@ -150,14 +149,6 @@ def ddv(args):
done(args, args.output_dir)

# ==========TODO: separate views that support batches of contigs============= #
elif args.layout == 'transposon':
layout = TransposonLayout()
# if len(args.contigs) != 1:
# raise NotImplementedError("Chromosome Argument requires exactly one chromosome e.g. '--contigs chr12'")
layout.process_all_repeats(args.fasta, args.output_dir, just_the_name(args.output_dir), args.ref_annotation, args.contigs)
print("Done with Transposons")
done(args, args.output_dir)

elif args.layout == 'alignment':
layout = MultipleAlignmentLayout(sort_contigs=args.sort_contigs)
layout.process_all_alignments(args.fasta,
Expand Down

0 comments on commit 07e78e9

Please sign in to comment.