From 992c2c0c46c514cc93523fd615b81b38c4d02223 Mon Sep 17 00:00:00 2001 From: Matt Rasmussen Date: Wed, 17 Jul 2013 20:50:23 -0700 Subject: [PATCH] working on including all dependencies from rasmus and compbio --- arghmm/dep.py | 7 + arghmm/deps/__init__.py | 0 arghmm/deps/compbio/__init__.py | 7 + arghmm/deps/compbio/__init__.pyc | Bin 0 -> 257 bytes arghmm/deps/compbio/alignlib.py | 694 ++++++ arghmm/deps/compbio/alignlib.pyc | Bin 0 -> 18596 bytes arghmm/deps/compbio/arglib.py | 3163 +++++++++++++++++++++++++++ arghmm/deps/compbio/arglib.pyc | Bin 0 -> 71323 bytes arghmm/deps/compbio/birthdeath.py | 425 ++++ arghmm/deps/compbio/birthdeath.pyc | Bin 0 -> 10518 bytes arghmm/deps/compbio/coal.py | 2254 +++++++++++++++++++ arghmm/deps/compbio/coal.pyc | Bin 0 -> 65385 bytes arghmm/deps/compbio/fasta.py | 361 +++ arghmm/deps/compbio/fasta.pyc | Bin 0 -> 10403 bytes arghmm/deps/compbio/phylo.py | 2769 +++++++++++++++++++++++ arghmm/deps/compbio/phylo.pyc | Bin 0 -> 71729 bytes arghmm/deps/compbio/seqlib.py | 445 ++++ arghmm/deps/compbio/seqlib.pyc | Bin 0 -> 17163 bytes arghmm/deps/compbio/vis/__init__.py | 0 arghmm/deps/compbio/vis/argvis.py | 1029 +++++++++ arghmm/deps/rasmus/__init__.py | 2 + arghmm/deps/rasmus/__init__.pyc | Bin 0 -> 150 bytes arghmm/deps/rasmus/hmm.py | 351 +++ arghmm/deps/rasmus/hmm.pyc | Bin 0 -> 9368 bytes arghmm/deps/rasmus/intervals.py | 262 +++ arghmm/deps/rasmus/intervals.pyc | Bin 0 -> 6631 bytes arghmm/deps/rasmus/linked_list.py | 220 ++ arghmm/deps/rasmus/linked_list.pyc | Bin 0 -> 6605 bytes arghmm/deps/rasmus/plotting.py | 314 +++ arghmm/deps/rasmus/plotting.pyc | Bin 0 -> 7823 bytes arghmm/deps/rasmus/sets.py | 128 ++ arghmm/deps/rasmus/stats.py | 1507 +++++++++++++ arghmm/deps/rasmus/stats.pyc | Bin 0 -> 46991 bytes arghmm/deps/rasmus/svg.py | 321 +++ arghmm/deps/rasmus/svg.pyc | Bin 0 -> 11408 bytes arghmm/deps/rasmus/tablelib.py | 1465 +++++++++++++ arghmm/deps/rasmus/tablelib.pyc | Bin 0 -> 31244 bytes arghmm/deps/rasmus/testing.py | 140 ++ arghmm/deps/rasmus/textdraw.py | 79 + arghmm/deps/rasmus/textdraw.pyc | Bin 0 -> 2783 bytes arghmm/deps/rasmus/timer.py | 241 ++ arghmm/deps/rasmus/timer.pyc | Bin 0 -> 10388 bytes arghmm/deps/rasmus/treelib.py | 2415 ++++++++++++++++++++ arghmm/deps/rasmus/treelib.pyc | Bin 0 -> 66688 bytes arghmm/deps/rasmus/util.py | 1910 ++++++++++++++++ arghmm/deps/rasmus/util.pyc | Bin 0 -> 60962 bytes setup/cp-deps.py | 19 + test/test_pylibs.py | 32 + 48 files changed, 20560 insertions(+) create mode 100644 arghmm/dep.py create mode 100644 arghmm/deps/__init__.py create mode 100644 arghmm/deps/compbio/__init__.py create mode 100644 arghmm/deps/compbio/__init__.pyc create mode 100644 arghmm/deps/compbio/alignlib.py create mode 100644 arghmm/deps/compbio/alignlib.pyc create mode 100644 arghmm/deps/compbio/arglib.py create mode 100644 arghmm/deps/compbio/arglib.pyc create mode 100644 arghmm/deps/compbio/birthdeath.py create mode 100644 arghmm/deps/compbio/birthdeath.pyc create mode 100644 arghmm/deps/compbio/coal.py create mode 100644 arghmm/deps/compbio/coal.pyc create mode 100644 arghmm/deps/compbio/fasta.py create mode 100644 arghmm/deps/compbio/fasta.pyc create mode 100644 arghmm/deps/compbio/phylo.py create mode 100644 arghmm/deps/compbio/phylo.pyc create mode 100644 arghmm/deps/compbio/seqlib.py create mode 100644 arghmm/deps/compbio/seqlib.pyc create mode 100644 arghmm/deps/compbio/vis/__init__.py create mode 100644 arghmm/deps/compbio/vis/argvis.py create mode 100644 arghmm/deps/rasmus/__init__.py create mode 100644 
arghmm/deps/rasmus/__init__.pyc create mode 100644 arghmm/deps/rasmus/hmm.py create mode 100644 arghmm/deps/rasmus/hmm.pyc create mode 100644 arghmm/deps/rasmus/intervals.py create mode 100644 arghmm/deps/rasmus/intervals.pyc create mode 100644 arghmm/deps/rasmus/linked_list.py create mode 100644 arghmm/deps/rasmus/linked_list.pyc create mode 100644 arghmm/deps/rasmus/plotting.py create mode 100644 arghmm/deps/rasmus/plotting.pyc create mode 100644 arghmm/deps/rasmus/sets.py create mode 100644 arghmm/deps/rasmus/stats.py create mode 100644 arghmm/deps/rasmus/stats.pyc create mode 100644 arghmm/deps/rasmus/svg.py create mode 100644 arghmm/deps/rasmus/svg.pyc create mode 100644 arghmm/deps/rasmus/tablelib.py create mode 100644 arghmm/deps/rasmus/tablelib.pyc create mode 100644 arghmm/deps/rasmus/testing.py create mode 100644 arghmm/deps/rasmus/textdraw.py create mode 100644 arghmm/deps/rasmus/textdraw.pyc create mode 100644 arghmm/deps/rasmus/timer.py create mode 100644 arghmm/deps/rasmus/timer.pyc create mode 100644 arghmm/deps/rasmus/treelib.py create mode 100644 arghmm/deps/rasmus/treelib.pyc create mode 100644 arghmm/deps/rasmus/util.py create mode 100644 arghmm/deps/rasmus/util.pyc create mode 100755 setup/cp-deps.py create mode 100644 test/test_pylibs.py diff --git a/arghmm/dep.py b/arghmm/dep.py new file mode 100644 index 00000000..3e4cad6b --- /dev/null +++ b/arghmm/dep.py @@ -0,0 +1,7 @@ + +import sys, os + +def load_deps(dirname="deps"): + sys.path.append(os.path.realpath( + os.path.join(os.path.dirname(__file__), dirname))) + diff --git a/arghmm/deps/__init__.py b/arghmm/deps/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/arghmm/deps/compbio/__init__.py b/arghmm/deps/compbio/__init__.py new file mode 100644 index 00000000..66446352 --- /dev/null +++ b/arghmm/deps/compbio/__init__.py @@ -0,0 +1,7 @@ +""" + + The CompBio python module + + Various utilities for computational biology. + +""" diff --git a/arghmm/deps/compbio/__init__.pyc b/arghmm/deps/compbio/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1ef39a5ca359f0f6ef380476fdbe026b25dac425 GIT binary patch literal 257 zcmYL@%?iRW497b!O2K#Nai{g_L1gG%kckH`b#`@&tjkz4VUOukc=rj+m|`Hme3AU~ zd@tqd@;X@YwTNTN9weg>;>?Uh7SG0B#*k969V@4H=-VxzY6q&YQ67uFcGLKjcYu9R zJwXiwE~p!J%JM=_IRQ3#t*(GImO~=tJ|0zR&M>AxYfQ|k_#>oTYTuB)LaFo4R`77T o?eOj_1#Xm;_rFlMHt11`1++2H02!0;lM6nwp1$0ST7g}B0Nvd~`~Uy| literal 0 HcmV?d00001 diff --git a/arghmm/deps/compbio/alignlib.py b/arghmm/deps/compbio/alignlib.py new file mode 100644 index 00000000..7a27b61a --- /dev/null +++ b/arghmm/deps/compbio/alignlib.py @@ -0,0 +1,694 @@ +# +# alignlib.py +# Sequence and alignment functions +# + + +# python libs +import sys +from collections import defaultdict + +# rasmus libs +from rasmus import util + +# compbio libs +from . 
import fasta, seqlib +from seqlib import * + + + +#============================================================================= +# Alignment functions + + +def new_align(aln=None): + """Makes a new alignment object based on the given object + + given return + ----- ------ + dict FastaDict + other other + """ + + if aln is None: + return fasta.FastaDict() + elif isinstance(aln, SeqDict): + return type(aln)() + else: + return fasta.FastaDict() + + +def mapalign(aln, keyfunc=lambda x: x, valfunc=lambda x: x): + """Maps the keys and values of an alignment""" + + aln2 = new_align(aln) + for key, val in aln.iteritems(): + aln2[keyfunc(key)] = valfunc(val) + return aln2 + + +def subalign(aln, cols): + """Returns an alignment with a subset of the columns (cols)""" + + return mapalign(aln, valfunc=lambda x: "".join(util.mget(x, cols))) + + +def remove_empty_columns(aln): + """ + Removes any column from an alignment 'aln' that contains only gaps + + A new alignment is returned + """ + + ind = [] + seqs = aln.values() + for i in range(aln.alignlen()): + for seq in seqs: + if seq[i] != "-": + ind.append(i) + break + + return subalign(aln, ind) + + +def remove_gapped_columns(aln): + """Removes any column form an alignment 'aln' that contains a gap + + A new alignment is returned + """ + cols = zip(* aln.values()) + ind = util.find(lambda col: "-" not in col, cols) + return subalign(aln, ind) + + +def require_nseqs(aln, n): + """ + Keep only columns with atleast 'n' non gapped sequences + """ + + seqs = aln.values() + ind = [i for i in range(aln.alignlen()) + if sum(1 for seq in seqs if seq[i] != "-") >= n] + return subalign(aln, ind) + + +def get_seq_overlap(seq1, seq2): + """ + Count number of overlapping bases between two gapped sequences + """ + + overlap = 0 + for i in range(len(seq1)): + if seq1[i] != "-" and seq2[i] != "-": + overlap += 1 + + return overlap + + +def calc_conservation_string(aln): + """Returns a string of stars representing the conservation of an alignment""" + + percids = calc_conservation(aln) + + # find identity positions + identity = "" + for pid in percids: + if pid == 1: + identity += "*" + elif pid > .5: + identity += "." 
+ else: + identity += " " + + return identity + + +def calc_conservation(aln): + """Returns a list of percent matching in each column of an alignment""" + + length = len(aln.values()[0]) + seqs = aln.values() + percids = [] + + # find identity positions + identity = "" + for i in xrange(length): + chars = util.hist_dict(util.cget(seqs, i)) + if "-" in chars: del chars["-"] + + if len(chars) == 0: + percids.append(0.0) + else: + pid = max(chars.values()) / float(len(aln)) + percids.append(pid) + return percids + + + +def print_align(aln, seqwidth = 59, spacing=2, extra=fasta.FastaDict(), + out=sys.stdout, order=None): + """Pretty print an alignment""" + + if order == None: + order = aln.keys() + + namewidth = max(map(len, order)) + spacing + + def mkname(name, namewidth): + name2 = name[:namewidth] + name2 += " " * (namewidth - len(name2)) + return name2 + + identity = calc_conservation_string(aln) + + # print alignment + for i in xrange(0, len(aln.values()[0]), seqwidth): + # print sequences + for name in order: + print >>out, "%s %s" % (mkname(name, namewidth), + aln[name][i:i+seqwidth]) + + # print extra + for name in extra.keys(): + print >>out, "%s %s" % (mkname(name, namewidth), + extra[name][i:i+seqwidth]) + + # print identity + print >>out, (" "*namewidth) + " " + identity[i:i+seqwidth] + print >>out + + +def revtranslate_align(aaseqs, dnaseqs, check=False, trim=False): + """Reverse translates aminoacid alignment into DNA alignment + + Must supply original ungapped DNA. + """ + + align = new_align(aaseqs) + + for name, seq in aaseqs.iteritems(): + try: + dna = dnaseqs[name].upper() + dnalen = len(dna) + aalen = sum(int(a != "-") for a in seq) + + if len(dna) != aalen * 3: + if trim: + # make dna a multiple of three + dna = dna[:(len(dna) // 3) * 3] + + if len(dna) > aalen * 3: + # trim dna + dna = dna[:aalen*3] + else: + # trim peptide to match nucleotide + j = 0 + for i in xrange(len(seq)): + if seq[i] != '-': + j += 1 + if j > len(dna) // 3: + seq = seq[:i] + "-" * (len(seq) - i) + break + + aalen2 = sum(int(a != "-") for a in seq) + assert len(dna) == aalen2 * 3, ( + len(dna), aalen2 * 3) + + util.logger("trim dna (%d) and pep (%d)" % + (dnalen - len(dna), aalen - aalen2)) + + else: + # is last residue X? + for i in xrange(len(seq)-1, -1, -1): + if seq[i] == "-": + continue + if seq[i] == "X": + # repair + seq = seq[:i] + "-" * (len(seq)-i) + dna = dna[:-3] #-(len(dna) % 3)] + break + + + align[name] = revtranslate(seq, dna, check=check) + except TranslateError, e: + raise + + return align + + + + +#============================================================================= +# four fold degeneracy + + +def mark_codon_pos(seq, pos=0): + """ + return the codon position for each base in a gapped sequence + + codon + ATG + 012 + + gaps are given codon pos -1 + Ns are counted as bases + """ + + codons = [] + + for base in seq: + if base != "-": + codons.append(pos) + pos = (pos + 1) % 3 + else: + codons.append(-1) + + return codons + + +def make_codon_pos_align(aln): + """Get the codon position of every base in an alignment""" + + def func(seq): + dct = {-1: "-", + 0: "0", + 1: "1", + 2: "2"} + return "".join(util.mget(dct, mark_codon_pos(seq))) + return mapalign(aln, valfunc=func) + + +def find_aligned_codons(aln): + """Returns the columns indices of the alignment that represent aligned + codons. 
+ """ + + ind = range(aln.alignlen()) + + # throw out codons with non mod 3 gaps + ind2 = [] + for i in range(0, aln.alignlen(), 3): + bad = False + + for key, val in aln.iteritems(): + codon = val[i:i+3] + if "-" in codon and codon != "---": + bad = True + break + + if not bad: + ind2.extend([i, i+1, i+2]) + + return ind2 + + + +def filter_aligned_codons(aln): + """filters an alignment for only aligned codons""" + + ind = find_align_codons(aln) + return subalign(aln, ind) + + +def find_four_fold(aln): + """Returns index of all columns in alignment that are completely + fourfold degenerate + + Assumes that columns are already filtered for aligned codons + """ + + # create peptide alignment + pepAln = mapalign(aln, valfunc=translate) + + # find peptide conservation + pepcons = [] + pep = [] + for i in xrange(pepAln.alignlen()): + # get a column from the peptide alignment + col = [seq[i] for seq in pepAln.itervalues()] + + # compute the histogram of the column. + # ignore gaps '-' and non-translated 'X' + hist = util.hist_dict(col) + if "-" in hist: + del hist["-"] + if "X" in hist: + del hist["X"] + + # column is conserved if only one AA appears + if len(hist) == 1: + pepcons.append(True) + pep.append(hist.keys()[0]) + else: + pepcons.append(False) + pep.append("X") + + + # find four-fold sites in conserved peptides + ind = [] + + for i in range(0, len(aln.values()[0]), 3): + # process only those columns that are conserved at the peptide level + if pepcons[i//3]: + degen = AA_DEGEN[pep[i//3]] + for j in range(3): + if degen[j] == 4: + ind.append(i+j) + return ind + + +def filter_four_fold(aln): + """returns an alignment of only four-fold degenerate sites from an + alignment of coding sequences + + This function performs the following steps: + + 1. remove all codon columns that don't have 0 or 3 gaps + 2. keep all codon columns that code for identical AA + 3. 
if the codon column codes for a 4D AA, then keep its 3rd position + """ + + aln_codons = filter_align_codons(aln) + ind = find_four_fold(aln_codons) + return subalign(aln_codons, ind) + + +def calc_four_fold_dist_matrix(aln): + names = aln.keys() + + mat = [] + # calc upper triangular + for i in range(len(names)): + mat.append([0.0] * (i+1)) + for j in range(i+1, len(names)): + ind = find_four_fold(aln.get([names[i], names[j]])) + + mismatches = 0 + for k in ind: + if aln[names[i]][k] != aln[names[j]][k]: + mismatches += 1 + + if len(ind) == 0: + mat[-1].append(1.0) + else: + mat[-1].append(mismatches / float(len(ind))) + + # make symmetric + for j in range(len(names)): + for i in range(j): + mat[j][i] = mat[i][j] + + return mat + + +def find_degen(aln): + """Determine the degeneracy of each column in an alignment""" + + codon_ind = find_align_codons(aln) + aln2 = subalign(aln, codon_ind) + + pep_aln = mapalign(aln2, valfunc=translate) + pep = pep_aln.values()[0] + identies = calc_conservation(pep_aln) + + degens = [-1] * aln.alignlen() + + for i in range(0, len(codon_ind), 3): + if pep[i/3] == "X": + continue + degen = AA_DEGEN[pep[i/3]] + if identies[i/3] == 1.0: + for j in range(3): + degens[codon_ind[i+j]] = degen[j] + + return degens + + +def make_degen_str(aln): + """Returns a string containing the degeneracy for each column + in an alignment + """ + + degens = find_degen(aln) + degenmap = {-1: " ", + 0: "0", + 1: "1", + 2: "2", + 3: "3", + 4: "4"} + + return "".join(util.mget(degenmap, degens)) + + +def print_degen(aln, **args): + """Pretty print an alignment with its degeneracy for each column""" + + extra = fasta.FastaDict() + extra["DEGEN"] = make_degen_str(aln) + + print_align(aln, extra=extra, **args) + + +#============================================================================= +# Position Specific Scoring Matrix (PSSM) + + +def align2pssm(aln, pseudocounts = {}): + pssm = [] + denom = float(len(aln)) + sum(pseudocounts.values()) + + for i in xrange(aln.alignlen()): + freqs = defaultdict(lambda: 0) + for j in xrange(len(aln)): + freqs[aln[j][i]] += 1 + + for key in pseudocounts: + freqs[key] += pseudocounts[key] + + for key in freqs: + freqs[key] = math.log(freqs[key] / denom, 2) + pssm.append(freqs) + return pssm + + +def pssmSeq(pssm, seq): + score = 0.0 + for i in xrange(len(seq)): + score += pssm[i][seq[i]] + return score + + + +#============================================================================= +# Coordinate conversions +# +# Coordinate systems +# +# 1. local +# 01234567 +# ATGCTGCG +# +# 2. align +# 012222345567 +# ATG---CTG-CG +# +# 3. global +# coordinate on chromosome on positive strand +# +# There should only be two kinds of indexing +# 1. 0-based, end exclusive (local/align coordinates) +# 2. 
1-based, end inclusive (global coordinates) +# + + +class CoordConverter (object): + """Converts between coordinate systems on a gapped sequence""" + + def __init__(self, seq): + self.local2alignLookup = local2align(seq) + self.align2localLookup = align2local(seq) + + + def local2align(self, i, clamp=False): + if clamp: + return self.local2alignLookup[int(util.clamp(i, 0, + len(self.local2alignLookup)-1))] + else: + return self.local2alignLookup[i] + + + def align2local(self, i, clamp=False): + if clamp: + return self.align2localLookup[int(util.clamp(i, 0, + len(self.align2localLookup)-1))] + else: + return self.align2localLookup[i] + + + def global2local(self, gobal_coord, start, end, strand): + """Returns local coordinate in a global region""" + return global2local(gobal_coord, start, end, strand) + + + def local2global(self, local_coord, start, end, strand): + """Return global coordinate within a region from a local coordinate""" + local2global(local_coord, start, end, strand) + + + def global2align(self, global_coord, start, end, strand): + local_coord = global2local(global_coord, start, end, strand) + + # throw exception for out of bounds + if local_coord < 0 or \ + local_coord >= len(alignLookup): + raise Exception("coordinate outside [start, end]") + + return self.local2alignLookup[local_coord] + + + def align2global(self, align_coord, start, end, strand): + local_coord = self.align2localLookup[align_coord] + return local2global(local_coord, start, end, strand) + + + +def local2align(seq): + """ + Returns list of indices of non-gap characters + + 'ATG---CTG-CG' ==> [0,1,2,6,7,8,10,11] + + Used to go from local -> align space + """ + + lookup = [] + for i in xrange(len(seq)): + if seq[i] == "-": continue + lookup.append(i) + return lookup + + +def align2local(seq): + """ + Returns list such that + + 'ATG---CTG-CG' ==> [0,1,2,2,2,3,4,5,5,6,7] + + Used to go from align -> local space + """ + + i = -1 + lookup = [] + for c in seq: + if c != "-": + i += 1 + lookup.append(i) + return lookup + + + +def global2local(gobal_coord, start, end, strand): + """Returns local coordinate in a global region""" + + # swap if strands disagree + if strand == 1: + return gobal_coord - start + else: + return end - gobal_coord + + +def local2global(local_coord, start, end, strand): + """Return global coordinate within a region from a local coordinate""" + + # swap if strands disagree + if strand == 1: + return local_coord + start + else: + return end - local_coord + + +def global2align(global_coord, start, end, strand, alignLookup): + local_coord = global2local(global_coord, start, end, strand) + + # throw exception for out of bounds + if local_coord < 0 or \ + local_coord >= len(alignLookup): + raise Exception("coordinate outside [start, end]") + + return alignLookup[local_coord] + + +def align2global(align_coord, start, end, strand, localLookup): + local_coord = localLookup[align_coord] + return local2global(local_coord, start, end, strand) + + + + +#============================================================================= +# old code + +''' +def findAlignCodons(aln): + """find all columns of aligned codons""" + + codonAln = mapalign(aln, valfunc=mark_codon_pos) + cols = map(util.hist_dict, zip(* codonAln.values())) + + ind = [] + codon = [] + gaps = defaultdict(lambda: 0) + for i in range(len(cols)): + + if len(cols[i]) == 1: + codon.append(i) + elif len(cols[i]) == 2 and -1 in cols[i]: + for key, val in aln.iteritems(): + if val[i] == "-": + gaps[key] += 1 + codon.append(i) + else: + codon = [] + 
if len(codon) == 3: + if len(gaps) == 0 or \ + util.unique([x % 3 for x in gaps.values()]) == [0]: + ind.extend(codon) + codon = [] + for key in gaps: + gaps[key] = 0 + + return ind + + +def findFourFold(aln): + """Returns index of all columns in alignment that are completely + fourfold degenerate + """ + + aln = filterAlignCodons(aln) + pepAln = mapalign(aln, valfunc=translate) + pep = pepAln.values()[0] + + # pep conservation + pepcons = [] + for i in xrange(pepAln.alignlen()): + col = [seq[i] for seq in pepAln.itervalues()] + hist = util.hist_dict(col) + if "-" in hist: + del hist["-"] + if "X" in hist: + del hist["X"] + pepcons.append(len(hist) == 1) + + + ind = [] + + for i in range(0, len(aln.values()[0]), 3): + if pepcons[i//3]: + degen = AA_DEGEN[pep[i//3]] + + for j in range(3): + if degen[j] == 4: + ind.append(i+j) + return ind +''' diff --git a/arghmm/deps/compbio/alignlib.pyc b/arghmm/deps/compbio/alignlib.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84e8ae641994284103b45391ebf30059db7bd749 GIT binary patch literal 18596 zcmcg!ZH!#kSw44WcXnq#{Iy>1j$=F5b{wyhwH>crr%j#2t~YU;CX2b7vc<`27|*@C zJKp))xwFZ-vGb$0N-CuiQiBkM7AOTu3rI!c7YOJNBqW3?Dj@{F_{TqlkPwv+g6DbO zduL|X4lOXw#`ny*_nv$1Iq%2wew=gjH~%_5^6{Vhr=KpnyD@lstMyt`Z~Kj#x1w^}zgdc-&~McJ_FCkxR^E#0$(<2D`AaKyQx{uNd$Uzf zZ=B|zHCFX>dV_>XI(eBSn8$f~uYvcq?53)^zA!@~VSdgnH0n_x3g&UvdAvP>vr1g4 zqq}-JY7gObG1_3ydaQlB85wy=&{DNNgCD0NzKq)0TD^VtW@Ytksa0OXY|l1Z4bmaT zh1IoME&U0jW_-5Xs5Ng^8fQf`@a;_VcAKLCA&a^;jgy!oa`|y@)Z63j%5)e?ri!7Q z4Y;(}9Z8q}TiuKO8Qw5ncBxvb-3&`FoI*Xu;mV{1DNAA0m{_zmEC|ottlN5qLxkAg zb8de5n0qth?hd%S5U-58Ge`-$1JOeX(evD;je=Xkg+)yy<{VxtHDig;dUPAIRuBER zO4Ut>U1J4jU9po=1XLbTveJ%PIBKyNACQz{DSBDitd~_sN2-&KVCWo_z&SL~FK#{x z64pvhF<2xNgR^7axEu2hdSwbF4-%6@i3=!5{{LyV73sOMbk0GmfB>E23e!QQfT7V}Ayf_*Ae54|l|hRG7e_w}#*u-YvZc|K7tZ zffR%*N*~4q(dYZ68n+sidZ6ptZVFZ9+ad4RmqYK-t~<+=iyk|IN_GS1oVdc;L)N8 z5O3hk0=^)sHQpjf*KhmAeSf9ZsBM>(laT(C5Rg(EZR+h3KoG!Pz3s0;`P7YyD?5O` z68nbNQKMk1ah>W9^~OJF^Q}R7Y0&uhoZGi?914Hjy_+RCBoZWdEz$TZc&%jMvr*JE z?M(zZF$L{v1XsyF2~DrVE3;YoP^ira{*I}+PKiESf)&d7A zt*BTh)VCtA>x=|nbgcLU!)-cw`4mqV$$QX z9X%J|-$T&Cta}!nz~l1db&+*^QT%BXlBBDRO}O**&Du>k)bJo+NLp1WL#4hd?>_c# zM(wvFIQH$g8xI#KI1wNifutL`1muCpUybT|Ws3CY(#o9tv*dlEw@DFu>ks93qbFFT zB8Xm0-cw}f^jX|sPH=zB+h;((jsDQ*1n5C?nD&Gh3sl+p<(EZ&Kt5=1q11L#gAL(; zyEULFi?;?lT;N$QJzK)r;>-|-bPYwv#q;BKi}VLO5U8|hft#%;hCSs~hFY-TanyRN z)UH6wcR0xbVNIL}I44d8fB*QREbJMTzIkkWalKrsmH~U+?!>IFFWrn<4i%vw~O`e=7RJFFJD%jHhVCsN$Yk>g-t7tn|3I3wC-}CJCE{(W2v({QxJumKqE! 
zhUcc?NJ>ej7hxunFQ@;P{)R}9e9Wfl|0Z7ghz%wIX@Rq+ymQxdKJydLdFo+vzKm}E zzh1=yg!nc){T7Qy>N$LV2O%CXYOi4k^d7`=GiC5DAMQQS^Xz&L2qU+75C2EkLHI*sF}U|F0XB;Q1H9FM$+|LWoWx@l2$*?2{?>~pabY74*)0d0N@A63F!Fc=0pZv z|L_1cT!&PO_L~agK#Pzqr5}J51t+5bmS0oaROpdArPl z;c3uh(PHrqi+5SvXYpkg%8>p7Yrn+emr" % self.name + + def get_dist(self, parent_index=0): + """Get branch length distance from node to parent_index'th parent""" + if len(self.parents) == 0: + return 0.0 + return self.parents[parent_index].age - self.age + + def get_dists(self): + """Get all branch length distances from node to parents""" + return [p.age - self.age for p in self.parents] + + def copy(self): + """Returns a copy of this node""" + node = ArgNode(self.name, age=self.age, event=self.event, + pos=self.pos) + node.data = dict(self.data) + return node + + def is_leaf(self): + """Returns True if this node is a leaf""" + return len(self.children) == 0 + + + + +class ARG (object): + """ + A ancestral recombination graph (ARG) + """ + + def __init__(self, start=0.0, end=1.0): + self.root = None + self.nodes = {} + self.nextname = 1 + self.start = start + self.end = end + + + def __iter__(self): + """Iterates over the nodes in the ARG""" + return self.nodes.itervalues() + + + def __len__(self): + """Returns number of nodes in the ARG""" + return len(self.nodes) + + + def __getitem__(self, name): + """Returns node by name""" + return self.nodes[name] + + + def __setitem__(self, name, node): + """Adds a node to the ARG""" + node.name = name + self.add(node) + + + def __contains__(self, name): + """ + Returns True if node in ARG has name 'name' + """ + return name in self.nodes + + + #================================= + # node manipulation methods + + def new_name(self): + """ + Returns a new name for a node + """ + name = self.nextname + self.nextname += 1 + return name + + + def new_node(self, name=None, parents=[], children=[], + age=0, event="gene", pos=0): + """ + Returns a new node + """ + if name is None: + name = self.new_name() + node = self.add(ArgNode(name, age=age, event=event, pos=pos)) + node.parents = list(parents) + node.children = list(children) + return node + + + def new_root(self, age=0, event="gene", pos=0): + """ + Returns a new root + """ + self.root = self.new_node(age=age, event=event, pos=pos) + return self.root + + + def add(self, node): + """ + Adds a node to the ARG + """ + self.nodes[node.name] = node + return node + + def remove(self, node): + """ + Removes a node from the ARG + """ + for child in node.children: + child.parents.remove(node) + for parent in node.parents: + parent.children.remove(node) + del self.nodes[node.name] + + + def rename(self, oldname, newname): + """ + Renames a node in the ARG + """ + node = self.nodes[oldname] + node.name = newname + del self.nodes[oldname] + self.nodes[newname] = node + + + def leaves(self, node=None): + """ + Iterates over the leaves of the ARG + """ + if node is None: + for node in self: + if len(node.children) == 0: + yield node + else: + for node in self.preorder(node): + if len(node.children) == 0: + yield node + + + def leaf_names(self, node=None): + """ + Iterates over the leaf names of the ARG + """ + if node is None: + for node in self: + if len(node.children) == 0: + yield node.name + else: + for node in self.preorder(node): + if len(node.children) == 0: + yield node.name + + + def copy(self): + """ + Returns a copy of this ARG + """ + + arg = ARG(start=self.start, end=self.end) + arg.nextname = self.nextname + + # copy all nodes + for name, node in self.nodes.iteritems(): + 
arg.nodes[name] = node.copy() + + # connect nodes + for node in self.nodes.itervalues(): + node2 = arg[node.name] + for child in node.children: + node2.children.append(arg[child.name]) + for parent in node.parents: + node2.parents.append(arg[parent.name]) + + if self.root: + arg.root = arg[self.root.name] + + return arg + + + #================================ + # iterator methods + + def postorder(self, node=None): + """ + Iterates through nodes in postorder traversal + """ + + visit = defaultdict(lambda: 0) + queue = list(self.leaves(node)) + + for node in queue: + yield node + for parent in node.parents: + visit[parent] += 1 + + # if all children of parent has been visited then queue parent + if visit[parent] == len(parent.children): + queue.append(parent) + + + def preorder(self, node=None): + """ + Iterates through nodes in preorder traversal + """ + + visit = set() + if node is None: + node = self.root + queue = [node] + + for node in queue: + if node in visit: + continue + yield node + visit.add(node) + + for child in node.children: + queue.append(child) + + + def postorder_marginal_tree(self, pos, nodes=None): + """ + Iterate postorder over the nodes in the marginal tree at position 'pos' + + If nodes is given, postorder can be determined more quickly + + NOTE: nodes are iterated in order of age + """ + + # initialize heap + heap = [(node.age, node) for node in self.leaves()] + seen = set([None]) + visited = set([]) + visit_age = min(x[0] for x in heap) - 1 + + def get_local_children(node, pos): + return [child for child in self.get_local_children(node, pos) + if child in nodes] + + def reachable(node): + # returns True if node is unreachable from leaves + if node in visited or node.is_leaf(): + return True + if node.age < visit_age: + return False + for child in self.get_local_children(node, pos): + if reachable(child): + return True + return False + + def ready(node): + # returns True if node is ready to yield + # node is ready if all unvisited child are unreachable + if nodes is not None: + for child in get_local_children(node, pos): + if child not in visited: + return False + else: + for child in self.get_local_children(node, pos): + if child not in visited and reachable(child): + return False + return True + + + # add all ancestor of lineages + unready = [] + while len(heap) > 0: + # yield next ready node + del unready[:] + while True: + age, node = heapq.heappop(heap) + if ready(node): + break + unready.append((age, node)) + for x in unready: + heapq.heappush(heap, x) + yield node + visited.add(node) + visit_age = node.age + if len(heap) == 0: + # MRCA reached + return + + # find correct marginal parent + # add parent to lineages if it has not been seen before + parent = self.get_local_parent(node, pos) + if parent not in seen: + heapq.heappush(heap, (parent.age, parent)) + seen.add(parent) + + + def preorder_marginal_tree(self, pos, node=None): + """ + Iterate preorder over the nodes in the marginal tree at position 'pos' + + NOTE: this might also include unreachable nodes + """ + + if node is None: + node = arg.root + + # initialize heap + heap = [node] + seen = set([node]) + + # add all ancestor of lineages + while len(heap) > 0: + node = heap.pop() + yield node + + for child in node.children: + if self.get_local_parent(child, pos) == node: + if child not in seen: + heap.append(child) + seen.add(child) + # NOTE: this prevents error when + # children[0] == children[1] + + + def get_local_parent(self, node, pos): + """Returns the local parent of 'node' for position 'pos'""" + 
if node.event == "gene" or node.event == "coal": + if len(node.parents) > 0: + return node.parents[0] + else: + return None + elif node.event == "recomb": + if len(node.parents) == 0: + return None + elif len(node.parents) == 1: + if pos < node.pos: + return node.parents[0] + else: + return None + elif len(node.parents) > 1: + return node.parents[0 if pos < node.pos else 1] + + ''' + if len(node.parents) > 0: + return node.parents[0 if pos < node.pos else 1] + else: + return None + ''' + else: + raise Exception("unknown event '%s'" % node.event) + + + def get_local_parents(self, node, start, end): + """Returns the parents of 'node' with ancestral sequence within (start, end)""" + if node.event == "recomb": + parents = [] + if node.pos > start: + parents.append(node.parents[0]) + if node.pos < end: + parents.append(node.parents[1]) + else: + parents = node.parents + return parents + + + def get_local_children(self, node, pos): + """ + Returns the local children of 'node' for position 'pos' + + NOTE: the local children are not necessarily in the local tree + because the children may be unreachable from the leaves + """ + + return [child for child in node.children + if self.get_local_parent(child, pos) == node] + + + def get_local_dist(self, node, pos): + """Returns the local parent of 'node' for position 'pos'""" + parent = self.get_local_parent(node, pos) + if parent: + return parent.age - node.age + else: + return 0.0 + + + def set_root(self): + + for node in self: + if not node.parents: + self.root = node + break + + #=============================== + # ancestral sequence methods + + def set_recomb_pos(self, start=None, end=None, descrete=False): + """ + Randomly aample all recombination positions in the ARG + """ + + if start is not None: + self.start = start + if end is not None: + self.end = end + + length = self.end - self.start + + for node in self: + if node.event == "recomb": + if descrete: + node.pos = random.randint(self.start, self.end-1) + .5 + else: + node.pos = random.random() * length + self.start + + + def set_ancestral(self): + """ + Set all ancestral regions for the nodes of the ARG + + NOTE: recombination positions must be set first (set_recomb_pos) + """ + + def root_path(ptr, pos): + "walk up the root path from a node" + while ptr.parents: + ptr = self.get_local_parent(ptr, pos) + yield ptr + + for node in self: + node.data["ancestral"] = [] + + for block, tree in iter_tree_tracks(self): + pos = (block[0] + block[1]) / 2.0 + for node in chain(tree, root_path(self.nodes[tree.root.name], pos)): + if node.name in self.nodes: + ancestral = self[node.name].data["ancestral"] + if len(ancestral) > 0 and ancestral[-1][1] == block[0]: + # extend + ancestral[-1] = (ancestral[-1][0], block[1]) + else: + ancestral.append(block) + else: + # cap node? + pass + + + def get_ancestral(self, node, side=None, parent=None): + """ + Get the ancestral sequence from an edge above a node + + node -- node to get ancestral sequence from + side -- 0 for left parent edge, 1 for right parental edge + parent -- if given, determine side from parent node + """ + + # set side from parent + if parent: + side = node.parents.index(parent) + + if node.event == "recomb": + if (parent and len(node.parents) == 2 and + node.parents[0] == node.parents[1]): + # special case where both children of a coal node are the same + # recomb node. 
+ return node.data["ancestral"] + + regions = [] + for reg in node.data["ancestral"]: + if side == 0: + if reg[1] <= node.pos: + # keep all regions fully left of recomb position + regions.append(reg) + elif reg[0] < node.pos: + # cut region + regions.append((reg[0], node.pos)) + elif side == 1: + if reg[0] >= node.pos: + # keep all regions fully right of recomb position + regions.append(reg) + elif reg[1] > node.pos: + # cut region + regions.append((node.pos, reg[1])) + else: + raise Exception("side not specified") + return regions + + elif node.event == "gene" or node.event == "coal": + return node.data["ancestral"] + + else: + raise Exception("unknown event '%s'" % node.event) + + + def prune(self, remove_single=True): + """ + Prune ARG to only those nodes with ancestral sequence + """ + + # NOTE: be careful when removing nodes that you call get_ancestral + # before changing parent/child orders + + # find pruned edges + prune_edges = [] + for node in list(self): + for parent in list(node.parents): + if len(self.get_ancestral(node, parent=parent)) == 0: + prune_edges.append((node, parent)) + + # remove pruneded edges + for node, parent in prune_edges: + parent.children.remove(node) + node.parents.remove(parent) + + # remove pruned nodes + for node in list(self): + if len(node.data["ancestral"]) == 0: + self.remove(node) + + + for node in self: + assert not node.is_leaf() or node.age == 0.0 + + # remove single children + if remove_single: + remove_single_lineages(self) + + # set root + # TODO: may need to actually use self.roots + for node in list(self): + if len(node.parents) == 0: + dellist = [] + while len(node.children) == 1: + delnode = node + node = node.children[0] + self.remove(delnode) + self.root = node + + + #=========================== + # marginal tree methods + + def get_marginal_tree(self, pos, nodes=None): + """ + Returns the marginal tree of the ARG containing position 'pos' + + if nodes is given, marginal tree can be determined quicker + """ + + # make new ARG to contain marginal tree + tree = ARG(self.start, self.end) + tree.nextname = self.nextname + + # populate tree with marginal nodes + for node in self.postorder_marginal_tree(pos, nodes=nodes): + tree.add(node.copy()) + + # set parent and children + roots = [] + for node2 in tree: + node = self[node2.name] + parent = self.get_local_parent(node, pos) + if parent is not None and parent.name in tree.nodes: + parent2 = tree[parent.name] + node2.parents = [parent2] + parent2.children.append(node2) + else: + roots.append(node2) + + # make root + if len(roots) == 1: + tree.root = roots[0] + elif len(roots) > 1: + # make cap node since marginal tree does not fully coallesce + tree.root = tree.new_node(event="coal", + name=self.new_name(), + age=max(x.age for x in roots)+1) + tree.nextname = self.nextname + for node in roots: + tree.root.children.append(node) + node.parents.append(tree.root) + + assert tree.root is not None, (tree.nodes, pos) + + return tree + + + def get_tree(self, pos=None): + """ + Returns a treelib.Tree() object representing the ARG if it is a tree + + if 'pos' is given, return a treelib.Tree() for the marginal tree at + position 'pos'. 
+ """ + + # TODO: make more efficient + + # get marginal tree first + if pos is not None: + return self.get_marginal_tree(pos).get_tree() + + tree = treelib.Tree() + + # add all nodes + for node in self: + node2 = treelib.TreeNode(node.name) + tree.add(node2) + + # set parent, children, dist + for node in tree: + node2 = self[node.name] + node.parent = (tree[node2.parents[0].name] + if len(node2.parents) > 0 else None) + node.children = [tree[c.name] for c in node2.children] + + if node.parent: + node.dist = self[node.parent.name].age - node2.age + + tree.root = tree[self.root.name] + return tree + + + #======================= + # input/output + + def read(self, filename=sys.stdin): + read_arg(filename, arg=self) + + + def write(self, filename=sys.stdout): + write_arg(filename, self) + + + +#============================================================================= + +def assert_arg(arg): + """Asserts that the arg data structure is consistent""" + + for node in arg: + # check parent, child links + for parent in node.parents: + assert node in parent.children + for child in node.children: + assert node in child.parents + + # check ages + for parent in node.parents: + assert node.age <= parent.age, ((node.name, node.age), + (parent.name, parent.age)) + + leaves = set(arg.leaf_names()) + for tree in iter_marginal_trees(arg): + assert set(tree.leaf_names()) == leaves + + +#============================================================================= +# coalescence with recombination + +def sample_coal_recomb(k, n, r): + """ + Returns a sample time for either coal or recombination + + k -- chromosomes + n -- effective population size (haploid) + r -- recombination rate (recombinations / chromosome / generation) + + Returns (event, time) where + event -- 0 for coalesce event, 1 for recombination event + time -- time (in generations) of event + """ + + # coal rate = (k choose 2) / 2 + # recomb rate = k * r + coal_rate = (k * (k-1) / 2) / n + recomb_rate = k * r + rate = coal_rate + recomb_rate + + event = ("coal", "recomb")[int(random.random() < (recomb_rate / rate))] + + return event, random.expovariate(rate) + + +def sample_coal_recomb_times(k, n, r, t=0): + """ + Returns a sample time for either coal or recombination + + k -- chromosomes + n -- effective population size (haploid) + r -- recombination rate (recombinations / chromosome / generation) + t -- initial time (default: 0) + + Returns (event, time) where + event -- 0 for coalesce event, 1 for recombination event + time -- time (in generations) of event + """ + + times = [] + events = [] + + while k > 1: + event, t2 = sample_coal_recomb(k, n, r) + t += t2 + times.append(t) + events.append(event) + if event == "coal": + k -= 1 + elif event == "recomb": + k += 1 + else: + raise Exception("unknown event '%s'" % event) + + return times, events + + +def sample_arg(k, n, rho, start=0.0, end=1.0, t=0, names=None, + make_names=True): + """ + Returns an ARG sampled from the coalescent with recombination (pruned) + + k -- chromosomes + n -- effective population size (haploid) + rho -- recombination rate (recombinations / site / generation) + start -- staring chromosome coordinate + end -- ending chromsome coordinate + t -- initial time (default: 0) + names -- names to use for leaves (default: None) + make_names -- make names using strings (default: True) + + Returns (event, time) where + event -- 0 for coalesce event, 1 for recombination event + time -- time (in generations) of event + """ + + arg = ARG(start, end) + + class Lineage (object): + 
def __init__(self, node, regions, seqlen): + self.node = node + self.regions = regions + self.seqlen = seqlen + + # init ancestral lineages + # (node, region, seqlen) + total_seqlen = k * (end - start) + if make_names: + names = ["n%d" % i for i in range(k)] + if names is None: + lineages = set(Lineage(arg.new_node(), [(start, end)], end-start) + for i in xrange(k)) + else: + lineages = set(Lineage(arg.new_node(name=names[i]), + [(start, end)], end-start) + for i in xrange(k)) + for lineage in lineages: + lineage.node.data["ancestral"] = [(start, end)] + recomb_parent_lineages = {} + lineage_parents = {} + + # block start -> lineage count + block_starts = [start] + block_counts = {start: k} + + # perform coal, recomb + while len(lineages) > 1: + # sample time and event + k = len(lineages) + coal_rate = (k * (k-1) / 2) / n # (k choose 2) / n + recomb_rate = rho * total_seqlen + rate = coal_rate + recomb_rate + t2 = random.expovariate(rate) + event = ("coal", "recomb")[int(random.random() < (recomb_rate / rate))] + t += t2 + + + # process event + if event == "coal": + node = arg.new_node(age=t, event=event) + + # choose lineages to coal + a, b = random.sample(lineages, 2) + lineages.remove(a) + lineages.remove(b) + lineage_parents[a] = node + lineage_parents[b] = node + total_seqlen -= a.seqlen + b.seqlen + + # set parent, child links + node.children = [a.node, b.node] + a.node.parents.append(node) + b.node.parents.append(node) + + # coal each non-overlapping region + regions = [] + lineage_regions = [] + nblocks = len(block_starts) + i = 0 + + for start, end, count in count_region_overlaps( + a.regions, b.regions): + assert start != end, count in (0, 1, 2) + #assert end == arg.end or end in block_starts + i = block_starts.index(start, i) + start2 = start + while start2 < end: + end2 = block_starts[i+1] if i+1 < nblocks else arg.end + + # region coalesces + if count == 2: + block_counts[start2] -= 1 + if count >= 1: + regions.append((start2, end2)) # ancestral seq + if block_counts[start2] > 1: + # regions moves on, since not MRCA + lineage_regions.append((start2, end2)) + + # move to next region + i += 1 + start2 = end2 + node.data["ancestral"] = regions + + # create 1 new lineage if any regions remain + if len(lineage_regions) > 0: + for reg in lineage_regions: + assert block_counts[reg[0]] > 1, (reg, block_counts) + seqlen = lineage_regions[-1][1] - lineage_regions[0][0] + lineages.add(Lineage(node, lineage_regions, seqlen)) + total_seqlen += seqlen + + + elif event == "recomb": + node = arg.new_node(age=t, event=event) + + # choose lineage and pos to recombine (weighted by seqlen) + pick = random.random() * total_seqlen + i = 0 + for lineage in lineages: + i += lineage.seqlen + if i >= pick and lineage.seqlen > 0: + break + + # set parent, child links + lineage_parents[lineage] = node + lineages.remove(lineage) + node.children = [lineage.node] + lineage.node.parents.append(node) + node.data["ancestral"] = lineage.regions + + # choose recomb pos + node.pos = random.uniform(lineage.regions[0][0], + lineage.regions[-1][1]) + + # does recomb pos break an existing block? 
+ for reg in lineage.regions: + if reg[0] < node.pos < reg[1]: + # split block + block_starts.append(node.pos) + block_starts.sort() + prev_pos = block_starts[block_starts.index(node.pos)-1] + block_counts[node.pos] = block_counts[prev_pos] + + # create 2 new lineages + regions1 = list(split_regions(node.pos, 0, lineage.regions)) + regions2 = list(split_regions(node.pos, 1, lineage.regions)) + + regions1_len = regions1[-1][1] - regions1[0][0] + regions2_len = regions2[-1][1] - regions2[0][0] + total_seqlen += regions1_len + regions2_len - lineage.seqlen + a = Lineage(node, regions1, regions1_len) + b = Lineage(node, regions2, regions2_len) + lineages.add(a) + lineages.add(b) + recomb_parent_lineages[node] = (a, b) + else: + raise Exception("unknown event '%s'" % event) + + assert len(lineages) == 0, (lineages, block_counts.values()) + + # fix recomb parent order, so that left is before pos and right after + for node, (a, b) in recomb_parent_lineages.iteritems(): + an = lineage_parents[a] + bn = lineage_parents[b] + for reg in a.regions: assert reg[1] <= node.pos + for reg in b.regions: assert reg[0] >= node.pos + node.parents = [an, bn] + + # set root + arg.root = max(arg, key=lambda x: x.age) + + return arg + + + +def sample_smc_sprs(k, n, rho, start=0.0, end=0.0, init_tree=None, + names=None, make_names=True): + """ + Sample ARG using Sequentially Markovian Coalescent (SMC) + + k -- chromosomes + n -- effective population size (haploid) + rho -- recombination rate (recombinations / site / generation) + start -- staring chromosome coordinate + end -- ending chromsome coordinate + t -- initial time (default: 0) + names -- names to use for leaves (default: None) + make_names -- make names using strings (default: True) + """ + + # yield initial tree first + if init_tree is None: + init_tree = sample_arg(k, n, rho=0.0, start=start, end=end, + names=names, make_names=make_names) + tree = init_tree.copy() + else: + init_tree.end = end + tree = init_tree.get_marginal_tree(start) + remove_single_lineages(tree) + yield init_tree + + # sample SPRs + pos = start + while True: + # sample next recomb point + treelen = sum(x.get_dist() for x in tree) + pos += random.expovariate(treelen * rho) + if pos > end: + break + + # choose branch for recombination + p = random.uniform(0.0, treelen) + total = 0.0 + nodes = (x for x in tree if x.parents) # root can't have a recomb + for node in nodes: + total += node.get_dist() + if total > p: + break + else: + raise Exception("could not find recomb node") + recomb_node = node + + # choose age for recombination + recomb_time = random.uniform( + recomb_node.age, recomb_node.parents[0].age) + + # choose coal node and time + all_nodes = [x for x in tree if not x.is_leaf()] + all_nodes.sort(key=lambda x: x.age) + lineages = set(x for x in tree.leaves() if x != recomb_node) + + coal_time = 0.0 + i = 0 + #print + while i < len(all_nodes): + #print coal_time, recomb_node, lineages + #treelib.draw_tree_names(tree.get_tree(), scale=1e-3, minlen=5) + next_node = all_nodes[i] + + if next_node.age > recomb_time: + if coal_time < recomb_time: + coal_time = recomb_time + next_time = coal_time + random.expovariate( + len(lineages) / float(n)) + + if next_time < next_node.age: + coal_time = next_time + + # choose coal branch + coal_node = random.sample(lineages, 1)[0] + assert coal_node.age < coal_time < coal_node.parents[0].age + break + + # coal is older than next node + coal_time = next_node.age + i += 1 + + # adjust current lineages + for child in next_node.children: + if 
child in lineages: + lineages.remove(child) + else: + assert child == recomb_node, (next_node, child, recomb_node) + if next_node != recomb_node: + lineages.add(next_node) + else: + # coal above tree + coal_node = all_nodes[-1] + coal_time = coal_node.age + random.expovariate(1.0 / float(n)) + + # yield SPR + rleaves = list(tree.leaf_names(recomb_node)) + cleaves = list(tree.leaf_names(coal_node)) + yield pos, (rleaves, recomb_time), (cleaves, coal_time) + + + # apply SPR to local tree + broken = recomb_node.parents[0] + recoal = tree.new_node(age=coal_time, + children=[recomb_node, coal_node]) + + # add recoal node to tree + recomb_node.parents[0] = recoal + broken.children.remove(recomb_node) + if coal_node.parents: + recoal.parents.append(coal_node.parents[0]) + util.replace(coal_node.parents[0].children, coal_node, recoal) + coal_node.parents[0] = recoal + else: + coal_node.parents.append(recoal) + + + # remove broken node + broken_child = broken.children[0] + if broken.parents: + broken_child.parents[0] = broken.parents[0] + util.replace(broken.parents[0].children, broken, broken_child) + else: + broken_child.parents.remove(broken) + + del tree.nodes[broken.name] + tree.set_root() + + + +def sample_arg_smc(k, n, rho, start=0.0, end=0.0, init_tree=None, + names=None, make_names=True): + """ + Returns an ARG sampled from the Sequentially Markovian Coalescent (SMC) + + k -- chromosomes + n -- effective population size (haploid) + rho -- recombination rate (recombinations / site / generation) + start -- staring chromosome coordinate + end -- ending chromsome coordinate + + names -- names to use for leaves (default: None) + make_names -- make names using strings (default: True) + """ + + it = sample_smc_sprs(k, n, rho, start=start, end=end, init_tree=init_tree, + names=names, make_names=make_names) + tree = it.next() + arg = make_arg_from_sprs(tree, it) + + return arg + + +#============================================================================= +# arg functions + + +def lineages_over_time(k, events): + """ + Computes number of lineage though time using coal/recomb events + """ + + for event in events: + if event == "coal": + k -= 1 + elif event == "recomb": + k += 1 + else: + raise Exception("unknown event '%s'" % event) + yield k + + +def make_arg_from_times(k, times, events, start=0, end=1, + names=None, make_names=True): + """ + Returns an ARG given 'k' samples and a list of 'times' and 'events' + + times -- ordered times of coalescence or recombination + events -- list of event types (either 'coal' or 'recomb') + """ + + arg = ARG(start, end) + + # make leaves + if make_names: + names = ["n%d" % i for i in range(k)] + if names is None: + lineages = set((arg.new_node(), 1) for i in xrange(k)) + else: + lineages = set((arg.new_node(name=names[i]), 1) for i in xrange(k)) + + # process events + for t, event in izip(times, events): + if event == "coal": + node = arg.add(ArgNode(arg.new_name(), age=t, event=event)) + a, b = random.sample(lineages, 2) + lineages.remove(a) + lineages.remove(b) + node.children = [a[0], b[0]] + a[0].parents.append(node) + b[0].parents.append(node) + lineages.add((node, 1)) + + elif event == "recomb": + node = arg.add(ArgNode(arg.new_name(), age=t, event=event)) + a = random.sample(lineages, 1)[0] + lineages.remove(a) + node.children = [a[0]] + a[0].parents.append(node) + lineages.add((node, 1)) + lineages.add((node, 2)) + + else: + raise Exception("unknown event '%s'" % event) + + + if len(lineages) == 1: + arg.root = lineages.pop()[0] + + return arg + + 
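A minimal usage sketch of the event-based sampling API defined above, for orientation only: the 'arglib' import path and all parameter values are assumptions, and the snippet is not part of the committed diff.

    # Sample coalescent/recombination event times for k chromosomes,
    # then build an ARG from those times and events.
    import arglib   # assumed import path for this module

    k = 5
    times, events = arglib.sample_coal_recomb_times(k, n=10000, r=1e-8)
    arg = arglib.make_arg_from_times(k, times, events, start=0, end=1000)
    print len(arg), arg.root.age   # Python 2 print, matching the codebase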
+def make_arg_from_tree(tree, times=None): + """ + Creates an ARG from a treelib.Tree 'tree' + """ + arg = ARG() + if times is None: + times = treelib.get_tree_timestamps(tree) + + # add nodes to ARG + for node in tree: + event = "gene" if node.is_leaf() else "coal" + anode = arg.new_node(node.name, event=event, age=times[node]) + + # connect up nodes + for node in tree: + anode = arg[node.name] + anode.children = [arg[child.name] for child in node.children] + anode.parents = ([arg[node.parent.name]] if node.parent else []) + + # set arg info + arg.root = arg[tree.root.name] + + arg.nextname = max(node.name for node in arg + if isinstance(node.name, int)) + 1 + + return arg + + +def get_recombs(arg, start=None, end=None, visible=False): + """ + Returns a sorted list of an ARG's recombination positions + + visible -- if True only iterate recombination break points that are + visible to extant sequences + """ + + if visible: + return list(iter_visible_recombs(arg, start, end)) + else: + rpos = [node.pos for node in + arg if node.event == "recomb"] + rpos.sort() + return rpos +get_recomb_pos = get_recombs + +def iter_recombs(arg, start=None, end=None, visible=False): + """ + Iterates through an ARG's recombination positions + + visible -- if True only iterate recombination break points that are + visible to extant sequences + """ + + if visible: + return iter_visible_recombs(arg, start, end) + else: + rpos = [node.pos for node in arg + if node.event == "recomb" and start <= node.pos <= end] + rpos.sort() + return iter(rpos) + + +def iter_visible_recombs(arg, start=None, end=None): + """Iterates through visible recombinations in an ARG""" + + pos = start if start is not None else 0 + while True: + recomb = find_next_recomb(arg, pos) + if recomb: + yield recomb + pos = recomb.pos + else: + break + + +def find_next_recomb(arg, pos, tree=False): + """Returns the next recombination node in a local tree""" + + recomb = None + nextpos = util.INF + + if tree: + nodes = iter(arg) + else: + nodes = arg.postorder_marginal_tree(pos) + + for node in nodes: + if node.event == "recomb" and node.pos > pos and node.pos < nextpos: + recomb = node + nextpos = node.pos + + return recomb + + +def iter_recomb_blocks(arg, start=None, end=None, visible=False): + """ + Iterates over the recombination blocks of an ARG + + arg -- ARG to iterate over + start -- starting position in chromosome to iterate over + end -- ending position in chromosome to iterate over + visible -- if True only iterate recombination break points that are + visible to extant sequences + """ + + # determine region to iterate over + if start is None: + start = arg.start + if end is None: + end = arg.end + + a = start + b = start + for pos in iter_recombs(arg, start, end, visible=visible): + if pos < start: + continue + if pos > end: + pos = end + break + b = pos + yield (a, b) + a = pos + + yield (a, end) + + +def iter_marginal_trees(arg, start=None, end=None): + """ + Iterate over the marginal trees of an ARG + """ + for block, tree in iter_local_trees(arg, start, end): + yield tree + + +def iter_local_trees(arg, start=None, end=None, convert=False): + """ + Iterate over the local trees of an ARG + + Yeilds ((start, end), tree) for each marginal tree where (start, end) + defines the block of the marginal tree + """ + # determine region to iterate over + if start is None: + start = arg.start + if end is None: + end = arg.end + + i = 0 + rpos = get_recombs(arg, start, end) + rpos.append(end) + + while start < end: + # find next rpos + while i < 
len(rpos)-1 and rpos[i] <= start: + i += 1 + + tree = arg.get_marginal_tree((start+rpos[i]) / 2.0) + + # find block end + end2 = arg.end + for node in tree: + if node.event == "recomb" and start < node.pos < end2: + end2 = node.pos + + if convert: + tree = tree.get_tree() + yield (start, min(end2, end)), tree + start = end2 + +iter_tree_tracks = iter_local_trees + + +def descendants(node, nodes=None): + """ + Return all descendants of a node in an ARG + """ + if nodes is None: + nodes = set() + nodes.add(node) + for child in node.children: + if child not in nodes: + descendants(child, nodes) + return nodes + + +def remove_single_lineages(arg): + """ + Remove unnecessary nodes with single parent and single child + """ + queue = list(arg) + + for node in queue: + if node.name not in arg: + continue + + if len(node.children) == 1: + if len(node.parents) == 1: + child = node.children[0] + parent = node.parents[0] + + del arg.nodes[node.name] + child.parents[child.parents.index(node)] = parent + parent.children[parent.children.index(node)] = child + + elif len(node.parents) == 0: + child = node.children[0] + + del arg.nodes[node.name] + child.parents.remove(node) + arg.root = node + + queue.append(child) + + # relabel events for leaves that were recombinations + for node in arg: + if node.is_leaf() and len(node.parents) == 1: + node.event = "gene" + + return arg + + + +def postorder_subarg(arg, start, end): + """Iterates postorder over the nodes of the 'arg' that are ancestral to (start,end)""" + + # initialize heap + heap = [(node.age, node) for node in arg.leaves()] + seen = set([None]) + + # add all ancestor of lineages + while len(heap) > 0: + age, node = heapq.heappop(heap) + yield node + if len(heap) == 0: + # MRCA reached + return + + # find parents within (start, end) + # add parent to lineages if it has not been seen before + for parent in arg.get_local_parents(node, start, end): + if parent not in seen: + heapq.heappush(heap, (parent.age, parent)) + seen.add(parent) + + +def subarg(arg, start, end): + """Returns a new ARG that only contains recombination within (start, end)""" + + arg2 = ARG(start, end) + + # add nodes + for node in postorder_subarg(arg, start, end): + arg2.root = arg2.new_node(node.name, event=node.event, age=node.age, + pos=node.pos) + + # add edges + for node2 in arg2: + node = arg[node2.name] + for parent in arg.get_local_parents(node, start, end): + pname = parent.name + if pname in arg2: + parent2 = arg2[pname] + node2.parents.append(parent2) + parent2.children.append(node2) + + return arg2 + + +def subarg_by_leaves(arg, leaves, keep_single=False): + """ + Removes any leaf from the arg that is not in leaves set + """ + + stay = set(leaves) + remove = [] + + # find nodes to remove + for node in arg.postorder(): + nchildren = sum(1 for child in node.children if child in stay) + if nchildren == 0 and node not in stay: + remove.append(node) + else: + stay.add(node) + + # remove nodes + for node in remove: + arg.remove(node) + + if not keep_single: + remove_single_lineages(arg) + + return arg + + +def apply_spr(tree, rnode, rtime, cnode, ctime, rpos): + """ + Apply an Subtree Pruning Regrafting (SPR) operation on a tree + """ + if rnode == cnode: + return None, None + + def add_node(arg, node, time, pos, event): + node2 = arg.new_node(event=event, age=time, children=[node], pos=pos) + if event == "coal": + node2.pos = 0 + parent = arg.get_local_parent(node, pos) + if parent: + node.parents[node.parents.index(parent)] = node2 + 
parent.children[parent.children.index(node)] = node2 + node2.parents.append(parent) + else: + node.parents.append(node2) + arg.root = node2 + return node2 + + def remove_node(arg, node): + if node.parents: + parent = node.parents[0] + child = node.children[0] + child.parents[0] = parent + util.replace(parent.children, node, child) + else: + child = node.children[0] + child.parents.remove(node) + arg.root = child + + del arg.nodes[node.name] + + coal = add_node(tree, cnode, ctime, rpos, "coal") + + broken_node = rnode.parents[0] + broken_node.children.remove(rnode) + remove_node(tree, broken_node) + + rnode.parents[0] = coal + coal.children.append(rnode) + + return coal, broken_node + + + + +def iter_arg_sprs(arg, start=None, end=None, use_leaves=False, use_local=False): + """ + Iterate through the SPR moves of an ARG + + Yields (recomb_pos, (rnode, rtime), (cnode, ctime)) + + if use_leaves is True, yields + (recomb_pos, (recomb_leaves, rtime), (coal_leaves, ctime)) + + if use_local is True, yields + (recomb_pos, (rnode, rtime), (cnode, ctime), local_nodes) + (recomb_pos, (recomb_leaves, rtime), (coal_leaves, ctime), local_nodes) + """ + + def get_local_children(node, local, pos): + children = [child for child in node.children + if child in local and + arg.get_local_parent(child, pos) == node] + if len(children) == 2 and children[0] == children[1]: + children = children[:1] + return children + + def get_local_parent(node, pos): + if node.event == "recomb": + if pos < node.pos: + return node.parents[0] + else: + return node.parents[1] + elif node.parents: + return node.parents[0] + else: + return None + + def walk_down(node, local, pos): + while True: + children = get_local_children(node, local, pos) + if len(children) == 1: + node = children[0] + else: + break + return node + + # get coordinate range to iterate over + if start is None: + start = arg.start + if end is None: + end = arg.end + + # init local nodes + nodes = list(arg.postorder_marginal_tree(start)) + local = set(nodes) + local_root = nodes[-1] + pos = start + + while pos < end: + # find next recombination node after 'pos' + recomb_pos = end + recomb = None + for node in local: + if pos < node.pos < recomb_pos: + recomb_pos = node.pos + recomb = node + if recomb is None: + # no more recombinations before position 'end' + break + rtime = recomb.age + + # find recomb baring branch in local tree + # walk down until next coalescent node in local tree + mid = (recomb_pos + pos) / 2.0 + ptr = recomb + rnode = walk_down(recomb, local, mid).name + + + # find recoal node + ptr = recomb + local_root_path = [] + local_root2 = local_root + coal_path = [] + while True: + ptr = get_local_parent(ptr, recomb_pos) + coal_path.append(ptr) + if ptr in local: + # coal within local tree again + break + + # check for coal above local root + while (local_root2 and ptr != local_root2 and + local_root2.age <= ptr.age): + local_root2 = get_local_parent(local_root2, recomb_pos) + if not local_root2: + break + local_root_path.append(local_root2) + # NOTE: searching whole local_root_path is necessary for + # discretized node ages + if ptr in local_root_path: + # coal above root + local_root2 = ptr + # truncate local_root_path + i = local_root_path.index(ptr) + local_root_path = local_root_path[:i+1] + break + ctime = ptr.age + recoal = ptr + + # find recoal baring branch in local tree + # walk down until next coalescent node in local tree + if ptr in local: + cnode = walk_down(ptr, local, mid).name + else: + cnode = local_root.name + + + # find broken nodes 
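+        # ('broken' nodes are nodes of the current local tree that become
+        #  non-local once this SPR is applied; they are removed from 'local'
+        #  after the SPR is yielded)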
+ # walk up left parent of recomb until coalescent node or coal path + ptr = recomb + broken_path = [] + while True: + assert ptr in local, ptr + ptr = get_local_parent(ptr, mid) + if ptr is None: + break + children = get_local_children(ptr, local, mid) + if len(children) != 1 or ptr in coal_path: + break + broken_path.append(ptr) + + # find broken root path + # if broken path reaches local_root and we don't re-coal above + # local root, then broken path continues down the other side + # until coalescent node or coal path + if ptr == local_root and cnode != local_root.name: + broken_path.append(ptr) + children = get_local_children(ptr, local, mid) + ptr = (children[0] + if children[1] in broken_path[-2:] + [recomb] + else children[1]) + while True: + children = get_local_children(ptr, local, mid) + if len(children) != 1 or ptr in coal_path: + break + broken_path.append(ptr) + ptr = children[0] + local_root = ptr + + # yield SPR + if use_leaves: + rleaves = list(x.name for x in + arg.preorder_marginal_tree(mid, arg[rnode]) + if x.is_leaf()) + cleaves = list(x.name for x in + arg.preorder_marginal_tree(mid, arg[cnode]) + if x.is_leaf()) + recomb_point = (rleaves, rtime) + coal_point = (cleaves, ctime) + else: + recomb_point = (rnode, rtime) + coal_point = (cnode, ctime) + + if use_local: + yield (recomb_pos, recomb_point, coal_point, local) + else: + yield (recomb_pos, recomb_point, coal_point) + + + # update local nodes + if cnode == local_root.name: + # add root path + local.update(local_root_path) + local_root = local_root2 + + # remove broken nodes and nodes in coal path + for node in broken_path: + local.remove(node) + for node in coal_path: + local.add(node) + + # advance the current position + pos = recomb_pos + + + +def iter_arg_sprs_simple(arg, start=None, end=None, use_leaves=False): + """ + Iterate through the SPR moves of an ARG + + Yields (recomb_pos, (rnode, rtime), (cnode, ctime)) + """ + + trees = iter_tree_tracks(arg, start, end) + block, last_tree = trees.next() + + for block, tree in trees: + + # find recombination node + recomb_pos = block[0] + node = (x for x in tree if x.pos == recomb_pos).next() + rtime = node.age + ptr = last_tree[node.name] + while len(ptr.children) == 1: + ptr = ptr.children[0] + rnode = ptr.name + + # find recoal node + ptr = node + ptr = ptr.parents[0] + + # BUG: only works for non-bubbles + #while len(ptr.children) != 2: + # ptr = ptr.parents[0] + while len(ptr.children) != 2 and ptr.name not in last_tree: + ptr = ptr.parents[0] + ctime = ptr.age + if ptr.name in last_tree: + ptr = last_tree[ptr.name] + while len(ptr.children) == 1: + ptr = ptr.children[0] + cnode = ptr.name + else: + cnode = last_tree.root.name + + # yield SPR + if use_leaves: + rleaves = list(last_tree.leaf_names(last_tree[rnode])) + cleaves = list(tree.leaf_names(last_tree[cnode])) + yield (recomb_pos, (rleaves, rtime), (cleaves, ctime)) + else: + yield (recomb_pos, (rnode, rtime), (cnode, ctime)) + last_tree = tree + + +# TODO: more testing of ignore_self=False is needed +def make_arg_from_sprs(init_tree, sprs, ignore_self=False, + modify_self=False): + """ + Make an ARG from an initial tree 'init_tree' and a list of SPRs 'sprs' + + NOTE: sprs should indicate branches by their leaf set (use_leaves=True) + """ + + def add_node(arg, node, time, pos, event): + node2 = arg.new_node(event=event, age=time, children=[node], pos=pos) + if event == "coal": + node2.pos = 0 + parent = arg.get_local_parent(node, pos) + #if parent is None and node.event == "recomb": + # parent = 
node.parents[0] + + if parent: + node.parents[node.parents.index(parent)] = node2 + parent.children[parent.children.index(node)] = node2 + node2.parents.append(parent) + else: + node.parents.append(node2) + arg.root = node2 + + return node2 + + + def walk_up(arg, node, time, pos, local): + parent = arg.get_local_parent(node, pos) + + while parent and parent.age <= time: + if parent in local: + break + node = parent + parent = arg.get_local_parent(node, pos) + + # safety check + if parent and parent.age < time: + print (pos, node, parent.age, time) + tree = arg.get_marginal_tree(pos).get_tree() + tree.write() + treelib.draw_tree_names(tree, maxlen=8, minlen=8) + assert False + + return node + + + arg = init_tree + tree = None + mapping = {} + local = set() + + for rpos, (rleaves, rtime), (cleaves, ctime) in sprs: + if tree is None: + # create first tree + tree = arg.get_marginal_tree(rpos) + remove_single_lineages(tree) + mapping = dict((node.name, arg[node.name]) for node in tree) + local = set(arg[node.name] for node in tree) + + # check whether self cycles are wanted + if ignore_self and rleaves == cleaves: + if modify_self: + rnode_tree = arg_lca(tree, rleaves, rpos, time=rtime) + cnode_tree = arg_lca(tree, cleaves, rpos, time=ctime) + cnode_tree = cnode_tree.parents[0] + ctime = cnode_tree.age + else: + continue + else: + # do lca on local tree + rnode_tree = arg_lca(tree, rleaves, rpos, time=rtime) + cnode_tree = arg_lca(tree, cleaves, rpos, time=ctime) + + # do rest of lca on arg + rnode = walk_up(arg, mapping[rnode_tree.name], rtime, rpos, local) + cnode = walk_up(arg, mapping[cnode_tree.name], ctime, rpos, local) + + # DEBUG + #rnode2 = arg_lca(arg, rleaves, rpos, time=rtime) + #cnode2 = arg_lca(arg, cleaves, rpos, time=ctime) + #assert (rnode == rnode2) and (cnode == cnode2) + + # add edge to ARG + recomb = add_node(arg, rnode, rtime, rpos, "recomb") + if rnode == cnode: + cnode = recomb + coal = add_node(arg, cnode, ctime, rpos, "coal") + recomb.parents.append(coal) + coal.children.append(recomb) + + # apply SPR to local tree + if rnode_tree != cnode_tree: + coal2, broken_node = apply_spr(tree, rnode_tree, rtime, + cnode_tree, ctime, rpos) + #assert_arg(tree) + + # update local node set and tree2arg mapping + local.remove(mapping[broken_node.name]) + del mapping[broken_node.name] + mapping[coal2.name] = coal + local.add(coal) + + + return arg + + +def make_arg_from_sprs_simple(init_tree, sprs, ignore_self=False): + """ + Make an ARG from an initial tree 'init_tree' and a list of SPRs 'sprs' + + NOTE: sprs should indicate branches by their leaf set (use_leaves=True) + """ + + def add_node(arg, node, time, pos, event): + node2 = arg.new_node(event=event, age=time, children=[node], pos=pos) + if event == "coal": + node2.pos = 0 + parent = arg.get_local_parent(node, pos) + if parent: + node.parents[node.parents.index(parent)] = node2 + parent.children[parent.children.index(node)] = node2 + node2.parents.append(parent) + else: + node.parents.append(node2) + return node2 + + arg = init_tree + + for rpos, (rleaves, rtime), (cleaves, ctime) in sprs: + node1 = arg_lca(arg, rleaves, rpos, time=rtime) + node2 = arg_lca(arg, cleaves, rpos, time=ctime) + + # check whether self cycles are wanted + if ignore_self and node1 == node2: + continue + + recomb = add_node(arg, node1, rtime, rpos, "recomb") + if node1 == node2: + node2 = recomb + coal = add_node(arg, node2, ctime, rpos, "coal") + + recomb.parents.append(coal) + coal.children.append(recomb) + + return arg + + +def smcify_arg(arg, 
start=None, end=None, ignore_self=True): + """ + Rebuild an ARG so that is follows the SMC assumptions + """ + + if start is None: + start = arg.start + + arg2 = arg.get_marginal_tree(start-.5) + remove_single_lineages(arg2) + sprs = iter_arg_sprs(arg, start, end, use_leaves=True) + make_arg_from_sprs(arg2, sprs, ignore_self=True, + modify_self=not ignore_self) + + if start is not None: + arg2.start = start + if end is not None: + arg2.end = end + + return arg2 + + + +def subarg_by_leaf_names(arg, leaf_names, keep_single=False): + """ + Removes any leaf from the arg that is not in leaf name set + """ + + return subarg_by_leaves(arg, [arg[x] for x in leaf_names], + keep_single=keep_single) + + +def arg_lca(arg, leaves, pos, time=None, local=None): + """ + Find the Least Common Ancestor (LCA) of a set of leaves in the ARG + + arg -- an ARG + leaves -- a list of nodes in arg + pos -- position along sequence to perform LCA + time -- the time ascend to (optional) + local -- the set of nodes considered local (optional) + """ + + def is_local_coal(arg, node, pos, local): + return (len(node.children) == 2 and + node.children[0] in local and + arg.get_local_parent(node.children[0], pos) == node and + node.children[1] in local and + arg.get_local_parent(node.children[1], pos) == node and + node.children[0] != node.children[1]) + + + order = dict((node, i) for i, node in enumerate( + arg.postorder_marginal_tree(pos))) + if local is None: + local = order + + queue = [(order[arg[x]], arg[x]) for x in leaves] + seen = set(x[1] for x in queue) + heapq.heapify(queue) + + while len(queue) > 1: + i, node = heapq.heappop(queue) + parent = arg.get_local_parent(node, pos) + if parent and parent not in seen: + seen.add(parent) + heapq.heappush(queue, (order[parent], parent)) + node = queue[0][1] + parent = arg.get_local_parent(node, pos) + + + if time is not None: + while parent and parent.age <= time: + if is_local_coal(arg, parent, pos, local): + break + node = parent + parent = arg.get_local_parent(node, pos) + + # safety check + if parent and parent.age < time: + print (pos, leaves, parent.age, time) + tree = arg.get_marginal_tree(pos).get_tree() + tree.write() + treelib.draw_tree_names(tree, maxlen=8, minlen=8) + assert False + + return node + + + +def arglen(arg, start=None, end=None): + """Calculate the total branch length of an ARG""" + + treelen = 0.0 + for (start, end), tree in iter_tree_tracks(arg, start=start, end=end): + treelen += sum(x.get_dist() for x in tree) * (end - start) + + return treelen + + +#============================================================================= +# region functions + + +def split_regions(pos, side, regions): + """ + Iterates through the regions on the left (side=0) or right (side=1) of 'pos' + """ + + for reg in regions: + if side == 0: + if reg[1] <= pos: + # keep all regions fully left of recomb position + yield reg + elif reg[0] < pos: + # cut region + yield (reg[0], pos) + elif side == 1: + if reg[0] >= pos: + # keep all regions fully right of recomb position + yield reg + elif reg[1] > pos: + # cut region + yield (pos, reg[1]) + else: + raise Exception("side not specified") + + +def count_region_overlaps(*region_sets): + """ + Count how many regions overlap each interval (start, end) + + Iterates through (start, end, count) sorted + """ + + # build endpoints list + end_points = [] + for regions in region_sets: + for reg in regions: + end_points.append((reg[0], 0)) + end_points.append((reg[1], 1)) + end_points.sort() + + count = 0 + start = None + end = None + 
last = None + for pos, kind in end_points: + if last is not None and pos != last: + yield last, pos, count + if kind == 0: + count += 1 + elif kind == 1: + count -= 1 + last = pos + + if last is not None and pos != last: + yield last, pos, count + + + +def groupby_overlaps(regions, bygroup=True): + """ + Group ranges into overlapping groups + Ranges must be sorted by start positions + """ + + start = -util.INF + end = -util.INF + group = None + groupnum = -1 + for reg in regions: + if reg[0] > end: + # start new group + start, end = reg + groupnum += 1 + + if bygroup: + if group is not None: + yield group + group = [reg] + else: + yield (groupnum, reg) + + else: + # append to current group + if reg[1] > end: + end = reg[1] + + if bygroup: + group.append(reg) + else: + yield (groupnum, reg) + + if bygroup and group is not None and len(group) > 0: + yield group + + +#============================================================================= +# mutations and splits + +def sample_arg_mutations(arg, mu, minlen=0): + """ + mu -- mutation rate (mutations/site/gen) + """ + + mutations = [] + + for (start, end), tree in iter_tree_tracks(arg): + remove_single_lineages(tree) + for node in tree: + if not node.parents: + continue + blen = max(node.get_dist(), minlen) + rate = blen * mu + i = start + while i < end: + i += random.expovariate(rate) + if i < end: + t = random.uniform(node.age, node.age + blen) + mutations.append((node, node.parents[0], i, t)) + return mutations + + +def get_marginal_leaves(arg, node, pos): + return (x for x in arg.preorder_marginal_tree(pos, node) if x.is_leaf()) + + +def get_mutation_split(arg, mutation): + """Get the leaves of an ARG that inherit a mutation""" + node, parent, pos, t = mutation + return tuple(sorted(x.name for x in get_marginal_leaves(arg, node, pos))) + + +def split_to_tree_branch(tree, split): + """Place a split on a tree branch""" + + node = treelib.lca([tree[name] for name in split]) + + if sorted(split) != sorted(node.leaf_names()): + inv = [x for x in tree.leaf_names() if x not in split] + node = treelib.lca([tree[name] for name in inv]) + if sorted(inv) != sorted(node.leaf_names()): + # split does not conform to tree + return None + + return node + + +def split_to_arg_branch(arg, pos, split): + + # TODO: make more efficient + tree = arg.get_tree(pos) + node = split_to_tree_branch(tree, split) + if node is not None: + return arg[node.name] + else: + None + + +def iter_tree_splits(tree): + for node in tree: + if len(node.children) != 2 or node.children[0] == node.children[1]: + continue + split = tuple(sorted(tree.leaf_names(node))) + if len(split) > 1: + yield split + + +def is_split_compatible(split1, split2): + """Returns True if two splits are compatible""" + i = j = 0 + intersect = 0 + while i < len(split1) and j < len(split2): + if split1[i] == split2[j]: + intersect += 1 + i += 1 + j += 1 + elif split1[i] < split2[j]: + i += 1 + else: + j += 1 + #intersect = len(split1 & split2) + return intersect == 0 or intersect == min(len(split1), len(split2)) + + +def is_split_compatible_unpolar2(split1, split2, leaves): + + a = set(split1) + b = set(split2) + x00 = False + x01 = False + x10 = False + x11 = False + for l in leaves: + if l in a: + if l in b: + x11 = True + else: + x10 = True + else: + if l in b: + x01 = True + else: + x00 = True + + return not (x00 and x01 and x10 and x11) + +def is_split_compatible_unpolar(split1, split2, leaves): + if is_split_compatible(split1, split2): + return True + else: + split1rev = tuple(x for x in leaves if x 
not in split1) + return is_split_compatible(split1rev, split2) + + +def split_relation(split1, split2): + + i = j = 0 + intersect = 0 + while i < len(split1) and j < len(split2): + if split1[i] == split2[j]: + intersect += 1 + i += 1 + j += 1 + elif split1[i] < split2[j]: + i += 1 + else: + j += 1 + + if intersect == 0: + return "disjoint" + + elif intersect == len(split1): + if intersect == len(split2): + assert split1 == split2 + return "equal" + else: + return "child" + + elif intersect == len(split2): + return "parent" + + else: + return "conflict" + + return intersect == 0 or intersect == min(len(split1), len(split2)) + + +def iter_mutation_splits(arg, mutations): + + nleaves = sum(1 for x in arg.leaves()) + + for node, parent, pos, t in mutations: + split = tuple(sorted(x.name for x in get_marginal_leaves( + arg, node, pos))) + if len(split) != 1 and len(split) != nleaves: + yield pos, split + + + + +#============================================================================= +# alignments + +def make_alignment(arg, mutations, infinite_sites=True, + ancestral="A", derived="C"): + aln = fasta.FastaDict() + alnlen = int(arg.end - arg.start) + leaves = list(arg.leaf_names()) + nleaves = len(leaves) + + # sort mutations by position + mutations.sort(key=lambda x: x[2]) + + # make align matrix + mat = [] + + pos = arg.start + muti = 0 + for i in xrange(alnlen): + if muti >= len(mutations) or i < int(mutations[muti][2]): + # no mut + mat.append(ancestral * nleaves) + else: + # mut + #mut_group = [] + #while muti < len(mutations) and i == int(mutations[muti][2]): + # mut_group.append(mutations[muti]) + # muti += 1 + + node, parent, mpos, t = mutations[muti] + row = [] + split = set(x.name for x in get_marginal_leaves(arg, node, mpos)) + mat.append("".join((derived if leaf in split else ancestral) + for leaf in leaves)) + muti += 1 + + # make fasta + for i, leaf in enumerate(leaves): + aln[leaf] = "".join(x[i] for x in mat) + + return aln + + +def iter_align_splits(aln, warn=False): + """Iterates through the splits in an alignment""" + names = aln.keys() + + for j in xrange(aln.alignlen()): + col = [x[j] for x in aln.itervalues()] + chars = util.unique(col) + if len(chars) > 1: + # column has mutations + # check bi-allelic + if warn and len(chars) != 2: + print >>sys.stderr, "warning: not bi-allelic (site=%d)" % j + + part1 = tuple(sorted(names[i] for i, c in enumerate(col) + if c == chars[0])) + part2 = tuple(sorted(names[i] for i, c in enumerate(col) + if c != chars[0])) + if len(part1) > len(part2): + part1, part2 = part2, part1 + split = (part1, part2) + + yield j, split + + + +#============================================================================= +# input/output + + +def write_arg(filename, arg): + """ + Write ARG to file + """ + + out = util.open_stream(filename, "w") + + # write ARG key values + out.write("start=%s\tend=%s\n" % (arg.start, arg.end)) + + # write nodes header + out.write("\t".join(("name", "event", "age", "pos", "parents", "children")) + + "\n") + + # write nodes + for node in arg: + util.print_row( + node.name, node.event, node.age, node.pos, + ",".join(str(x.name) for x in node.parents), + ",".join(str(x.name) for x in node.children), + out=out) + + if isinstance(filename, basestring): + out.close() + + +def parse_number(text): + if text.isdigit(): + return int(text) + else: + return float(text) + + +def parse_node_name(text): + if text.isdigit(): + return int(text) + else: + return text + +def parse_key_value(field): + try: + i = field.index("=") + return 
field[:i], field[i+1:] + except: + raise Exception("improper key-value field '%s'" % text) + + + +def read_arg(filename, arg=None): + """ + Read ARG from file + """ + infile = util.DelimReader(filename) + + if arg is None: + arg = ARG() + + # read ARG key values + row = infile.next() + for field in row: + key, val = parse_key_value(field) + if key == "start": + arg.start = int(val) + elif key == "end": + arg.end = int(val) + + # read header + row = infile.next() + assert row == ["name", "event", "age", "pos", "parents", "children"] + + # read nodes + clinks = {} + plinks = {} + for row in infile: + node = arg.new_node(name=parse_node_name(row[0]), event=row[1], + age=float(row[2]), + pos=parse_number(row[3])) + if len(row) > 4 and len(row[4]) > 0: + plinks[node.name] = map(parse_node_name, row[4].split(",")) + if len(row) > 5 and len(row[5]) > 0: + clinks[node.name] = map(parse_node_name, row[5].split(",")) + + # setup parents + for node in arg: + for parent_name in plinks.get(node.name, ()): + parent = arg.nodes.get(parent_name) + if parent: + node.parents.append(parent) + else: + raise Exception("node '%s' has unknown parent '%s'" % + (node.name, parent_name)) + + # detect root + if parent_name not in plinks: + arg.root = node + + # setup children + for node in arg: + for child_name in clinks.get(node.name, ()): + child = arg.nodes.get(child_name) + if child: + node.children.append(child) + assert node in child.parents, \ + "node '%s' doesn't have parent '%s' (%s)" % ( + child.name, node.name, str(child.parents)) + else: + raise Exception("node '%s' has unknown child '%s'" % + (node.name, child_name)) + + + # set nextname + for name in arg.nodes: + if isinstance(name, int): + arg.nextname = max(arg.nextname, name+1) + + return arg + + +def write_tree_tracks(filename, arg, start=None, end=None, verbose=False): + out = util.open_stream(filename, "w") + for block, tree in iter_tree_tracks(arg, start, end): + if verbose: + print >>sys.stderr, "writing block", block + remove_single_lineages(tree) + tree = tree.get_tree() + out.write(str(int(block[0]))+"\t"+str(int(block[1]))+"\t") + tree.write(out, oneline=True) + out.write("\n") + if isinstance(filename, basestring): + out.close() + + +def read_tree_tracks(filename): + for row in util.DelimReader(filename): + yield (int(row[0]), int(row[1])), treelib.parse_newick(row[2]) + + + +def write_mutations(filename, arg, mutations): + out = util.open_stream(filename, "w") + + for mut in mutations: + l = get_marginal_leaves(arg, mut[0], mut[2]) + util.print_row(mut[2], mut[3], ",".join(x.name for x in l), out=out) + + if isinstance(filename, basestring): + out.close() + + +def read_mutations(filename): + for row in util.DelimReader(filename): + chroms = row[2].split(",") if row[2] else [] + yield int(row[0]), float(row[1]), chroms + + +def write_ancestral(filename, arg): + out = util.open_stream(filename, "w") + + for node in arg: + regions = util.flatten(node.data.get("ancestral", ())) + util.print_row(node.name, *regions, out=out) + + if isinstance(filename, basestring): + out.close() + + +def read_ancestral(filename, arg): + for row in util.DelimReader(filename): + node = arg[parse_node_name(row[0])] + node.data["ancestral"] = [(int(row[i]), int(row[i+1])) + for i in xrange(1, len(row), 2)] + + +#============================================================================= +# OLD CODE + + + +def sample_mutations(arg, u): + """ + u -- mutation rate (mutations/locus/gen) + + DEPRECATED: use sample_arg_mutations() instead + """ + + mutations = [] + + 
locsize = arg.end - arg.start + + for node in arg: + for parent in node.parents: + for region in arg.get_ancestral(node, parent=parent): + # ensure node is not MRCA + for pregion in parent.data["ancestral"]: + if pregion[0] <= region[0] < pregion[1]: + break + else: + continue + + frac = (region[1] - region[0]) / locsize + dist = parent.age - node.age + t = parent.age + while True: + t -= random.expovariate(u * frac) + if t < node.age: + break + pos = random.uniform(region[0], region[1]) + mutations.append((node, parent, pos, t)) + + return mutations + + +''' + +def has_self_cycles(arg): + """ + Return True if there are lineages that coalesce with themselves + + Requires ancestral sequences set. + """ + + # Such a cycle does not contain 'local coalescent nodes' on the sides + # but it might have non-local coalescent and recombination nodes. + # The relative order of theses nodes from the left and right side + # does not matter + # + # | + # coal + # / \ + # | | + # \ / + # recomb + # | + + # get overall postorder + # assumes stable sort + nodes = list(arg.postorder()) + nodes.sort(key=lambda x: x.age) + order = dict((x, i) for i, x in enumerate(nodes)) + + # find cycles by their recombination nodes + recombs = [x.name for x in arg if x.event == "recomb"] + + # find smallest separation + recomb_pos = [arg[x].pos for x in recombs] + recomb_pos.sort() + eps = .5 + for i in xrange(1, len(recomb_pos)): + sep = recomb_pos[i] - recomb_pos[i-1] + if sep > 0 and sep/2.0 < eps: + eps = sep / 2.0 + + for recomb_name in recombs: + if recomb_name not in arg: + continue + if is_self_cycle(arg, arg[recomb_name], order=order, eps=eps): + print recomb_name, arg[recomb_name].pos + return True + + return False + + +def iter_self_cycles(arg): + """ + Return True if there are lineages that coalesce with themselves + + Requires ancestral sequences set. + """ + + # Such a cycle does not contain 'local coalescent nodes' on the sides + # but it might have non-local coalescent and recombination nodes. 
+ # The relative order of theses nodes from the left and right side + # does not matter + # + # | + # coal + # / \ + # | | + # \ / + # recomb + # | + + # get overall postorder + # assumes stable sort + nodes = list(arg.postorder()) + nodes.sort(key=lambda x: x.age) + order = dict((x, i) for i, x in enumerate(nodes)) + + # find cycles by their recombination nodes + recombs = [x.name for x in arg if x.event == "recomb"] + + # find smallest separation + recomb_pos = [arg[x].pos for x in recombs] + recomb_pos.sort() + eps = .5 + for i in xrange(1, len(recomb_pos)): + sep = recomb_pos[i] - recomb_pos[i-1] + if sep > 0 and sep/2.0 < eps: + eps = sep / 2.0 + + for recomb_name in recombs: + if recomb_name not in arg: + continue + if is_self_cycle(arg, arg[recomb_name], order=order, eps=eps): + yield arg[recomb_name] + + + + +def is_self_cycle(arg, recomb, order=None, eps=1e-4): + + def is_local_coal(node, child, pos): + if node.event != "coal": + return False + + i = node.children.index(child) + other_child = node.children[1 - i] + + for start, end in other_child.data["ancestral"]: + if start < pos < end: + return True + + return False + + if order is None: + # get overall postorder + # assumes stable sort + nodes = list(arg.postorder()) + nodes.sort(key=lambda x: x.age) + order = dict((x, i) for i, x in enumerate(nodes)) + + # find cycle + # also check for local coal nodes along the way + rpos = recomb.pos + path1 = [] + path2 = [] + ptr1 = arg.get_local_parent(recomb, rpos-eps) + ptr2 = arg.get_local_parent(recomb, rpos+eps) + while ptr1 and ptr2: + order1 = order[ptr1] + order2 = order[ptr2] + + if order1 < order2: + if is_local_coal(ptr1,path1[-1] if path1 else recomb,rpos-eps): + break + path1.append(ptr1) + ptr1 = arg.get_local_parent(ptr1, rpos-eps) + + elif order1 > order2: + if is_local_coal(ptr2,path2[-1] if path2 else recomb,rpos+eps): + break + path2.append(ptr2) + ptr2 = arg.get_local_parent(ptr2, rpos+eps) + + else: + # we have reached coal node + assert ptr1 == ptr2 + coal = ptr1 + return True + + return False + + +def remove_self_cycles(arg): + """ + Removes cycles that represent a lineage coalescing with itself + + Requires ancestral sequences set. + """ + + # Such a cycle does not contain 'local coalescent nodes' on the sides + # but it might have non-local coalescent and recombination nodes. 
+ # The relative order of theses nodes from the left and right side + # does not matter + # + # | + # coal + # / \ + # | | + # \ / + # recomb + # | + + def is_local_coal(node, child, pos): + if node.event != "coal": + return False + + i = node.children.index(child) + other_child = node.children[1 - i] + + for start, end in other_child.data["ancestral"]: + if start < pos < end: + return True + + return False + + # get overall postorder + # assumes stable sort + nodes = list(arg.postorder()) + nodes.sort(key=lambda x: x.age) + order = dict((x, i) for i, x in enumerate(nodes)) + + # find cycles by their recombination nodes + recombs = [x.name for x in arg if x.event == "recomb"] + + # find smallest separation + recomb_pos = [arg[x].pos for x in recombs] + recomb_pos.sort() + eps = .5 + for i in xrange(1, len(recomb_pos)): + sep = recomb_pos[i] - recomb_pos[i-1] + if sep > 0 and sep/2.0 < eps: + eps = sep / 2.0 + + + for recomb_name in recombs: + if recomb_name not in arg: + continue + recomb = arg[recomb_name] + rpos = recomb.pos + + # find cycle + # also check for local coal nodes along the way + is_cycle = False + path1 = [] + path2 = [] + ptr1 = arg.get_local_parent(recomb, rpos-eps) + ptr2 = arg.get_local_parent(recomb, rpos+eps) + while ptr1 and ptr2: + order1 = order[ptr1] + order2 = order[ptr2] + + if order1 < order2: + if is_local_coal(ptr1,path1[-1] if path1 else recomb,rpos-eps): + break + path1.append(ptr1) + ptr1 = arg.get_local_parent(ptr1, rpos-eps) + + elif order1 > order2: + if is_local_coal(ptr2,path2[-1] if path2 else recomb,rpos+eps): + break + path2.append(ptr2) + ptr2 = arg.get_local_parent(ptr2, rpos+eps) + + else: + # we have reached coal node + assert ptr1 == ptr2 + coal = ptr1 + is_cycle = True + break + + if not is_cycle: + # this recombination node is not a cycle + # either because it contains a local coal node or never recoals + # which can happen in SMC ARGs + continue + + if path1: + assert coal in path1[-1].parents + else: + assert coal in recomb.parents + if path2: + assert coal in path2[-1].parents + else: + assert coal in recomb.parents + + if len(set(path1) & set(path2)) != 0: + print [(order[x], x) for x in path1] + print [(order[x], x) for x in path2] + assert False + + # remove coal node + top = coal.parents[0] if coal.parents else None + if top: + util.replace(top.children, coal, None) + + # remove recomb node + bottom = recomb.children[0] + util.replace(bottom.parents, recomb, None) + + # unlink nodes in left path + last = recomb + for node in path1: + util.replace(last.parents, node, None) + util.replace(node.children, last, None) + last = node + util.replace(last.parents, coal, None) + + # unlink nodes in right path + last = recomb + #print "--" + for node in path2: + #print last.parents, node + util.replace(last.parents, node, None) + util.replace(node.children, last, None) + last = node + util.replace(last.parents, coal, None) + + + # merge paths + combine = path1 + path2 + combine.sort(key=lambda x: (x.age, order[x])) + last = bottom + + for n in combine: + util.replace(last.parents, None, n) + util.replace(n.children, None, last) + last = n + + if top: + util.replace(last.parents, None, top) + util.replace(top.children, None, last) + else: + # no top node + if last.event == "coal" or last.event == "gene": + last.parents = [] + elif last.event == "recomb": + # remove last recomb node since it is a single lineage + c = last.children[0] + p = last.parents[1 - last.parents.index(None)] + util.replace(c.parents, last, p) + util.replace(p.children, 
last, c) + del arg.nodes[last.name] + else: + raise Exception("unknown event '%s'" % last.event) + + del arg.nodes[recomb.name] + del arg.nodes[coal.name] + + + + + +def remove_self_cycles2(arg): + """ + Removes cycles that represent a lineage coalescing with itself + + Requires ancestral sequences set. + """ + + # Such a cycle does not contain 'local coalescent nodes' on the sides + # but it might have non-local coalescent and recombination nodes. + # The relative order of theses nodes from the left and right side + # does not matter + # + # | + # coal + # / \ + # | | + # \ / + # recomb + # | + + # get overall postorder + # assumes stable sort + nodes = list(arg.postorder()) + nodes.sort(key=lambda x: x.age) + order = dict((x, i) for i, x in enumerate(nodes)) + + # find cycles by their recombination nodes + recombs = [x.name for x in arg if x.event == "recomb"] + + # find smallest separation + recomb_pos = [arg[x].pos for x in recombs] + recomb_pos.sort() + eps = .5 + for i in xrange(1, len(recomb_pos)): + sep = recomb_pos[i] - recomb_pos[i-1] + if sep > 0 and sep/2.0 < eps: + eps = sep / 2.0 + + + for recomb_name in recombs: + if recomb_name not in arg: + continue + recomb = arg[recomb_name] + rpos = recomb.pos + + # find cycle + # also check for local coal nodes along the way + is_cycle = False + path1 = [] + path2 = [] + ptr1 = arg.get_local_parent(recomb, rpos-eps) + ptr2 = arg.get_local_parent(recomb, rpos+eps) + while ptr1 and ptr2: + order1 = order[ptr1] + order2 = order[ptr2] + + if order1 < order2: + if ptr1.event == "coal": + break + path1.append(ptr1) + ptr1 = arg.get_local_parent(ptr1, rpos-eps) + + elif order1 > order2: + if ptr2.event == "coal": + break + path2.append(ptr2) + ptr2 = arg.get_local_parent(ptr2, rpos+eps) + + else: + # we have reached coal node + assert ptr1 == ptr2 + coal = ptr1 + is_cycle = True + break + + if not is_cycle: + # this recombination node is not a cycle + # either because it contains a local coal node or never recoals + # which can happen in SMC ARGs + continue + + # remove coal node + top = coal.parents[0] if coal.parents else None + if top: + util.replace(top.children, coal, None) + + # remove recomb node + bottom = recomb.children[0] + util.replace(bottom.parents, recomb, None) + + # unlink nodes in left path + last = recomb + for node in path1: + util.replace(last.parents, node, None) + util.replace(node.children, last, None) + last = node + util.replace(last.parents, coal, None) + + # unlink nodes in right path + last = recomb + for node in path2: + util.replace(last.parents, node, None) + util.replace(node.children, last, None) + last = node + util.replace(last.parents, coal, None) + + + # merge paths + combine = path1 + path2 + combine.sort(key=lambda x: (x.age, order[x])) + last = bottom + + for n in combine: + util.replace(last.parents, None, n) + util.replace(n.children, None, last) + last = n + + if top: + util.replace(last.parents, None, top) + util.replace(top.children, None, last) + else: + # no top node + if last.event == "coal": + last.parents = [] + elif last.event == "recomb": + # remove last recomb node since it is a single lineage + c = last.children[0] + p = last.parents[1 - last.parents.index(None)] + util.replace(c.parents, last, p) + util.replace(p.children, last, c) + del arg.nodes[last.name] + else: + raise Exception("unknown event '%s'" % node.event) + + del arg.nodes[recomb.name] + del arg.nodes[coal.name] + + +''' + + +''' +SLOW remove cycles + +def remove_self_cycles(arg, eps=.5): + """ + Removes cycles that 
represent a lineage coalescing with itself + + Requires ancestral sequences set. + """ + + # Such a cycle does not contain 'local coalescent nodes' on the sides + # but it might have non-local coalescent and recombination nodes. + # The relative order of theses nodes from the left and right side + # does not matter + # + # | + # coal + # / \ + # | | + # \ / + # recomb + # | + + def is_local_coal(node, child, pos): + if node.event != "coal": + return False + return True + + i = node.children.index(child) + other_child = node.children[1 - i] + + for start, end in other_child.data["ancestral"]: + #print node, other_child, (start, end), pos + if start < pos < end: + return True + + return False + + # get overall postorder + order = dict((x, i) for i, x in enumerate(arg.postorder())) + + + # find cycles by their recombination nodes + recombs = [x.name for x in arg if x.event == "recomb"] + for recomb_name in recombs: + if recomb_name not in arg: + continue + recomb = arg[recomb_name] + rpos = recomb.pos + + # get left path + path1 = [] + ptr = arg.get_local_parent(recomb, rpos-eps) + while ptr: + path1.append(ptr) + ptr = arg.get_local_parent(ptr, rpos-eps) + + # get right path + path2 = [] + ptr = arg.get_local_parent(recomb, rpos+eps) + while ptr: + path2.append(ptr) + ptr = arg.get_local_parent(ptr, rpos+eps) + + + # find recoal node + i = -1 + length = min(len(path1), len(path2)) + while -i <= length and path1[i] == path2[i]: + i -= 1 + if i == -1: + # this happens with SMC ARGs + continue + a = len(path1) + (i + 1) + b = len(path2) + (i + 1) + coal = path1[a] + + + # are there any coal nodes in left and right paths? + is_cycle = True + for i in range(a): + if is_local_coal( + path1[i], path1[i-1] if i > 0 else recomb, rpos-eps): + is_cycle = False + break + + for i in range(b): + if is_local_coal( + path2[i], path2[i-1] if i > 0 else recomb, rpos+eps): + is_cycle = False + break + + if not is_cycle: + # this recombination node is not a cycle + print "recomb", recomb, "pos=", rpos, "is not a cycle" + continue + + + print path1, path2, recomb, coal, i, a, b + print path1[:a], path2[:b] + + + # remove coal node + top = coal.parents[0] if coal.parents else None + if top: + util.replace(top.children, coal, None) + + # remove recomb node + bottom = recomb.children[0] + util.replace(bottom.parents, recomb, None) + + # unlink nodes in left path + last = recomb + for node in path1[:a]: + util.replace(last.parents, node, None) + util.replace(node.children, last, None) + last = node + util.replace(last.parents, coal, None) + + # unlink nodes in right path + last = recomb + for node in path2[:b]: + util.replace(last.parents, node, None) + util.replace(node.children, last, None) + last = node + util.replace(last.parents, coal, None) + + + # merge paths + combine = path1[:a] + path2[:b] + #print "path1", [order[x] for x in path1] + #print "path2", [order[x] for x in path2] + combine.sort(key=lambda x: (x.age, order[x])) + #print [(x.name, x.age, order[x]) for x in combine] + + + last = bottom + for n in combine: + util.replace(last.parents, None, n) + util.replace(n.children, None, last) + last = n + if top: + util.replace(last.parents, None, top) + util.replace(top.children, None, last) + else: + print "no top for", recomb, rpos + + # no top node + if last.event == "coal": + last.parents = [] + elif last.event == "recomb": + # remove last recomb node since it is a single lineage + c = last.children[0] + p = last.parents[1 - last.parents.index(None)] + util.replace(c.parents, last, p) + 
util.replace(p.children, last, c) + del arg.nodes[last.name] + print "remove last", last.name + else: + raise Exception("unknown event '%s'" % node.event) + + del arg.nodes[recomb.name] + del arg.nodes[coal.name] + print "remove", recomb.name, coal.name + print " ", order[bottom], order[recomb], order[coal], order[top] + + assert_arg(arg) + #print "good" + + + +''' + diff --git a/arghmm/deps/compbio/arglib.pyc b/arghmm/deps/compbio/arglib.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b4400d7def33c8e625b29d17e27536159e2ffd9 GIT binary patch literal 71323 zcmeFa3!Ge6Uf+4Es^98X>(P3&9(KvH)Gfbc*`Be-$hKs)Eqlh+cuJlT<8e>Z>MBWP zb$4}lb=$3pW%!KD*iK+3yc3uq8CX~XA3os?VHdIi64=cyginCalKliGELpP4@>n*H z&4w)d{r&#;-l}Rzj)C$p;n8ZR5%7z^Sa;>K=dbKvw5%Np)}XIuJdW)D9*u+}@VI&-sVa zoa+AMbw=TEQsb3dT**XR;agpJGAmqm;UjGY4!FP_N$rm0g*%mBz0H-}l~fP9&37l& z+nu;4sUC9TsK0gC#qLe26V5!=s#7hQbl2`{Wmb>)L-*@l6}ZDKdBEvAUDbQC!oc}1 z7k;mw-tDIkrcbN)_{E2^+oQgHUsAo-&3HJe9&_T$JYM(t-Qx!P{Z2oUR3C7GN0aJ% zoOmp$zSoHpN%cV|PA1idoOnE`zR!tMN%dhTrjqKHIq^hNJ?_MlN%avYPWy{gONA%g z{-@mjlm710N%e6Tdw)_r<-`Y)>XZ}DB-JOJcvhXNKI!sj(hdq%r(N3zlj>70_FPhZ z+KCS()%QD5Nva=k;%riV#);>X>a$LKSfqJ&@(fhme1ywTng3>2F3#7_-?MbZcfOol zoU1igS7ztSE48`C!uk5*>}tKSSiZP2yL72Md9rf)$k0&zPxx=LKsBoaT&ne#>rD!s znJg-+hfDnv^`+Hb5_6Yk>x)rzlZw_Z%&yI^R_k-Ck*P}VsOuW{dgsDyb9HtSHj}LO z>#|a-(M;F7wpyQey_8sO#w*(>Q(vvE%+wdTHfwWgaMQ1zT)B9rQLW`nOtn8=1_WnG z)S!2G1J`28$^cQ7bdQmibgw}N10%D~!_k|-ki;nZ#FmtRKMguDkW|i24pDD&50?OD zd6BP|>x<>t#WJ9C=BhfoSmzR;x>#GR&8cDf34o`{)NzW$tA*s%B$-K)i=;^uvP3a? zwd5SGqpV^*DoHO zUEwn?*N!f&G+ttenq*wOw6KuYAHXePOWe z|2cJ0zZ`Z(G{X)k?>uX)JkuPc$Z?Oyq2{B8i<=**F&@4fEnlfEt<20ErS_(pmlWpo zC;Oc$lYcK6s~$7R;2H>7NM0%?jb7weF*#dcP{{`s#_JwxuDj2gI1Jy@wbkFcV$o1LG(3D}x9f?G3S&;p5?5+D&(&3x?Gw8?s@y8sc&W&mkE z#7!Vk(a;bQ4e&EGPLRlv2n-U|z_bEm;ACbO^&ar61i&D473dSX0k4@pUC&OcE6u%J zDz(+MmBnUxwmjEZx>9ajD6d|sH;qdhsa>3ud63v_Ep8$(*PAo?>?x8d9!v^BQXVPJJ~Okbp_rLjXjIqcb*uMgW~z<3nVHHC(v_XM zjO(&bms@oyb7_r)Q(fTQ#)<<}ZJym%>MQOT94rm@4fYK7Tdp7w=XmV@)XvAtl>KUk zsdgj5{5sk#h>UFJDIz&!Gx8Y09P&5>v+V`t3}k|NPzvisbBijb#im48ECP&?NXK== z+SPDUwON3zk(vaXGm~Ic^H{n>c9q|%~%~603ceZJ?6yiNu9S2`VnRWw-Z+JDLE-s#^P3zD^+DAQ~pz$ zp_ED>9si|?KYVPiM23E1S!P$qNK`YW=29eJSLRzY@Ty3l(S&x>8@h~MKNGs5B!juj znZ{zxiIqlU)e4M6eCRccwe?jiJQ5lR#g))&YKv7%I`i+!5uR1<;L@UiZXmyi?SuNP z2(qQHm~)F0>1R+#g%UI7AL2h#*_yYJ{WR+A>}su9ZoFJufd$vhft%7Nbp!nb_nDxq z313?9%d_)qwa|>+!LD{I)5oN3&z$44?E*<%(h%j{YG<&aiMm@EYh7GhI8VQjE*`S$ zx@D%SzP+1Fbq1cgI5YENTVK?N?B~8npNt^_F~`%MjFFl5L|Xp&D`g>DG}g^lC4pob z)RV_1m3fj2C#zM-HtWc!j46(V zRcG-YWQ9GZU5U}3aCcW8k?GS|;#fZrfL_g!c}AFDmdO9M$`OOhH#*OCNSEP~GhOfdmX zRSJZ#>HaF>gB)qQC6NAVza$65K>F2wSOXYj;UxgtPWXH&12q#bu~)BT_18|;y1QBwk0~IA(lqvA^s?d^#ke~q&PLb(?+!|YB?b6q9`m6CFjyIg(zbT z;Dxp__ax^89TOv_=OZm)b2p#JyI-j-px@+z$6kw6^yFpEm>z9mnLt+6+7-W8k*E#8 zsT(c8SsAoz%ca9@(N%P*yq6Dt8^7>M^$aC_g|R|ht@EluIl@Ti&9P0oVcne;4@@fu?N=;ldhgVA(%uA#a`v!oqJ=mD9ss^Vaf2H3A<`N^M%D-C> z8sL)jjNrE$>LjMQGEZOQA*cm65o3QKDXhOFSWQhI5@*M?B5X9r1}O55F<(U1M+(bh ziGJr6ALgC+8otIic>6ne`9r+?q2wH&O+9Mwp&dCGuUEe@zlEvxhgkp4%Q9s0vl;`< z_j8YcW!=(Bt+7(At+WZ%uG6-$JX-hzG^QzMtRUVGR_-ipEpGAO-2W)NH0)m|(SrSh zz;P*HKY0__4;%K-bsP8};kEY}{K!)Ch^gs!^8WLT!1L`R@IG3TL-mFufVp7&zxgOg zc8B!MN5KrDa-0Y6>JZ4-laI!VP4lO@+b|4c*)T}Yl2je!XSVytBv!4;iAjWHH9NhQ z6hqSsPKtTFes2otdao?Tsp(sj7mH%kVk#y3^*p37UGb6S2VvHq$X@A>SL!YdVNre{ z8Nb?-tWWx8koxu~SMd<`B=y0hKBVzPg;FV~s<8Y9wMEfQQ521dqMNd!eXXKDl$=|B zDWt$~(%5CZQSXLbgnDO&Lv54X6hb!d;*t-4OXKQrLuL~xR&9^)LYVKiL~hq@o@YhY z0%BEgmU%35sTpoBan-;J>0u_H*`+1C1EGT*Cesrq7Aax+tXzy2M7+7P;l9d)s!9IY 
z?ur*V=l(~2wwjaPy@lb@mSniFi)&wD#DZq8ueTP53cHFXZQ;IHuSxrBNd!(o|mXMwSyG%n7jIPH0^7k7AI`$GGH!oYVN~rIp6o#Y-(`faaadHOtIL;sI&S z&gXBXz|xe13M}e0BEG-{(6`eazNVs`v|&EBEUn z3pB8)#feUk7wU{uNK0O$lIBpugSEU?TdQ>+pG*TeQ2%#6-c$>c-qFI2qNGHxk!ee5 zPe^E?lAvl-dXogG1iOhj7Rg~mH4zzsPMHD59A%)?hXbXaO;HMj!7y(S_py9`M)Myj zkQ-c0$g+NmajplP+iskLHJJ?=lj>m6{a{gU;yy}?a(s9jEJ{W14Up)}q$tIdji(_b zhNwR5H8F+C2`(E3t1_hrPv|1L?<#ItLj4D7^&E22I|!MhGkJ879m=mcn50ckv<=IuePT{Rpue2&T^TFqzcsS+L zvlNhA#ZO(82R9ftx*5>}`B&$}rHlM5tJY-+;gv|TG2uH&zCMR^@t8oD8~BI%A@;euN`pcKWh4%zAIL!-}W*OkCOm>dtio) zKkcL^lBh+n?3PpydlME3`fg&(ZRu@WMg!van#>JVd&bvQDe9W#oo~#|&d+4Q7-36D z(QsHrMNZscIBHCoxf7(i1%b-@sq7YYT%Ak``(xHrAJeG0*YsE&eG~s%U$T_)ezO{p zAj1;j3c!S}C#Fld>)FEkU4{~@h?h+UQ`5UO>~ChqTK@j>>ExW=NdaV4zRYLsIg{_! zZZQkYCuirIH8(qSdc(>r*>6d#!;&VY{wq_b9PvjircpARG01!}~q@n08ksd9#T zZWAOmvPp3dYm(Z}|6`RhjSGFaE8$kQH4rpuSLkvEnlrW<50WKTiE3p|Es2)Z+b-MCFUAl>t z9%x3ZdgX2b)cQ&Z2cx^&2!}VV{41< zQ22MB)@v=(pXLT`n{SCpm3s|};SmbhLCX*mDk|3jk^}{?*hVI1;JY3jQ z+}5*q(=DTYg;KJ;*u%BIumjDcx3Js(g)wO+jKlU~Z_hSTrv}E%&^1PPBt%7^+pnR2ePGUiqlrBBEt#`izZN zr~&l=A#VcWnY>0vdD9xDXy}%xCQ~ac@zETthZL575LP|s-?`=YphC2KL|;cga>-M98;KAL~c?Eiorl z0_i})UaP+2pw~#Cm*kRur^>uh;xz^vKnP8W$&#jr;ywmnN|~+O)R9;+GNsNetFz;0 z3(~_De=L6!ZVTFc{q=9QDS{o9_WFg}sQ_Ng*Qo$Gc!Q5D|H+0*c@x||7@y|l_V*Pn zUrH8x_|wY-bYT&%CmalIIeNGgUGkG?&$$vQKW64VkuTk0J(5LcI!zUf?a#tB|){)9I-SJRbnGMBK zA}s{X38gl3*=Xk;(StkkrzE;Z{*uodC2I528ZR|0DQ+qBF-E)j?bD3EMn)r~pQ&9g zBt}H$ZVc4a^c@BvJ+-0B5TT6oOav?y+(xj1K?@29%{0vBuTxRZMXx8BOdEe&qRHRS z6B!%NC@-%@=F(zgm5F$QshYDZ_4zAV+)L1?#BW~B`P$s!wE?zH_-z_bTN49gF^0Oysq9{EP;tRbhAGA?9N3Lf zkCz70WV9Q;jBU{p!GC7<2Fk>L=@glTpl~sW!s=GeklNHTz0?hzOJvqUd&N2b*MiZ2 z>Mw4?!86*5?akqIHxJhj3skl(11d;QP+5TUl>2kb@~rh^J7+G?w1@g)%%IES2nnwq zc9q*&WyXazqj~cmP>-aDBz9X3p%$9MN*T=Q_eZ&*@rvZY;*lkp^(0jW+nzwi^f6Tv z!580y9J@7Xk__w#g+U#U#l-MsJy>wiFuf_;RAWPPt( z2lfctJqe$HVK9Klmv=G|_1l`zpa2Tc)by`;8Z1PqW8TN%`V89mF4U&II6T*XP%b3K z7i{Atxt_rdqsO=u|I{_{SMB5GL%nG!xpA;MctxK_1&bz(%V~-Z<~VwgNjky%Ks#KV zeN2?_VSOKIFBw#BJs>A-7(T%$eSCSiVAmi<3z0N(4Sv$UbIT8dc)4QtX+V`@&e?P! zLrV=ZKgDTHzd@}&~#y~$+D7Knei-NsIPD{**UaF@`~ofSM(FV zy1&dQw)hobYV0-K#LG?bz!+}_X(EQIh%GB#EwYZQ-X{Go#l0s*rj>RToRi5Q_`!P|AmZE}PNh)SR5~ zYPmBJ=e3=ub|=VZYFnGj?A+yM#q4>4$6>YEm0>;l1bMA_$jT)Z5%t=IaUQMVvh{T- z4x!Mg3GTanXa+?$WS3ebB*?q^i-Lq&lJt-EY5r(yafIo~orS}sw-X6-SFv2&TDqS3 z|1d9t<&B+Y`!rYBVXV)b0f>Frb1?E#5C(+Ho6H_}JKvV^Lk1UZ{nu6~1C^fpg zQ~;X^<|=vdf4PdtoSObEiw#Qxz49??^X!GI!XjgCK~tJ=2*^D33|yo=6iUBGA&^|Y zgsf2Oi9G(IWZqc*YD(~(FeuaSw5sU6`FF%c=yaHet?xK1ZS02cAQ<33VG{CPO>W?q z4UqN=YME(Fdj<@}4WLp5ctZR}W?Bv*`ZU?pEHZdG@t=l2h|lCW0mn!Uksw+u#Og(! zpC`nt93))M-_8?`#l8D(v#T&0hOBw{7y?{6NaJzHGfUw1W@_=8uSmU<#D# z3Re_AkS?2#{qKn%NU2YT`U^WT;r18ymWGRch0XTXjup2S#)`M02&5@W^kSF~mn=f0 z2nfMWm8s>&s7eG|Ox7=^>$EK4Kk8_O=bVKL(DRa0(~pU=CD=oxTLlE^uN4Zslrjnh zpCG#t{TBKe$sB<^k1(^m9bMu9N%yJgyV|q}s|KR|kjebvo4QZK8(!hH$J!fsObt|1 zCG<;Th;O8kU z{~=TjZ^8mtWiHVSu^`2H{jan)W}&Sy22~1+4}~`aedx=ww#Mu$Oa?9}?-fdv+X>&{ zB}Xep)6G~4%NGhvngU4_^WkLqBn<8(vRsk(8pq~oE;+3{x3aeAXgQH~V-ZDp^-=?? 
zHk6Z_I5P6Ljm;)9(-&9~|1l7nL9?86tc5KdOUxCXR8Ht((^pZNUVvCBDQa28j@Y>@-nwTjADHKZ1EnVXUOQJ%t_I_Z5zm z`U{(Tl(Q3WODWNHM5`sG$qn*TKTVE52dr1EwMkNg2v(G3tY;M09+d`WBD$d^2ykk8 z+-@20*p7FwAKZo}3W`c&Fr}5hA#$0T{z~HmZ&f!@+nL-&#=cIc+T$p8DOBDyb^!1pmx2Mw z7f79>S(Ag?00%|c(PBR{5bK3=Hkh4*&;~L~?0mAiJp~J60jv9@;tu5Ul3CvTMlLP# zYV$H?b3hSA(6Sm>1?;6MP{js!aVsVO5gPQ&^4L_Cc2;+NaDn3rYAda#Owcfsb&$L4 zU5m_!xf9*aiRdMDg`&+ezkMXucL?Uwo6|r7-cI8PTD}myXUY$k*i*WFnoC#durr8Y zQM9PG4#KAFS-M?cJ)(=fs`_?C(e@Em&aLHd?@;#OdS%m?H@NvRcg3uPQ4j=bgp+=;8Do2U;}0nU<2`3I2E-xJEg z-?ohAV=k<)Sv)Kv%t0|VSuC)G9em$z2FY%1+<-U0lQ-52W`?BN^rR8CilH4d8-YQuxk+MOd}}Ca9B{AVMr9)Ui0f|hW=4|bDonMCVn8kQezPm$W(J0p2e|(? zK&(Y@`7(bQkJ@qLdHzlwDL2k*iw>Mv%+NJasYUow^R-qELa(pJB6$0%^V)deU?V+C zIRH*4_7N!0nxVASuX!icm!U-#wv^YEIn1KQd-8lUhBK2U1``OrgjfJCP1Gi^+>ADe z)fcy({sWLfJImy+I z0mDX-2r~qa1zYs1BB~2@+=N8gceP_46@Bn;sdH*g(rUgKx|{D|aPz@pgnqPLU{1rO z21&l6IPT7NS6(N-yX}=)S$P%g?(?u*g zX64PX-=kZjp*7Pl)9oX=JgSSuz>`WH*TwSTV@g>n%qaDKUA{t>SzVsd<$^8?x~%A; z7?;WwT|^;9DGQ0_m8Q+hCR;EqR=%2Nb_#RbSX-^U!F>yaPIbX^RJomM)#m0D7i|PF zIxyJl|N91q1_uWV+lIFd4E7Ih8XOwlJiK{u|M0fqvEkFh`v&(7jtmY{Vrbjg;H{%a zhwmM}d-(8Ztcn$(OP?{;lt7U69}b@WebNK8L;+JxJf77IutaO5$OJ{M$&x}Vk`}rP z-X2BeL8hpEOyaU$N!(uaaf(J*ylN#Z-nGh!jI_Y}TS(acMHTDA)>+9#`_khODZWkh zvE>EKP9|&^4zcS@FGiwZh$4}N{c6%-smLmXvYHh_;K+^B){S4qNt}C9g>+%uG-0zu zgW5&2a!@u8C2j*)?Hz@c7h0X{KoYDqSLClw=1~PU|8JMYp8Aa$jcb z+l;pv?ue5rCPl-VMXZc4a*KJkaH|Vw^LcR8k}R8MW-MaHXtmDTa|y*7v-fO?px6r{X**|; zLf5`5pVu5qB^DaZ2Fn0l;i58Y7cOY)GyH7C_^i#xwr$P&Cu-%%OS4P!je0el_A4sf z>G)UlZh5jJuUS6Y_8D#z8%sINWj26oUsXQo7?8Wl>co-qE3`k{WDy#|x57iGJ~0nf zi;$7rKi9FzoL_O}hKL&K#3T_7t?xHcP%cC@E*mBT!=G9U8cxeI&pqqAP4SB8o+D4p z3N>wZo_rhcg#9*NW;z}~*n?rYez_vM-pLhv`Bm49faQ0h!np6z2Y6tQU3bxZ8&wCM zutk8rZ;<^1p}W{$GP;u`l|#7N;aKP*YRLdU{WW%fKM530a9CH&?QcvmU8?G*U|50t z=8xA(c7FBQ)P}M3Cl87W&~JI1`oQ2r+*nigR986SH5qOCc?f~CtOXY7t>JU-{35T0|_X!ewn^S67g}$bB;13i09cyD`}K|Xx+EfCvb@08m;Cbty9r$nMsaSsbwq%WkReyf(JuzCf$ zVQtFJZNwbBHkiD{CJksRZ$e$$yyl8`QKhw_hSfDJyy1Jep1kBHlnLn#{P@SJl*Nt3 zy&gx4bi54b25+oSXnBfOm0lfAI|o_HlZFG$zB!y+$Cv+Ea_$lgWkefJUfryM6uCN@ zT%w?X^4_aklJ!Qf3}ml%!5t=+^ReDSo&`goEdO|h;2lT>?^c__0|?%2$*bIKL-1}_ zFuxY=Y8pkGW_R4Me#K9YzZ$9C>;OS$cgb`2>TX?k zJ*4p_1GRvRzT_gKy)&8Fl4jy3tkdqDQJ+h6WPu}B$IZ9**wHZRzOxnBh)`vHryYG% zKj!Mk)bOe4KS^FJY30&ecue=D1Nc(Ovwo|9h5f_Eqy>LcPC;CZJ5?msLQyB_tNWAn zpGaQauO$emHI47396P4<6D+gW8L(?!O!LMUQ9}h(pv5vPYL7WZIbrDL{#Z2HpFC7L zZcilzzyl^cAEUsyyT3nq^%ik{L)h<(r?{dikUiIxS$?(T*^X$zgCZK%-0V-5f4x{Z zxBT_RbJPn=H30*n3fcCkZh$8IdxdEdW?c3FDP~-5P1H&@WFASz)30iJRDkj$nk7Ij z|F@-cyu#jaDm1PnFX*@XYS}olTWd3f2z!!O%L#(iz2VzsaBv`5{+hnR@@-5VgNt~p z!Ntek9v1_tMFR)2;eJFPnwox(=#svR97m~~6#otwsB%3yJMFAi#x1Y(6!@J}jri}T6OJ7 zdP+hssR!f&Rqk+_77LB1$pSM?8I&_5x!)@~H|iN}qY@1)%wzoOZ63+n7#s%F^&ek8 z(#!N-@43}|$tpvFzI-T;pKj%Pj63NJ=hv`lKH3~N?yIa^@L>pLlik9A6g>{=5w&04mF4h_!_En2!?ne1xJjAKE~UW6S84xF*e|%W%Eo`t z;f`F7BJW(zTWJtzwnUJHkQsirC8^&k2&O2#{(03^2UlgzeuFvQ&NjYh);4|>VL)m( z^D&bN>$t?s#wdRv8%>+N z?$q2`tVXK|`OZtqMBG?#HHzI_8P$hliw!;;2_|k8hb$8{LX6wgSM9(UmgU0iah*49uiV#&z3<=dGhchpxxlL!;PQ>w+LH@vr<8}%CSVraGETU3XfcE85iuvuw*^1 zd>fZZesSr7C&li|Q5s=8IX0`Dbej(7d$;K#VrzZWscwAvTd8)lzAUKq6`K0$;-TuC zAnGjs-e2aq{PgR`1iEPg{T;rEs)EUTEp%(ob0Wmx45%%;VX!xz1AKsb!rA>4DcdIX zfU=qr6LV7X!%t448?iE-6N7uQ{1d;Dd-LKu8g zCscGy4Z&APLk@DWtK9S-Q*a&+V!T}K?KW#%5Ojnpb9R7S)-*H~L5fZq{gGum)=es-x@F~hMZc#$$e9-6u^mt0${BM}xn zIp)coCSf$*_{1SH&82y^?N39JD{>FGF|k2ySX(9ap%Bh)(RZODYKg$j`lL) zsmP`ML@M=_*c_##<;V&-#skVi<#J(&4bOI$cH5bwys^>_LXo!we(Tk)cA6L}+oju@N<6w{WTfwM$otCg|3Z?ibGZSxB?65+b zX%TWCpc`3dSi_ zWUyhkcRq$sYACt7BU%3{nfZ#rq7XBV7`cyQ`BU=_%qXmI>?N$YQFre1Gy>zA8;WT) 
zDEpy|+2cocfLk@$>$OTj55``q-Gv2-H5QfW<+Hqu6DHom&%@?S58qs0xTW;#Hf52}wJ^p|%5)hYR?WFcMcF@@U}V= z;K4lH02n}k7X~Es;vuTw-&H(lStcD%Gxr*M$se_C%`FFp6%(4pen6m9fCo34YQuE} z`Y_7d)KV(JnIafX1cKXy^@Zh6Fku6SI_tQ$=8vMp*ls=>)0rm;6AF{>tXBhQ(u1hqXJ=O~ zH(st|pr1&pvzVzgXQ&0e!KpZhf!vJxCoGWc=5rY)Q|m{2eZH(odxml=wBv z{3NW_K@bcS<||;rxPEPg-Pqn5r+CZWdbeXP|9hHgQ*!VIEuT=!!Dd=cP;%Dt)>g88 zAhvd&YT4zs5;WcO#p}T@w|d|`;Lrl+tAy5S9UdN@@LD<4*7if?+i9H#Np02dFVOP& zg4#|KI$?i*ncO?bUB~qa?DwUxhdYYd=R5gc4}Mj(5%TI{`|NujOy31Xw9`!g7p;*9 z3sp8~N-Tf)SQw}>7S4p_X8Y{F<31v|)ZvJ|Vumo{u_6F#ZNX6oVW$08dL|OFc@%XD z-zsHYzDt+Wx;&zb0)K4~UYKvpu2#N7c{a_|iZJeuDs{gukLn@=&=y{$pOh^OVmjw% z=V}oZ?7*H`9!-j>E1%KD^S0s(Eq3782cN;_2+N!~R2&ewjk2y<4`qMtRfdcPTV?Sj z8LI8Wris`v)SlLpK`zLM8HbQpzAwviyn~C{oKq>C>EdF3$9F2=I~P|K4m*nOhP%?~ zxil?b%lVbYWy~KXMN+f#VdFVBwqTp&QF`*oKOofw^s?2L+BwaIxtZqDO7r`KWU>~M zEhB|nFe|pO-Nh)TLN6u@yDSxV7ItAo^p<3>uxuJ*V+;3#7(E2iVQ}1sA#xjLjpnhp zmxeGz*cg^2_j^e7m39{H!(d_0ZJQ}07&*7|d?zMM8N+BtVGrgK8~tPC$bi{}dBZ-@ z?% z8&Vz6!kEpB32%%B&$7ncRT>}lJ<#?$a^=5MtwJL1KYxGzUySB(1S|+wz&}(f=Lz9A z{-?r`9xftKZ*XXb4c7gPmQ)(`y`1!>Jg3rGtsN5e)!;ZMsW`}!L{}(N$ldZl%s`!Y z)B5I+>K{D-i41FJj!X|y$L7jB{|XY?c}#JdA96gX$w9NbY0y(KQ9RV*UFhRB-8&vB z9!gp_)FhHAX7hNE7Rr=1^XNsz7LRI*OM0MBaYW{t&V*{6o|Jm4i1PIQ$zq48`YpzG zG*uZQ;wI?pMQXE5^D@?(_*GNSvHW&1s1^_3!tJ{h>8l0W3Cnh z-j0=fvUt<&8SFMYW@P3AfasOOdKHP^)BU!%MN<`V5 zaPCAA9z~XJLj^AZsZW*D(Y3lthV*KE*My{Eh^GnYxW)Ilvl4T!5}8nVB!hY zGoczLf)z}Jv3cEG8>&dmK^-EYEt65!@h5q%t!@*q>fnLY(95Qo%t+p5nhB}?ZqB9= z`*xklC*{*-$5HiU(mgrgp4eEW8*f^B(Nr2t07Oaql&zEU)Y*TkV-g1~zqkcb7qrAq zBji%6cM*`O325_QPZ}{S?}(Fc*IT#i^0#%-0Ec1md8Ir;0+r9GisdTjb^9HD zZ7f7Qp_GI|OgX)v)L|~MDC;x2{bpS(C|X)Vt{}9gXRPN}Jg<{al~jIE@BWbLC0EABsgE^-JjH_T zN!RxG^EK8U67mGgqzuccz4q|ilh?>#+JGm#M~mewj!IDEDFa(pi>OP*29xEh`MSd) z7oarj9I^P2U9VFLfd{7*v+W}&iigM@5t~5Ol))Ox4XeYX+oUl(>ApXs=7yiJqV0a0bql z*IpN~j8GG-ZN3TFw}G)Fx`C&iY~XJRJJqGZq*O@xl8yJ<$qRCpnrP(CRzgOw^C~Xc zW~HoR59s2zOwEd@2$B~0##F4j5HZ{~?M=ucyo&l~DBG}~wUNC?c@OFhesSprfy$q- zfj}8@qNvm5J)9EAd|gQj;hw@_{~amZQ@FFVo!P;mqT_I+9!dz93NMiWk9GbG#F$T~ zfq(edK}qio)FaXo`Q5HH5@0p_6}rpxbsmycVx}l|40)}`Yj%l-HkE3%L)-p@KfxC} z)r7L9#u4tBpbiY5Xl_`DldXGgMjVA-EZ@y(utR=EZke;_D+F@!15u@c^>6_^SlnCCBg|Xj7 zk=UZj$dYq?yIs=Pg6|8DY#6T4)$ZbrOabA3C^^mV62c6Vmkjj7ocjyM@M>I-9B)xT z$Ps237!XbkFryPKp2*Mw78YnhT)oN~Q;1|`I~L8}5i$*LbQG&hz`z?Fy1`^DzZTwr zdN^!lQFtj$8}|GFP4cKc9mA}_jH7Y>a$wDG(Ei}aB4v9qJ>){78(~T1H&x8s#BAmZ z+;$f=ga$_86gPgX9XGbJFVR!N4e@$04gH-fgFPQ1A*mg^3LuENu^`3(53LNgQ2Zu^Wq6YIooF}Gey*r}gLY%Xm(i65i>>&7^v&uFURJH4YHE8t@E zt@5>>H1`2&@s@I$Aega7G~P9BMwC}4Sq9Ng^IliMT+zV)yg+M{onwTwP5fwjY>mMy zzAW&XFH3|A@>3T6G~}4 zVMNnSTPfq=V>s961eBc5GH#-q5*r0&qp6`|s35oHk4DATPBolylnUBwoc6;=BC5?| zZNh128PSh2nq)GIFT0L5r2~5g-LR9d=I5$+5L-Eduw=If$%Qb+d1(=fFxCV&@>48D zVp!F<;t1EaHSeUNo->gyd})i`zR~LhcV%BlgSh_`FK2w?i>fP#bGP%NT=G}`SDv(C zcWK%tRL!_^*}+8Bk@J^FJZU1y|$49qtheRbF(Jp@vXBVvW4?z9NyJEMKle6y3K|7 zgu7uDlP zhK1h6;|IkJ=C!sMP+BIW1$SXMD}+jRFpp?ff1<4&bbJWWOfg@De>i-y_wWJQf#o}p z#F8Z`dYJNf@2%*G6Z$FDEdL^W>lZCgd_XzU$1Az`l$j)UCth0&Yg?aiC_Wm@t6z+C z`9*06x!eD{0d_mfYN1d|@U-KP%YL+0pReNoYd-^k<@p#xCVE!-XDyhbc%p7P-gePCEsZ&ai%Rj|iBuv=FAs^>!r6bJyc4?Y(&efyxnT5e zC6y167h73%DlajW+gOC>t~!pr@&&zZ>G|VINr&t%Df}yH92F^u4-ZX-mkI^~lusu!JJxEy=YDIAR5hFEGYIB-Uc=!*=o1 z?mNWm;UBbDQ0@Xx-nUf|Q$p6)CsexYet^pVLACD@)P)#2$XNd?+IEDA>p2*Yc{BwA zvR^?C=6wJUc&-nNYNw`q3IRL3COi}Pq*-h?HA-=}BTnoD*y0ZQ@+|Ef9*=ZJ6(mepZz%Dk?ET_BYYCa^*@ktiJG&G9l z9QNf+4s#pfD|SA>PqKdp%m&3Z8(50-4u%yk{c|v(cPhqAvqv78Bx1ezjU)<5s*$q694{VZXAi-h=K44u_eW)?PhJS?kVq9 zigI%sjN~iDUZ$pBvA)KK>=*R`;>_21z=$rm7Yfus(Hpa z4gxXW#nI3_g{`Ge-vB3AT7ceG1t?j&K!E+cjqLniu_dN{bj1XgTxJ@T0w52TC3?cv>T4JRV8gl+E$+8KC~x2 
zF^=FnxMJ?oh3S+H^GL!0bpcxAn$Go{CRH&l{fttUxF?hnV+>)-=%@S53zTNB)(9`x zYD?*M*-=m;H$pSB1FPJVNp-IB2VA<)Co{`H>Y4LbShmLL@Xc>h*Y@#&q*t+#+Uf09 zRB*hzmb+5PHy7uG(nI|8_f(lT~obP<;GIFKO4NSJZpO|&=$a0(~yWbt3Lmf z`fN2Lm_D6^e#Gv5dOMw=pVzW2YGvG`A4n&(CczqZTe4KZdj=idlPL|}KS@_0TY%!) zxT=qy;W(}bs0P_J@4OzUpldMg?x7Y}k?St56%7}F$~czdbS5lD%VVRg7`KBY+ECk! z+Ydghr)JVtAzMg(mER^WeyC_uV#1aZa4#R(^8X94ym--vHI#AVA7k@u@_zKjnzM*@DPYLcXY<-K)i= zn-mba{}W#fu+ub0TeNU@A8U5Jr3saSC&*Yt@g!eqlVan=Lfg?b^Jj1?W zDifQ;{X8$wOt)9C{&DWxi(mgqZ3jkF6s3spx|I$RAGJ%6)r1w#h$dYeh@^tB*cR^v z_q$>}_s`w_^b^#(e6Hl4C@@)C1+BjXI9Z-CD1}DE1|p9t!LsLZfSM`id*zKMkauG# zHhyX=#bQy^@Xb>VFxZq~>>}WK`CEHzDQM5B{3*>!pZ)~9pusGL zlI73H9kB`FB}=dQI_jyFrEIDui@1=p8||trN7wa^_Ik~Z_Lx1?)(dyBi>^5SU1biI zVLM#zEdlN-oBbb2SGp3+!*)uk;;L=iH>l5^G3n-#MB62SAhu4=*wplco=vvU5RKg) z#v&L^mOW5tDAHI(d?-3S1m9p9#;L!|G0ETA?)XxS017>hUHu`7Qt$FY+?9F5q$ITy zX^5gDb;sHcA`T@$)?nC7Rx#yeimkA%Nvb2Bs|i&ykC_WEfvqBGKohzs-8Zgp3{D>{T+ z#_h&JRTM(~0#=>^YJ-&k*f!B)o$3&I5lLw+P^y`s(KV20SpJPbqR%C`ICS{8*4Pb) zt3!>_&-iwl#QF(&I~orKceaWx#Bv;u9BCLwR){i0@#?p6Cb4v)<#RsP65xdE;f#^Q zAc#(WfDhOe8S1b5{Kv{u$J?;t{oHm71UATZw&qNgL)&Hirm#?e&Nf1rH?xA!dn}=; z_`eg;?EjA?uWtLCFjd%NcS@NG?&c;=CDulm@}S>wgZa-?*k)m4wIMHh^!R zQr^>Cx;pTku8$SQRlzUo^8ct7SvjfQrGs6AhyS7;|B@byf#!`Orq->D8Wmz2sA}{TsX8j73k=*l zXF2Wzhosj@{-#*hFYt+EGv{V(8XXFwb%X`H!!R*#V66qfo_jukpMXVj#ooeT&t~#= z^yrEHk61k3E%q{#pmhrona2)CMNZ`4EiD`F!!R5#4)$hm?4lH=B21#JYu|o(C5jW2 z4c4JtEg>Fa&BP$nUHWm-96u(+3OkCMd$fp{r?}&`x`pY1u=1%$&>t+*yWfDjz;R)k zl0U&}5$4Q7*tz^7+9u96=D2=X0hLqJ-&aVp><*HX_#cJ@p>B1K_$#e zVV7#|Jj~bV^y;o3+yrK-0V(57?xYTs^>40GeJ^h|Z7YfN?{@%DR*Q zJq0LsUGiGnQ)*+xmhU7YDi3EtsrVwP*-}gqM@+A_3$FWxY&RBMbVTC(ZFZwjMP^rR z%RVf#{6NRzPU?eQ-~pY2>pR8i#b+=`^(pJ0=`^PRTKN@ST8!OH+}<~{HKtX$UZAeZ^sW_qZWd|WM#+~UL)vs7`@O3pY9g~=Q!U4Tbx z2|~sbnQ~ZIny-D6K%;IY{cN|!4E!)V8|;G-jliCE!n6j7Y#r$>C5M$p@5}N>bfT7B ztJ<#3s{-VA9pYCiQzr_q==62`Hl7ew!k*8!$j7DAms}#pi;B^BU@+_V7GYiNWLA6; zavOA9pqKWFQQ`1_G@yL^3;4%A90m*IFY-Zm9AAfmXFe&Gg_2MvD7hf)WFeIsJt#QUHg2xQ^+ z0|;)>wLXt5i{|i-Ax8ilUE1L`YxwfJN2psjc=iT-DuA|LOPy~IV2pC4ARH$>kn?jw zJ#nmOwPkXIf~JSwFES6A9vY&_pNV`ImRL(n0D8K@7$-%uJfST`(uKumKKT3-4|4$Z zO0(Q#V}SW8HC5|#idR0*=Am=YUy&1#O};$mmS0Xv{GE* ztO(@4Od++;lZwY~m_n-j-{ghP{Vq-)c`D(T#0AB^l5EtS*$D@+3}fl`BJl6#XOKT2Fz$1>*+oKr1}?X_SNew=%l z9Peax;{iPVlxlzv+Mq^H5if=2piyS_=??5EW(DEJuw}5Zth5#&#tpEBprd>duxKU3tuwr$I8jtZzceIL5*o$UgUCS(rTG)_0!5U5@P)% zmHk{i*iv)m=VoKNx~18jP$6q}|3N9wLmnkHDdMmELoN|M0?)T`Tfey2SgEls^85wg zFEpz43rqxZA9Ijf8EHpO`>PAg6B1cqGrjT!RcW10Hn=TfD|=z9%S~(o38Of_wRvvq zy47Kb7&RPA{JzTnODagi>Y;F`3+Pk%RqFY95ymqll7YePgJWUyvZyK9i^VHv-@Zb* zG{$`T?pVuT>}QwY-KAR#w+?7+{}8!22H6N}7@K*Fd!md>c1LTt#6C}z`NHp#NEJia z8SIMV8h=$(G&TJ{B~tx*tswXSHK~4Bi|lv>ong+x+3aUVFOZhS^7zQd@``-aLGsZ% z(A~7+9y^oCE^ZXDyfaiAzDiX*3DXyz2`je!n3k0$gu=wwzk=jilbGX3O7XfneC?J-*P1i1lJ5g-f|lXCz< zLgz0q#6|7^MgX82Ev!7g)5sQKV}vAbQ_AwpOl48GW)LSx-CS6`+ZxIH!~(}?Lavdt z1+s)@l=H)IgKrZjav#+4aD)3+T&=QQPseom*SZzoxH*FoyAeY=7N<0~K16=kMAqs! 
zm38h?AXPiHOYc+vB`cHu!I3^QAu$`+PcdaS1oVs*ZFfrP$l1&1xdAN{autCT!eQJn z8$zzY>D^!q%tw}EL_xy(Da^~l^P(&}F%zb{z%L$oYKzRk{NZawHf4?v5t1NZk;x>B z?V<{rkUoKQdK7t;npWwKWRKBQGlfd+{2B|6GP|YO$ipZvH0I|Uuf*~pw)rh{SnJxt zlGii)&5KmQe_wd4A`e6i@D4^fJ&N{_GNrk+iO3-#fu~T!^zG+vJ9pNF@5t-=Ka~>h zbXOgdmw+AOBuWv~AJbUxCM)TcbCoS95GC6R1Gqi?f6OrnDf3ajj~ns+*372l2fLD) zD`^x7C)~EYHF*t%*8_U3SV&j)O!00C_L!4?r;)4|%8D?Skr%K3Ar*xx_eV;pfk9aE zr&RPb!r!0PQ-0ke9WjQE=*f07p-}X3RY%JZJemL`^9E8-3 z+)rw&WQA@#Q^SSHcH;{R#KfLt^ByfUDNjCg>Ll^9Ebq3@gG3tXP0Y96VtIGRl5dt& zx2?}j%PKSXzDV9Qy_Rm}qFX>Lh#V&rB83A-A-VGn!ge{MWX(Iev37-kU7ep&ra68| zAtUjUTH(7{ui5iLmxej#?0iICc&f`EP+7Td=teZdXL?sThMDbV9Q!fZ`KrZ^=&Z!= zY#E~aW)fmZ42MYwrX#iUs~ApaL2Bc|5Ig|$Cx_%OC3jzwYra*~9hbOiVP&sQHwb z*08?8vahj1*}pju(a;ryn`M5IW%-tRVgyKU^DMf1BjcOmX=KS$q+;VfnYUl2={f;_*xj5@0YNkL_zhB{J6m@OLP&aZY2O>G>k_21-QUz| zx4&Q4=PR3OirqhzQBr4oP(c{jgFLRWd#d+ti1~4A8mIMIg^P{hOSRc0Bz#PXpVcF) z*)q3d(b&x#NR=&ougzm*b^N4WdqkIQy4W`;das`TQN!-d4dU{Tm&$^s0 z?~Z$^BHjD12#AvW$)>@4^&Cbv1)O#4Gn5PbeN*&#B_r;5=m(1|J)cGRC{aih2H&qRGWF zj?llrR-L#*^`rcWDnp|*41lSVb@ z`>4`^n2H0n_egt*Ebfa(fn@okhxABq@g3I4ekBe3Q+$g?g_hG?7X+m>p3+f#0m|BPu!Vq?AJxl8bP@ZpQ(CbEMpoZlMnUf`ntmrA z`d@^m_mD^iA{t6jN4r=CIOO;!`Gg4(U{Rn>X7g6!JV|%y5Za2M%&Gb%mC+vx$tEy1 zOTi=9^$~0U>Db8X2Lh?TC!Mm&bYytS2f>AyC`Qo($&H&nh#tbsqDiiW$$3bXvscDMQY}td{Ix+Z6tMQWwX4n zhW&LO10}Yl`Nu*+p?%V)0|*1@w4WNmdW6Jrc84Y;dbSdC+fVLj zaUymelno&ywALgA@glqvt?7SDwI;@NYWjc9G$jxbM?R4ws6{c}>wizFsp%gS6M_L@ zqG^hB`Mt&v`3cGMv?}aZUI$nn{|X#1k1vAyh=*J8%mSW22z^qC(Xz66XD#5 zZ<(Ef*YO&Kubl)_jii z+N$?J1nW@lCKuA%=24xQee_~&G1#8bCsyRq{G$3 z6pI7!>#o^oa12!bxfXs|ld_d*Ud9Hki*f4bXL;BAS$CDSIr^#XaEiP|w}aiy+xceF z+gFSOCdKfE@U?3uI1aen4#w?cOCnEY$($3DGmc4~Gc2zoXkm3+tzI*FQn%AZopUh4 z&aiJ`_J$?zyn79V5$CwcF3PHJ;-SNm;us~ zl^7t1hs`$_p!k>=*cS8Z8WDgF?g-&hISQJ!CIig-v(CnF+kUnI z{!QI(ug|#gz^C9Pbeq$UA5+6caS12R&ILM#nrssMP4&fk2F$)k4k0mlCIW05YseA$ zo(QmB3}OfqJorjj+(UZcf>ha0r><=~a3h+=N1oy_<{xG7D{D=Zm4a={gyisJR}kby zTO-WMvqo@}HR79-*Ss}L+6b@Ey+I@J6~k1{MofKgS=zr$1@+hZ`zAVTu*)R8@gE zuw@@{z1oOip@^${LSwGE%cOKq@z%ma>`i`aaj@kY5dwSkA0q*Hp|KcJMqT3EV=p0U z&8&)Q> zo9`76bJPrxlEbtj5O)`_TWFIPgeW>Y*h-U~-m)cJvEyKWVJ|B>%wg1A{fuM3NCE(g zw}4)xz^e7nNY)F)!8(xLe+A%zeHkW7|6|>-FfM>CJ|yq{ZDi)=FwZa=9u(WM`^0{^ z)SuG!8goy3Q9AYNAm_W0h?VmkSJV^YTVP5|l_c^f^MmvYns2FFs3ZS6^ua zb76ZQwh7~Ocb4^N|9mLSOqP)mHwvHbhe+9379vE1e5{qc&u&-R0r^=Ew7uP|&86#p z$nS;+-Ul`?pAdx8YVjqF?{0mh7>e9st8_zHtvfIrIEzeNL}Zn76ZvWKF%>Jx22%{% z7OSqd7{u^N6SdCSW%9**$V+2T*JF<70E1wgKodSpokCRb< zQUdDzWoo~Hp3aw~0=$i$&db-S=&HG*Au(W^630gu7FUm+uU|YmyTT?;z>MgDqcCf%t>7i~FyCN7Z{nc@a|*hqw(&@qVjE2yNa&E7mx|OcW`Wy{!t6 z4wwj5@TDtZSs?cFEChzKMj2(73Lhn2O-?syQ=;YoXU>ttA)E5Rtg~hZ^O7hS^swBv z%-jtg<1hB~@lR?@f&pFM=6!1Om!wJjQ>|tjY+?BR4DF41=9ZQ!vpD>wLZGk)?jq05 zQ&vUDU2r4&_+3zC$7q06*~~6a)!Cm=XKXSR_Yp0*4MSnPa9FG*NlyBD%JE>^xR=|c zRHpnV)CcQzAio?Y=%L(Z>c0adLHV$T@sXOT3Ayc)6ry9@Vo?xY1TvZ?t|bLUhshyI zO;8P|ZbCkrlJ$SA6_Md-otplsEVokwwrF)a+*EYxSS|&N5h<0^>06MltAwwzwCs(7 zRE9T7n!+Mj>Qg+870`T2U!y^UQ9Vdc@VTB_e9?s~DyJ}3kzt=L)hh-zwEg}v5AWvThCMgl1jSIX5yWp2_Kb=R5?<%B zvP32bZVlR)AdT2H+zF1%6AO<)m@#UP4=n<-%)tf@=+4(rg zp1+!kaG24v*~5j|wrN+`ZoS3o{n-U(;yh>Js#Y4WRCFAuAsD*1eMb6L1R@DJx&T@$ zM-e|i&oZTj+T!Xj2)>~l4HI0ntuS2LUg{&zfF;~aYur=ZF|d>LZG~H~f;Y3l>u_Nr zl%qk~BwP4knHr{Os<=1op8X05J~$>hq8z+GmH|`K`*kJ&d^X!}y`(2?5!k3kI54&` zHAMurL<@d5*q?Wacnhfa$!deIA~@tq`X6AhOs!6$CFM^-OM>$wZV)j&$Pwz?S$z8a zny>|Jm^~NEcFY5^0+tQSk59Dx`_kDq6z#oPQ6!r^y|haS>u+kECWaAteMynpkbj;q zb^%m3$v{Mw_il%6;C@kL6>=cmhgOkQPj5Vi&Jm|_m_*2{LR-@@v?^l8+dROVugtD! 
zuiA$l!*afU_w4+94b5EPFsrpk4pons0SJeC_?ZF7N56hd<28MU1QUv)985()T=GOF z2+I{w7m&J>+5Jc1v-&ld zuy-q1X&Lm+JBpJ1=k93b<09P8so(s%LAafL{FC-Rp7AuQxyt3*m8LnJ4KkyLSd*~H z)A}H2lCY04vC!d-dy{)-Hdm2FE0vb|YL+MpubiVVu3cyqMw{j^TfY9e#=J+CBPa0X zi9IWIUz;K;>Dq!x7lwRQbI2g`%8Wai>a%|+uODZ}w{S~R(NbAff1}ESO)yMoiS9Dx<+UNCQshi5*lv#u6gOTjPaQ*=G5q8?g zeh|-bk~+csv>g2u;tXGJ8pqIK$x7W*1`V z{t=#p;`L#137U{Bt>7e@kw){jXjA!Ts#G+1ezwW_5Z<~N@?pNwtc4B)U8IKdWx-QZ zjfP#f09Zl#CnJKJ7+TW1t+c(+MU zXb)Wn24FujkCd#o6lVe2mVdojtzWFCD(laZYagol>ndkYcC~-rOO0V=&r<@i&ev8Z z)K|%X1Sk2Xm=WygSD8mQ>lgX$Ju+f1XqWHn_eu?f~=Y;^zRm& zXw!__^nlp;1dqM=&w%m@CLYo&~MToc*fjo>;u@IWt~7=cmI( zB65z;@ht`jg&pqVDIL1(uY`?G8@?i;FhS(u; zI9cQ*qB_+(gx3gT!>jQSy?%lLfOO^IaO}y3&4NLI_yr!kIAQ~YJzFA+d%wjxKQ#Xd zvPZKo@q}%Xs|aZ;7O4DkAmP!mJ(5>Ax}n0+n^wq^jtbE%qQd2W4%q%VZl+I%2J$I6 z)HwH*PuB7Ox=?jY%EHw2Gu}-Pps*Da&p4Efp$9aZVh04uvV#&1ST?Lem012GC}gJ; zov4NKsUoBp{Xg>dcYH;&Tw2nM$BuE{l`Q`nddjaA&aHoMH0V3rpuZ^_snwtz(V$Oy zH-)@G-VPyMp!t4RS>NpjrTZrE36?f`Z}FwWfSbT^Z#$(0jP584#iIo6$2qt2lSR70 za$a>K0|w5$wr*T2o?E_H^nq<|#?kD!cl5o4+- z6BUwg9pIv6h-N|MOS4Umhr7JUuCwJ3>&|aZbd;(#YR$z7BHnRDw-+l<9%>$u_I8sR ztsJ<*bkcy5$)p%#7#W*LO<`~@sNbxvI;1DJ>+&nQyiB9RW7F!tCyE}>Q>l}cpH*r_ zDLHVg&ag{LxXA=6=MeT4`vEEdZ6A4g z=4+J~^}RJ+zFkiQW%#H|%+1#qFE^dU{w8T%#41sM*Mcdg!o+b6p~&+bncCb{eqVtJ zCy$81!}2g^Mz!p{H0+%|6=kpR(_P#jW?fLk3f(f|I;ArEoDFeNAc zre%XYA&3fXG7p5o;vU36@#!AcdcJ%JLkiAEKVUkkA;rgFGzd-`Qqtno>ob*0 zU`JDf`3jblBJX|3vFS5RB~z*Oyv8CFz#*=@kqpJ&LxNgmUI_M*Qg7;VUbjH7eLpOt z?bj;L^YKPt8j@5RT-=ft0=IaGzsjP3^Qg}i>&ur_g+inz`NgGMrnmYZI}s6rH#>K^ zc}=+J$Qf`-hZgs<0)(x;3-1Guwt_u#wB7o%*wtK_($A4tf4YS}jKMjE{DBo?jUZ^ zUAW1V!g8uc&|-k?>jbt?$hSabj#klaPGH}lX2RsHzorC+^7UfqHqTr6GPEEn!AO5Z z8{t2-ut>A?hq*B{9jFdspqCIr>hpvcOsEL#ap(aOHJYV<^hN%ChY64p~rfgyb_ zp*A(UON;`Ri7|)acL;_9YGy&Cc&HU#q%pR%gD6^{ONo)Nk0(OC<)~H%iUBtfjHMeS z34jnQfKhgm)iph}2m@$P<<}sROm)y8^%L#ga_6yuNjudUsFp%87bP|yaFNy#E4L(I zEDJ6W{IZJoce@^45n>vPCB8OdvFdHZ;@sQfwT0aUd<%Q?^JKqgBlh0HZste~5v5?y zx=ln{pnrxaJ274#s6_CHk^Q8e zO6FLzjQIM`stCXCiS8WhbJ+i|U@znetk+I`hWju%A_n9Ale8-s00>$S4-q>80OR!| zib|WBK2QMC>$?@T2KrA;k7x_5bq)%nJk~b?2LYyeyu#G7 z&&zkmQ}wQ=qy_1cK4imW(3uud6${=LXPKRX`JG#S+S(wG)3WSwnPnl~xC!+tW$Y_h*5^WMDo z<~>z2{3{-kxJ~M{jqJ2Td{%^=T_g{mHjvy?&}C3mJXetJgJk4c9FAuy{qMdB!=jC%}tO8 zf?e=H_-B(Y9YpZNi#)FhVTXg&3zTg+w;RWG#p;Z{&=N^nwWD5Xm+En&+t`ctq7`e} zRG5+eg_V*mZ7oaUQgS-9(8W@y>sv6tzYN-5k5-Lx4j xSxRa)WwW6Znl@&!C8jB7Y|-s%{{=fwbYuVk literal 0 HcmV?d00001 diff --git a/arghmm/deps/compbio/birthdeath.py b/arghmm/deps/compbio/birthdeath.py new file mode 100644 index 00000000..4f224db1 --- /dev/null +++ b/arghmm/deps/compbio/birthdeath.py @@ -0,0 +1,425 @@ +""" + Birth death process and reconstructed process for trees + +""" + +from math import * +import random +from rasmus import util, stats, treelib + + +def prob_birth_death1(ngenes, t, birth, death): + """ + Returns the probability that one lineage leaves 'ngenes' genes + after time 't' + """ + + # special cases + if birth == death: + if birth == 0.0: + if ngenes == 1: + return 1.0 + else: + return 0.0 + ut = t / (1.0 / birth + t) + if ngenes == 0: + return ut + else: + return ((1.0 - ut)**2) * (ut**(ngenes-1)) + + l = birth + u = death + r = l - u + a = u / l + + ut = (1.0 - exp(-r*t)) / (1.0 - a * exp(-r*t)) + p0 = a*ut + + if ngenes == 0: + return p0 + + return (1.0 - p0)*(1.0 - ut) * (ut**(ngenes-1)) + + +def prob_birth_death(genes1, genes2, t, birth, death): + """Probability of 'genes1' genes at time 0 give rise to 'genes2' genes at + time 't' with 'birth' and 'death' rates. 
+ """ + + # special cases + if birth == 0.0 and death == 0.0: + if genes1 == genes2: + return 1.0 + else: + return 0.0 + + + l = birth + u = death + elut = exp((l-u)*t) + a = u * (elut - 1.0) / (l*elut - u) # alpha + b = l * (elut - 1.0) / (l*elut - u) # beta + n = genes1 + i = genes2 + + if genes1 < 1: + return 0.0 + + if genes2 == 0: + return a ** n + else: + return sum(stats.choose(n,j) * stats.choose(n+i-j-1, n-1) *\ + a**(n-j) * b**(i-j) * (1.0 - a - b)**j + for j in xrange(min(n, i)+1)) + + +def birth_wait_time(t, n, T, birth, death): + """Probability density for for next birth at time 't' given + 'n' lineages starting at time 0, evolving until time 'T' with a + 'birth' and 'death' rates for a reconstructed process. + """ + + # special case + if birth == death: + t2 = t - T + nl = 1.0 / birth + return birth * n * (-nl + t2)**n / (-nl - T)**n / (1.0 - birth * t2) + + l = birth + u = death + r = l - u + a = u / l + + return n * r * exp(-n*r*t) * \ + ((1.0 - a * exp(-r * (T - t)))**(n-1)) / \ + ((1.0 - a * exp(-r * T))**n) + + +def prob_no_birth(n, T, birth, death): + """Probability of no birth from 'n' lineages starting at time 0, + evolving until time 'T' with 'birth' and 'death' rates + for a reconstructed process. + """ + + # special cases + if birth == 0.0: + return 1.0 + elif birth == death: + return 1.0 / (1.0 + birth * T)**n + + l = birth + u = death + r = l - u + a = u / l + + return (1.0 - (l*(1.0 - exp(-r * T))) / \ + (l - u * exp(-r * T))) ** n + + + +def num_topology_histories(node, leaves=None): + """ + Returns the number of labeled histories for a topology + + The topology is specified by a root 'node' and a set of leaves 'leaves'. + If leaves are not specified, the leaves of 'node' will be used. + """ + + # TODO: can simplify + + if leaves is None: + leaves = node.leaves() + leaves = set(leaves) + + prod = [1.0] + + def walk(node): + if node in leaves: + return 0 + else: + internals = map(walk, node.children) + prod[0] *= stats.choose(sum(internals), internals[0]) + return 1 + sum(internals) + walk(node) + + return prod[0] + + + +#============================================================================= +# sampling + + +def sample_birth_death_count(n, t, birth, death): + """ + Sample the gene count at time 't' with birth death rates + 'birth' and 'death' with starting count 'n' + """ + + t2 = 0.0 + bd = float(birth + death) + + while t2 < t and n > 0: + rate = bd * n + next_t = random.expovariate(rate) + + if t2 + next_t < t: + if random.random() < birth / bd: + # birth + n += 1 + else: + # death + n -= 1 + + t2 += next_t + + return n + + +def sample_birth_wait_time(n, T, birth, death): + """ + Sample the next birth event from a reconstructed birthdeath process. + Let there be 'n' lineages at time 0 that evolve until time 'T' with + 'birth' and 'death' rates. 
+ + Conditioned that a birth will occur + """ + + # TODO: could make this much more efficient + + # uses rejection sampling + start_y = birth_wait_time(0, n, T, birth, death) + end_y = birth_wait_time(T, n, T, birth, death) + M = max(start_y, end_y) + + while True: + t = random.uniform(0, T) + f = birth_wait_time(t, n, T, birth, death) + + if random.uniform(0, 1) <= f / M: + return t + + +def sample_birth_death_tree(T, birth, death, tree=None, node=None, + keepdoom=False): + """Simulate a reconstructed birth death tree""" + + # create tree if one is not given + if tree is None: + tree = treelib.Tree() + + # create starting node if one is not given + if node is None: + tree.make_root() + node = tree.root + else: + node = tree.add_child(node, tree.new_node()) + + bd_rate = float(birth + death) + doom = set() + + def walk(T, node): + if bd_rate == 0.0: + next_t = util.INF + else: + next_t = random.expovariate(bd_rate) + + if next_t > T: + # finish branch + node.dist = T + + elif random.random() < birth / bd_rate: + # birth + node.dist = next_t + + node2 = tree.add_child(node, tree.new_node()) + walk(T - next_t, node2) + + node2 = tree.add_child(node, tree.new_node()) + walk(T - next_t, node2) + + else: + # death + node.dist = next_t + doom.add(node) + walk(T, node) + + if not keepdoom: + leaves = set(tree.leaves()) - doom + treelib.subtree_by_leaves(tree, leaves) + + if len(leaves) == 0: + doom.add(tree.root) + + return tree, doom + + +def sample_birth_death_gene_tree(stree, birth, death, + genename=lambda sp, x: sp + "_" + str(x), + removeloss=True): + """Simulate a gene tree within a species tree with birth and death rates""" + + # initialize gene tree + tree = treelib.Tree() + tree.make_root() + recon = {tree.root: stree.root} + events = {tree.root: "spec"} + losses = set() + + def walk(snode, node): + if snode.is_leaf(): + tree.rename(node.name, genename(snode.name, node.name)) + events[node] = "gene" + else: + for child in snode: + # determine if loss will occur + tree2, doom = sample_birth_death_tree( + child.dist, birth, death, + tree=tree, node=node, keepdoom=True) + + # record reconciliation + next_nodes = [] + def walk2(node): + node.recurse(walk2) + recon[node] = child + if node in doom: + losses.add(node) + events[node] = "gene" + elif node.is_leaf(): + events[node] = "spec" + next_nodes.append(node) + else: + events[node] = "dup" + walk2(node.children[-1]) + + # recurse + for leaf in next_nodes: + walk(child, leaf) + + # if no child for node then it is a loss + if node.is_leaf(): + losses.add(node) + walk(stree.root, tree.root) + + + # remove lost nodes + if removeloss: + treelib.remove_exposed_internal_nodes(tree, + set(tree.leaves()) - losses) + treelib.remove_single_children(tree, simplify_root=False) + + delnodes = set() + for node in recon: + if node.name not in tree.nodes: + delnodes.add(node) + for node in delnodes: + del recon[node] + del events[node] + + if len(tree.nodes) <= 1: + tree.nodes = {tree.root.name : tree.root} + recon = {tree.root: stree.root} + events = {tree.root: "spec"} + + return tree, recon, events + + + + + + +#============================================================================= +# testing functions +# These are functions that are not efficient but implement the distributions +# in a more literal way, thus they are handy to test against. + + + +def sample_birth1_literal(T, birth, death): + """ + Sample the next birth from a reconstructed birth death process + starting with only 1 lineage. 
+ + This function does not condition on the survival of the lineage. + + T -- stopping time + birth -- rate of birth + death -- rate of death + + Returns (t, alive) + t is a float of the first birth or None if none occurs. + alive is True if the lineage is still alive, False if extinct. + + NOTE: This function uses a very literal way of performing the sampling. + It is only good for testing purposes. + """ + + # sample events + t1 = random.expovariate(birth) + t2 = random.expovariate(death) + + # both events occur after stopping time T, simulation is done + if t1 >= T and t2 >= T: + return None, True + + if t2 < t1: + # death occurs + return None, False + else: + # birth occurs + + # recurse + t3, alive3 = sample_birth1_literal(T - t1, birth, death) + t4, alive4 = sample_birth1_literal(T - t1, birth, death) + + if alive3 and alive4: + # if both lineages are alive then our birth is in the + # reconstructed process + return t1, True + elif alive3: + # lineage 3 contains the first birth of the recon proc + if t3 is not None: + return t1 + t3, True + else: + return None, True + elif alive4: + # lineage 4 contains the first birth of the recon proc + if t4 is not None: + return t1 + t4, True + else: + return None, True + else: + # both lineages died, so we are dead + return None, False + + + + +def sample_birth_literal(n, T, birth, death): + """ + Sample the next birth from a reconstructed birth death process + + n -- number of current lineages + T -- stopping time + birth -- rate of birth + death -- rate of death + """ + + tmin = util.INF + + for i in xrange(n): + # require each lineage to be alive + while True: + t, alive = sample_birth1_literal(T, birth, death) + if alive: + break + + if t is not None: + tmin = min(tmin, t) + + if tmin < T: + return tmin + else: + return None + + diff --git a/arghmm/deps/compbio/birthdeath.pyc b/arghmm/deps/compbio/birthdeath.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe7eb2272bd9ac18580ec541d8fb6bc3aa023a4f GIT binary patch literal 10518 zcmcgyU2h!8b*<_lXE;Mrl*FMp(#lE85tg$Xt-f}#aUAX1Udiih;Ly8G!Lk(#ljd}j zYHFr?INd{$i1?(%iVEN$0rHZUyaf3L4)OzHz(C+9C(p@iUIPaR{16~H=hS?WtKB#N z5lUTMU0+>w>)dm1-JW0kaOGIJ)XC{s1joH8{I6jjZq);wG= zQB8%5CYo2~jA~0JUNX^w`Ltvno;A@TtKxGeI%7V?49=TqNsX6HbXJ)jt9c@Y+QL3y!LC)sSXx`wNa>n^VRS4f_D=2^+?cqZF677vgiA1>hm zqV@DJ3z1>!7Ihn`MUCW)#w*CJt{l!7ww)QZeKy0kC4+kdqF%<`jn!DLyZ)XzoaJi| z%jOv}Wz#sU@P>z#L+nP$6qs4X%wzoebBkdx+T4l4dP8rX<9q6d)f0254(CvHoiUaA zv3WEph5V0@)O`E}aWUwpxnDevDe%p3GwCG70iMIc&(hfMBxxLO;~s~5aqh3B+i@D_ zYrbw8BHSus>`>B;{k39EMcWu&e+jo~`1J4JNLZG5NZcNB^Wyzplbcg;UT$#hZK2KF zBvOI#AkI8mhW?vx!DVN&W$HH@azYV>8*l5%$}UlfC!`*0~e6iyQ^p zkGtK`EsA^jjds@UZ6+Do{2={6}MD=YaWI9RV1deX!&o}o3rP2zwDK5 z+16~$yM~K3Z9;R7ZE~B#vk6Px?Qt*KG5hSNYjvuk?RTMQMV;ZRC4Kk=xXle^4 zfLPFqjh5@!d{i}1)Ih;<{rh&eY#wmCMQq2lXT8ZO1x%zY?Z;V`$0`)?uiumw>@>-D z!6KV+9h*uMrD+od=9IJ|uVSn_+_L>%|NZZ9V9u@Tt-Bd;8bAVy2VG5gzaK)uRn$$= z<{axOVYlm(?CpT10Z|gLQ9ynOsBU8VxHBlUj>$rUI$NwTSw?b73oOoJtdnYgjpKh6 zi78)lI$xr$*X+mB{e?BQnr=C@(epu51l7w*iF)6i~B5%OIps zYv-ZBgwwKnKtn#PxTdIsC4hE<4`4*V2e1gp%waef1wE`B)`+xcRZ}NbSd54)P)x2M z`kvLqAyrfWlYpWs_!<(30D7#+xO#cTK{WpdENNPeqd3iJKxnmj0Ud=eRUGRxtb{Z= z9VzZwx;C;ixexo@FOqb7-2CP>KiB3K=JV`P6y?^(G&RHHEp{}d0{ZfyUweIWpxL$o4`0|2lHI9UZSu;28O=<u6?9zB4U zuxj!n(9*)z!3DUu(UfogPk4nyJ?(dASL{U0ZJk@XY!n{Gdfte17N z?Sq>7-iNO`D)AFAMlWtBTM7DY9&q!rtN>iHD0ap_^z#_LEJh!?uV&tB<4Ev2S~#a7E}TDf6(uUxoHTVkYS@58*XwTXOuj;K-YTlT7sa|*4=5LT-J3*u5NHH$GW7z-w&uGK3+w{EmrF+=z zof(ro!2=w57;Sp|mIo-XD|^Z8!{*aRV7IrSqNtb3a&Vj~S$xKgRx-bHXw2@PkDk7U zC!K2X+6wAs%2*XPR?)+|J6&)pnk? 
zI=_l?)qHH|-Ebgywco~D^*04-%1@`jtxgseLMD94DC-uq&+tF9y|ACaxtCMv9KjPl zBP_oJ&alcjA%)>vJ6yNaT#EvzH4fF9Txqdulu8`#iFMr-g{6ZiO4}_|}o25}wBw(4y zg+Icf{~^N9+U-GK?IyTf7uyCtw}ueyhWo2BhP+J( z2Ob2#PMUlP+MLtCh&tdu!VmI1Ud)&+ouEFOaz5}@3_%c7JIsSreU5wZ)@44+{AkNH&u0paWj`tQHi?McO1R`F{4+8~Qc1pd5mUizLGhkR;fOh(0QF>ji36HS+kl2nXIl{Ck6lBF z9&YOv!xJVp2a@=;8>ZOP$?%voSVWGCm3zX4u8%3* z(KC@PefenpL#at{oh6h<1jkRlNEk&ec`ou{()ejhZVKgvDv3Oh8eK(6aEtv3NH)6d zd>{>%!9*}ap*x3w(3Qq*MlEW86VIomQF_C_aJ)+#6`sr@xO@@+R^bse>^WR3Gu4r2 zLw`LWZUQcA$Q-fr#k%2>xJ6G^#mofnvE})R&;`&`w~9uVOF2b}oaJq)tQ{*R08ry8!6Om{mGj0_5sOQ^;e| za#^;=_Xg(+bAY@jvfURdP7@lE@tscC-HgKbJxn3T+l->Zl@SyVFb;^>7U_yeP77;z8;dQCGU@-ybeta(v3FDhmW-x%BxZPpygryP$* zr3YnZf9a?P^&ngKen-cSG}(;v11*cyv99OVTK8tUvA4I~9|i z!z*S@G9p>K`&01gPtAjUpH9lXM|E~0-IvXKN8l0Mc7J6a=;#R!);M55esO^V@=ZgT zuyiC7yzE^(w`!COwtrmlDq)Glu|S+@AsG5E#C!g2BvU*tf0tmCi<_i==+1{YnvP3H z;Vpc@nsR=nxICH)og^Xpsv`UUK!($Uk%N1F6Kp$2vIk6IH?aWLiLq%$^`D=hjvzN{ z?xa64cK1Jc)x!kI(^2fhmI-#1Q(FgvO8*Xz_We^dlHkkI2{tK5%9i8W6Ro z{d7V6d9#HGjw1Lk@h&%;GQSjFq}khl&F$;5ImGL*saS!t6uW)_bAZ#;4 zIxz&4G&Ir0{nsTIiq%mI2`KmoNt=4ftAtBp8X)7u1Z&1gKs0*^DAOWtx^0%u1@Z3JAL$Fvm62NPVBrQ z?1CWjA)+;L@Q4FawnCA_AN)QZC%}V|g99WBF7bx!LGZb;J3ym0A&NV0jH~R9%Q%;f z?VZ=~r5kff@-Hsqb+2#=2$2?7V!1cBIlb}%FuRPHOVzu2WqGDzt9I2cdsWNbS+*;> zS8#&4U@zji?zOoiRb0g9U0nHP4CYKOC=7%Ef*U@9Iq(QR7FX=dnyeus3F6=bc>|vU z-6?>^6YX|D=)-axO8DIt8-wb0DqB*awtLR$}$n88{hcguhME~)N zQ1K>8bmG~mnrunccwQYfnM^Ca6e`AzxYId z{C6H2VjTHV94JdW2mY(XC?1s$`g_S< z*kOE^@u}g@6|OUEH5`dq|E;(1VJYkN_z{VpOf(M{utF6N0vtgFs&a;Ws!9cJ#8K?J zSzPnO4n9r4p)Lhu(V?#p)o40fNk1Qb{=%09hA{nvpEht;L}3`?&@k-J&;eUatwkYj zeD>jx#1W1KtC$QH!Vu7Y1frd9b? zYd8-V4(~EJpb;GLAw(9{byjx`6_~CHEfvP_f>dROYM+ zR~n=tR)BsFiJ`2xF1%&@@Z;m%--L&Z>j8Y>q4F-H7hDx|RLp{5+Y4xP0UnqJBLkr; z9^4O?CMx_hOiM`Jn8b6ZLUaEo71lWENVO(kaNrivHjVT!SG)V8cBi3&9BW;cjr(K* z{-iA6DbhJ0+Q2vX$oQ(WUa$BfCkLbe`rS_l=wHc#N)b;TRz)4-Piw(Q3I2>wQ?Q^` z^%m)1AkfyZ75!>xRlg-@a`y5uTdgQ-w_2Kd7r&izAx2KUALiXb?&LwXRzM-(m*Rk? zH1L^RVIs-=Hghze!Pl8kPn{npwFQ7oATj31O=O|mX<7I=41K4YMT1WKJ}t34t+H3M e3-)rUdZp@DFH|oqT&Z5FHp+J4U0mO*-uWLLrLZIb literal 0 HcmV?d00001 diff --git a/arghmm/deps/compbio/coal.py b/arghmm/deps/compbio/coal.py new file mode 100644 index 00000000..abe25a43 --- /dev/null +++ b/arghmm/deps/compbio/coal.py @@ -0,0 +1,2254 @@ +""" + + Coalescent methods + + +A note about population size. In this code all population sizes N or n are +uncorrected. If you need to compute a coalescent for a diploid species +you must multiply N by 2 before passing it to any of these functions. + +""" + +#============================================================================= +# imports + +from __future__ import division + +# python imports +import itertools +from itertools import chain, izip +from math import * +import random +from collections import defaultdict + +# rasmus imports +from rasmus import treelib, stats, util, linked_list + +try: + from rasmus.symbolic import * +except ImportError: + # only experimental functions need symbolic + pass + +# compbio imports +from . 
import birthdeath + +# import root finder +try: + from scipy.optimize import brentq +except ImportError: + def brentq(f, a, b, disp=False): + return stats.bisect_root(f, a, b) + + + + +#============================================================================= +# single coalescent PDFs, CDFs, and sampling functions + + +def prob_coal(t, k, n): + """ + Returns the probability density of observing the first coalesce of 'k' + individuals in a population size of 'n' at generation 't' + """ + + # k choose 2 + k2 = k * (k-1) / 2 + k2n = k2 / n + return k2n * exp(- k2n * t) + + +def sample_coal(k, n): + """ + Returns a sample coalescent time for 'k' individuals in a population 'n' + """ + + # k choose 2 + k2 = k * (k-1) / 2 + k2n = k2 / n + return random.expovariate(k2n) + + +def sample_coal_times(k, n): + """ + Returns a sampling of (k-1) coalescences for 'k' lineages in a + population of size 'n'. + """ + times = [0] + for j in xrange(k, 1, -1): + times.append(times[-1] + sample_coal(j, n)) + return times[1:] + + +def prob_coal_counts(a, b, t, n): + """ + The probabiluty of going from 'a' lineages to 'b' lineages in time 't' + with population size 'n' + """ + + C = stats.prod((b+y)*(a-y)/(a+y) for y in xrange(b)) + s = exp(-b*(b-1)*t/2.0/n) * C + for k in xrange(b+1, a+1): + k1 = k - 1 + C = (b+k1)*(a-k1)/(a+k1)/(b-k) * C + s += exp(-k*k1*t/2.0/n) * (2*k-1) / (k1+b) * C + + return s / stats.factorial(b) + + +def prob_coal_counts_slow(a, b, t, n): + """ + The probability of going from 'a' lineages to 'b' lineages in time 't' + with population size 'n' + + Implemented more directly, but slower. Good for testing against. + """ + + s = 0.0 + for k in xrange(b, a+1): + i = exp(-k*(k-1)*t/2.0/n) * \ + (2*k-1)*(-1)**(k-b) / stats.factorial(b) / \ + stats.factorial(k-b) / (k+b-1) * \ + stats.prod((b+y)*(a-y)/(a+y) for y in xrange(k)) + s += i + return s + + +def prob_coal_cond_counts(x, a, b, t, n): + """ + Returns the probability density of a coalescent happening at time 'x' + between 'a' lineages conditioned on there being 'b' lineages at time + 't'. The population size is 'n'. + """ + + lama = -a*(a-1)/2.0/n + C = stats.prod((b+y)*(a-1-y)/(a-1+y) for y in xrange(b)) + s = exp(-b*(b-1)/2.0/n*(t-x) + lama*x) * C + for k in xrange(b+1, a): + k1 = k - 1 + lam = -k*k1/2.0/n + C = (b+k1)*(a-1-k1)/(a-1+k1)/(b-k) * C + s += exp(lam*t + (lama-lam)*x) * (2*k-1) / (k1+b) * C + + return s / stats.factorial(b) * (-lama) / prob_coal_counts(a, b, t, n) + + +def prob_coal_cond_counts_simple(x, a, b, t, n): + """ + Returns the probability density of a coalescent happening at time 'x' + between 'a' lineages conditioned on there being 'b' lineages at time + 't'. The population size is 'n'. + """ + + return (prob_coal_counts(a-1, b, t-x, n) * prob_coal(x, a, n) / + prob_coal_counts(a, b, t, n)) + + +def cdf_coal_cond_counts(x, a, b, t, n): + """ + Returns the probability a coalescent happening *before* time 'x' + between 'a' lineages conditioned on there being 'b' lineages at time + 't'. The population size is 'n'. 
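
    Rough sanity check (a=5, b=2, t=100.0 and n=1000 are arbitrary example
    values, not defaults of this module):

        u = cdf_coal_cond_counts(50.0, 5, 2, 100.0, 1000)
        # 0 <= u <= 1, and cdf_coal_cond_counts(100.0, 5, 2, 100.0, 1000)
        # should be ~1.0: given that only 2 lineages remain at time 't',
        # the first coalescence must have happened before 't'.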
+ """ + + lama = -a*(a-1)/2.0/n + C = stats.prod((b+y)*(a-1-y)/(a-1+y) for y in xrange(b)) + c = -b*(b-1)/2.0/n + s = exp(c*t) * (exp((lama-c)*x)-1.0) / (lama-c) * C + for k in xrange(b+1, a): + k1 = k - 1 + lam = -k*k1/2.0/n + C = (b+k1)*(a-1-k1)/(a-1+k1)/(b-k) * C + s += exp(lam*t) * (exp((lama-lam)*x) - 1.0) / (lama - lam) \ + * (2*k-1) / (k1+b) * C + + return s / stats.factorial(b) * (-lama) / prob_coal_counts(a, b, t, n) + + +def sample_coal_cond_counts(a, b, t, n): + """ + Samples the next coalescent between 'a' lineages in a population size of + 'n', conditioned on there being 'b' lineages at time 't'. + """ + + # this code solves this equation for t + # cdf(t) - p = 0 + # where p ~ U(0, 1) + + p = random.random() + + # compute constants + lama = -a*(a-1)/2.0/n + C0 = stats.prod((b+y)*(a-1-y)/(a-1+y) for y in xrange(b)) + c = -b*(b-1)/2.0/n + d = 1.0/stats.factorial(b) * (-lama) / prob_coal_counts(a, b, t, n) + + + # CDF(t) - p + def f(x): + if x <= 0: + return x - p + if x >= t: + return 1.0 - p + (x - t) + + C = C0 + s = exp(c*t) * (exp((lama-c)*x)-1.0) / (lama-c) * C + for k in xrange(b+1, a): + k1 = k - 1 + lam = -k*k1/2.0/n + C = (b+k1)*(a-1-k1)/(a-1+k1)/(b-k) * C + s += exp(lam*t) * (exp((lama-lam)*x) - 1.0) / (lama - lam) \ + * (2*k-1) / (k1+b) * C + + return s * d - p + + return brentq(f, 0.0, t, disp=False) + + + +def prob_mrca(t, k, n): + """ + Probability density function of the age 't' of the most recent + common ancestor (MRCA) of 'k' lineages in a population size 'n' + """ + + s = 0.0 + for i in xrange(1, k): + lam = (i+1) * i / 2.0 / n + s += lam * exp(- lam * t) * mrca_const(i, 1, k-1) + return s + + +def cdf_mrca(t, k, n): + """ + Cumulative probability density of the age 't' of the most recent common + ancestor (MRCA) of 'k' lineages in a population size 'n' + """ + + if k == 1: + return 1.0 + + s = 0.0 + for i in xrange(1, k+1): + lam = i * (i-1) / (2.0 * n) + p = 1.0 + for y in xrange(1, i): + p *= (y-k) / (k+y) + s += exp(-lam * t) * (2*i - 1) * p + return s + + +def mrca_const(i, a, b): + """A constant used in calculating MRCA""" + + # i+1 choose 2 + y = (i+1) * i / 2.0 + prod = 1.0 + + for j in xrange(a, b+1): + if j == i: + continue + # j+1 choose 2 + x = (j+1) * j / 2.0 + prod *= x / (x - y) + return prod + + + +def prob_bounded_coal(t, k, n, T): + """ + Probability density function of seeing a coalescence at 't' from + 'k' lineages in a population of size 'n' with bounding time 'T' + """ + + if t > T: + return 0.0 + + if k == 2: + prob_coal(t, k, n) + return prob_coal(t, k, n) * cdf_mrca(T-t, k-1, n) / \ + cdf_mrca(T, k, n) + + +def cdf_bounded_coal(t, k, n, T): + """ + Cumalative density function of seeing a coalescence at 't' from + 'k' lineages in a population of size 'n' with bounding time 'T' + """ + i = k - 1 + + lam_i = (i+1)*i/2.0 / n + C = [mrca_const(j, 1, i-1) for j in xrange(1, i)] + A = lam_i / n / cdf_mrca(T, k, n) + B = sum(C) / lam_i + F = [C[j-1] * exp(-(j+1)*j/2.0/n * T) / ((j+1)*j/2.0/n - lam_i) + for j in xrange(1, i)] + + return (lam_i / cdf_mrca(T, k, n) * + (B * (1-exp(-lam_i * t)) + - sum(F[j-1] * (exp(((j+1)*j/2.0/n - lam_i)*t)-1) + for j in xrange(1, i)))) + + +def sample_bounded_coal(k, n, T): + """ + Sample a coalescent time 't' for 'k' lineages and population 'n' + on the condition that the MRCA is before 'T' + """ + + # special case + if k == 2: + return sample_bounded_coal2(n, T) + + # this code solves this equation for t + # cdf(t) - p = 0 + # where p ~ U(0, 1) + + i = k - 1 + p = random.random() + + # compute constants + 
lam_i = (i+1)*i/2.0 / n + C = [mrca_const(j, 1, i-1) for j in xrange(1, i)] + A = lam_i / cdf_mrca(T, k, n) + B = sum(C) / lam_i + F = [C[j-1] * exp(-(j+1)*j/2.0/n * T) / ((j+1)*j/2.0/n - lam_i) + for j in xrange(1, i)] + + # CDF(t) - p + def f(t): + if t <= 0: + return t - p + if t >= T: + return 1.0 - p + (t - T) + + return (A * (B * (1-exp(-lam_i * t)) + - sum(F[j-1] * (exp(((j+1)*j/2.0/n - lam_i)*t)-1) + for j in xrange(1, i)))) - p + + return brentq(f, 0.0, T, disp=False) + + +def sample_bounded_coal2(n, T): + """ + Sample a coalescent time 't' for 'k=2' lineages and population 'n' + on the condition that the MRCA is before 'T' + """ + + # sample from a truncated expontial distribution + + # k choose 2 + lam = 1 / n + p = exp(-lam * T) + return - log(random.uniform(p, 1.0)) / lam + + +def sample_bounded_coal_reject(k, n, T): + """ + Sample a coalescent time 't' for 'k' lineages and population 'n' + on the condition that the MRCA is before 'T' + + Uses rejection sampling. It works but is very inefficient. + """ + + i = k - 1 + consts = [mrca_const(j, 1, i-1) for j in xrange(1, i)] + x = sum(consts) + + while True: + while True: + t = sample_coal(k, n) + if t < T: + break + + if i == 1: + return t + + y = sum(mrca_const(j, 1, i-1) * exp(-((j+1) * j / 2.0 / n) * (T - t)) + for j in xrange(1, i)) + + r = 1 - y / x + + if random.random() < r: + return t + + +def count_lineages_per_branch(tree, recon, stree): + """ + Returns the count of gene lineages present at each node in the species + tree 'tree' given a gene tree 'tree' and reconciliation 'recon' + """ + + # init lineage counts + lineages = {} + for snode in stree: + lineages[snode] = [0, 0] + + for node in tree.postorder(): + snode = recon[node] + if node.is_leaf(): + lineages[snode][0] += 1 # leaf lineage + else: + lineages[snode][1] -= 1 # coal + + for snode in stree.postorder(): + if not snode.is_leaf(): + lineages[snode][0] = sum(lineages[x][1] for x in snode.children) + lineages[snode][1] += lineages[snode][0] + + return lineages + + +def get_topology_stats(tree, recon, stree): + """ + The function computes terms necessary for many topology calculations + """ + + # How many gene nodes per species + nodes_per_species = dict.fromkeys(stree, 0) + + # How many descendent nodes recon to the same species + descend_nodes = {} + + # iterate through tree + for node in tree.postorder(): + if len(node.children) > 1: + nodes_per_species[recon[node]] += 1 + if not node.is_leaf(): + descend_nodes[node] = 1 + sum(descend_nodes.get(child, 0) + for child in node.children + if recon[child] == recon[node]) + + return nodes_per_species, descend_nodes + + + +def prob_multicoal_recon_topology(tree, recon, stree, n, + lineages=None, top_stats=None): + """ + Returns the log probability of a reconciled gene tree ('tree', 'recon') + from the coalescent model given a species tree 'stree' and + population sizes 'n' + """ + + popsizes = init_popsizes(stree, n) + if lineages is None: + lineages = count_lineages_per_branch(tree, recon, stree) + if top_stats is None: + top_stats = get_topology_stats(tree, recon, stree) + + # iterate through species tree branches + lnp = 0.0 # log probability + for snode in stree.postorder(): + if snode.parent: + # non root branch + a, b = lineages[snode] + + try: + p = (util.safelog(prob_coal_counts(a, b, snode.dist, + popsizes[snode.name])) + + stats.logfactorial(top_stats[0].get(snode, 0)) + - log(num_labeled_histories(a, b))) + except: + print (a, b, snode.dist, popsizes[snode.name], + prob_coal_counts(a, b, snode.dist, + 
popsizes[snode.name]), + ) + + raise + + #p = log(prob_coal_counts(a, b, snode.dist, + # popsizes[snode.name]) * + # stats.factorial(top_stats[0].get(snode, 0)) + # / num_labeled_histories(a, b)) + lnp += p + else: + a = lineages[snode][0] + lnp += (stats.logfactorial(top_stats[0].get(snode, 0)) - + log(num_labeled_histories(a, 1))) + + for node, cnt in top_stats[1].iteritems(): + lnp -= log(cnt) + + return lnp + + + +def cdf_mrca_bounded_multicoal(gene_counts, T, stree, n, + sroot=None, sleaves=None, stimes=None, + tree=None, recon=None): + """ + What is the log probability that multispecies coalescent in species + tree 'stree' with population sizes 'n' and extant gene counts 'gene_counts' + will have a MRCA that occurs in branch 'sroot' before time 'T'. + + As a convenience, you can pass None for gene_counts and give a reconciled + gene tree instead ('tree', 'recon'). + """ + + # determine active part of species tree + if sroot is None: + sroot = stree.root + if sleaves is None: + sleaves = set(sroot.leaves()) + + if len(sleaves) == 0: + return 0.0 + + # init gene counts + if gene_counts is None: + if tree is None: + gene_counts = dict.fromkeys([x.name for x in sleaves], 1) + else: + gene_counts = dict.fromkeys([x.name for x in sleaves], 0) + for leaf in tree.leaves(): + gene_counts[recon[leaf].name] += 1 + + popsizes = init_popsizes(stree, n) + + # get time to MRCA above sroot + if stimes is None: + stimes = treelib.get_tree_timestamps(stree, sroot, sleaves) + + # use dynamic programming to calc prob of lineage counts + prob_counts = calc_prob_counts_table(gene_counts, T, stree, popsizes, + sroot, sleaves, stimes) + return util.safelog(prob_counts[sroot][1][1]) + + +def calc_prob_counts_table(gene_counts, T, stree, popsizes, + sroot, sleaves, stimes): + + # use dynamic programming to calc prob of lineage counts + # format: prob_counts[node] = [a, b] + prob_counts = {} + def walk(node): + if node in sleaves: + # leaf case + M = gene_counts[node.name] + + # populate starting lineage counts + start = [0.0] * (M+1) + start[M] = 1.0 + + elif len(node.children) == 2: + # internal node case with 2 children + + c1 = node.children[0] + c2 = node.children[1] + M1 = walk(c1) + M2 = walk(c2) + M = M1 + M2 # max lineage counts in this snode + end1 = prob_counts[c1][1] + end2 = prob_counts[c2][1] + + # populate starting lineage counts + start = [0.0, 0.0] + for k in xrange(2, M+1): + start.append(sum(end1[i] * end2[k-i] + for i in xrange(1, k) + if i <= M1 and k-i <= M2)) + + elif len(node.children) == 1: + # single child case + + c1 = node.children[0] + M1 = walk(c1) + M = M1 # max lineage counts in this snode + end1 = prob_counts[c1][1] + + # populate starting lineage counts with child's ending counts + start = [0.0] + for k in xrange(1, M+1): + start.append(end1[k]) + + else: + # unhandled case + raise Exception("not implemented") + + + + # populate ending lineage counts + n = popsizes[node.name] + ptime = stimes[node.parent] if node.parent else T + if ptime is None: + # unbounded end time, i.e. 
complete coalescence + end = [0.0, 1.0] + [0.0] * (M-1) + else: + # fixed end time + t = ptime - stimes[node] + + end = [0.0] + for k in xrange(1, M+1): + end.append( + sum(prob_coal_counts(i, k, t, n) * start[i] + for i in xrange(k, M+1))) + + prob_counts[node] = [start, end] + + assert abs(sum(start) - 1.0) < .001, (start, node.children) + + return M + M = walk(sroot) + + return prob_counts + + +def prob_coal_bmc(t, u, utime, ucount, gene_counts, T, stree, n, + sroot=None, sleaves=None, stimes=None, + tree=None, recon=None): + """ + The PDF of the waiting time 't' for the next coalescent event in species + branch 'u' within a bounded multispecies coalescent (BMC) process. + """ + + # NOTE: not implemented efficiently + + if sroot is None: + sroot = stree.root + + # find relevent leaves of stree (u should be treated as a leaf) + if sleaves is None: + sleaves = set() + def walk(node): + if node.is_leaf() or node == u: + sleaves.add(node) + else: + for child in node.children: + walk(child) + walk(sroot) + + + # find timestamps of stree nodes + if stimes is None: + # modify timestamp of u to be that of the previous coal (utime) + stimes = {u : utime} + stimes = treelib.get_tree_timestamps(stree, sroot, sleaves, stimes) + + # init gene counts + if gene_counts is None: + if tree is None: + gene_counts = dict.fromkeys([x.name for x in sleaves], 1) + else: + gene_counts = dict.fromkeys([x.name for x in sleaves], 0) + for leaf in tree.leaves(): + gene_counts[recon[leaf].name] += 1 + + # modify gene counts for species u + gene_counts[u.name] = ucount + + popsizes = init_popsizes(stree, n) + + + p = cdf_mrca_bounded_multicoal(gene_counts, T, stree, popsizes, + sroot=sroot, sleaves=sleaves, + stimes=stimes, tree=tree, recon=recon) + + gene_counts[u.name] = ucount - 1 + stimes[u] = utime + t + + p2 = cdf_mrca_bounded_multicoal(gene_counts, T, stree, popsizes, + sroot=sroot, sleaves=sleaves, + stimes=stimes, tree=tree, recon=recon) + + gene_counts[u.parent.name] = ucount + stimes[u] = stimes[u.parent] + + p3 = cdf_mrca_bounded_multicoal(gene_counts, T, stree, popsizes, + sroot=sroot, sleaves=sleaves, + stimes=stimes, tree=tree, recon=recon) + + p4 = log(prob_coal(t, ucount, popsizes[u.name])) + + p5 = log(prob_coal_counts(ucount, ucount, + stimes[u.parent] - utime, popsizes[u.name])) + + return (p2 + p4) - stats.logsub(p, p3 + p5) + + +def prob_no_coal_bmc(u, utime, ucount, gene_counts, T, stree, n, + sroot=None, sleaves=None, stimes=None, + tree=None, recon=None): + """ + Returns the log probability of no coalescent occurring in branch u + of the species tree during a bounded multispecies coalescent (BMC). 
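
    Call-shape sketch (every value below is a placeholder; 'stree' is assumed
    to be a treelib.Tree with branch lengths in generations, 'u' one of its
    nodes, and 'gene_tree'/'recon' an optional reconciled gene tree):

        lnp = prob_no_coal_bmc(u, utime=500.0, ucount=3, gene_counts=None,
                               T=2000.0, stree=stree, n=1000,
                               tree=gene_tree, recon=recon)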
+ """ + + if sroot is None: + sroot = stree.root + + # find relevent leaves of stree (u should be treated as a leaf) + if sleaves is None: + sleaves = set() + def walk(node): + if node.is_leaf() or node == u: + sleaves.add(node) + else: + for child in node.children: + walk(child) + walk(sroot) + + + # find timestamps of stree nodes + if stimes is None: + # modify timestamp of u to be that of the previous coal (utime) + stimes = {u : utime} + stimes = treelib.get_tree_timestamps(stree, sroot, sleaves, stimes) + + # init gene counts + if gene_counts is None: + if tree is None: + gene_counts = dict.fromkeys([x.name for x in sleaves], 1) + else: + gene_counts = dict.fromkeys([x.name for x in sleaves], 0) + for leaf in tree.leaves(): + gene_counts[recon[leaf].name] += 1 + + # modify gene counts for species u + gene_counts[u.name] = ucount + + popsizes = init_popsizes(stree, n) + + + p = cdf_mrca_bounded_multicoal(gene_counts, T, stree, popsizes, + sroot=sroot, sleaves=sleaves, stimes=stimes, + tree=tree, recon=recon) + + gene_counts[u.parent.name] = ucount + stimes[u] = stimes[u.parent] + + p2 = cdf_mrca_bounded_multicoal(gene_counts, T, stree, popsizes, + sroot=sroot, sleaves=sleaves, + stimes=stimes, tree=tree, recon=recon) + + p3 = log(prob_coal_counts(ucount, ucount, + stimes[u.parent] - utime, popsizes[u.name])) + + return p2 - p + p3 + + + +def num_labeled_histories(nleaves, nroots): + n = 1.0 + for i in xrange(nroots + 1, nleaves + 1): + n *= i * (i - 1) / 2.0 + return n + +def log_num_labeled_histories(nleaves, nroots): + n = 0.0 + for i in xrange(nroots + 1, nleaves + 1): + n += log(i * (i - 1) / 2.0) + return n + + +def prob_bounded_multicoal_recon_topology(tree, recon, stree, n, T, + root=None, leaves=None, + lineages=None, top_stats=None, + stimes=None): + """ + Returns the log probability of a reconciled gene tree ('tree', 'recon') + from the coalescent model given a species tree 'stree' and + population sizes 'n' and stopping time 'T' + """ + + # get input stats + popsizes = init_popsizes(stree, n) + if lineages is None: + lineages = count_lineages_per_branch(tree, recon, stree) + if top_stats is None: + top_stats = get_topology_stats(tree, recon, stree) + if stimes is None: + stimes = treelib.get_tree_timestamps(stree) + + + p = prob_multicoal_recon_topology(tree, recon, stree, popsizes, + lineages=lineages, top_stats=top_stats) + k_root = lineages[stree.root][0] + T_root = T - stimes[stree.root] + return log(cdf_mrca(T_root, k_root, popsizes[recon[tree.root].name])) + p \ + - cdf_mrca_bounded_multicoal(None, T, stree, popsizes, + tree=tree, recon=recon, stimes=stimes) + + + +#============================================================================= +# sampling coalescent trees +# +# - normal kingman coalescent +# - censored coalescent +# - bounded coalescent (conditioned on completion before a fixed time) +# + + +def sample_coal_tree(k, n): + """ + Returns a simulated coalescent tree for 'k' leaves from a population 'n'. + """ + times = [0] + for j in xrange(k, 1, -1): + times.append(times[-1] + sample_coal(j, n)) + return make_tree_from_times(times)[0] + + +def sample_bounded_coal_tree(k, n, T, capped=False): + """ + Returns a simulated coalescent tree for 'k' leaves from a populations 'n' + with fixed maximum time 't'. The simulation is conditioned on returning + a tree that completely coaleces before time 'T'. + + capped -- if True an artificial root to the tree. Used primarily by + other methods. 
+ """ + times = [0] + for j in xrange(k, 1, -1): + times.append(times[-1] + sample_bounded_coal(j, n, T - times[-1])) + return make_tree_from_times(times, t=T, capped=capped)[0] + + +def sample_bounded_coal_tree_reject(k, n, T, capped=False): + """ + Returns a simulated coalescence tree for k leaves from a populations n + with fixed maximum time t. The simulation is conditioned on returning + a tree that completely coaleces before time T. + + This works, but is very inefficient. Use sample_coal_tree_bounded + instead. + """ + + # sample times with rejection sampling + while True: + times = [0] + for j in xrange(k, 1, -1): + times.append(times[-1] + sample_coal(j, n)) + if times[-1] < t: + break + + return make_tree_from_times(times, t=T, capped=capped)[0] + + +def sample_censored_coal_tree(k, n, t, capped=False): + """ + Returns a simulated coalescence tree for 'k' leaves from a population size + 'n' with a fixed maximum time 't'. + + The return value is the tuple (tree, lineages) where lineages is a set + of lineages that have not yet coalesced. + + capped -- if True, remaining lineages are added as children to a artificial + tree root. + """ + + times = [0] + for j in xrange(k, 1, -1): + times.append(times[-1] + sample_coal(j, n)) + if times[-1] > t: + times.pop() + break + + return make_tree_from_times(times, k, t, capped=capped) + + +def sample_coal_cond_counts_tree(a, b, t, n, capped=False): + """ + Returns a simulated coalescence tree for 'a' leaves from a population size + 'n', conditioned on their being 'b' lineages at time 't'. + + The return value is the tuple (tree, lineages) where lineages is a set + of lineages that have not yet coalesced. + + capped -- if True, remaining lineages are added as children to a artificial + tree root. + """ + + times = [0] + for j in xrange(a, b, -1): + times.append(times[-1] + sample_coal_cond_counts(j, b, t-times[-1], n)) + + return make_tree_from_times(times, a, t, capped=capped) + + + +def init_popsizes(stree, n): + """ + Uses 'n' to initialize a population size dict for species tree 'stree' + """ + + if isinstance(n, (int, float)): + return dict.fromkeys(stree.nodes.keys(), n) + elif isinstance(n, dict): + return n + else: + raise Exception("n must be a int or dict.") + + +# TODO: right now this assumes that there are at least 1 or more genes +# in each extant species + +def sample_multicoal_tree(stree, n, leaf_counts=None, + namefunc=None, sroot=None, sleaves=None): + """ + Returns a gene tree from a multi-species coalescence process + + stree -- species tree + n -- population size (int or dict) + If n is a dict it must map from species name to + population size. + leaf_counts -- dict of species names to a starting gene count. + Default is 1 gene per extant species. + namefunc -- a function that generates new gene names given a species + name. 
+ """ + + if sleaves is None: + sleaves = set(stree.leaves()) + if sroot is None: + sroot = stree.root + + # initialize vector for how many genes per extant species + if leaf_counts is None: + leaf_counts = dict((l, 1) for l in stree.leaf_names()) + + # initialize function for generating new gene names + if namefunc is None: + spcounts = dict((l, 1) for l in stree.leaf_names()) + def namefunc(sp): + name = sp + "_" + str(spcounts[sp]) + spcounts[sp] += 1 + return name + + # initialize population sizes + popsizes = init_popsizes(stree, n) + + # init gene counts + counts = dict((n.name, 0) for n in stree) + counts.update(leaf_counts) + + # init reconciliation + recon = {} + + # subtrees + subtrees = {} + + queue = MultiPushQueue(sleaves) + + # loop through species tree + for snode in queue: #stree.postorder(): + # simulate population for one branch + k = counts[snode.name] + + if snode != sroot: + # non basal branch + queue.push(snode.parent, len(snode.parent.children)) + subtree, lineages = sample_censored_coal_tree( + k, popsizes[snode.name], snode.dist, capped=True) + counts[snode.parent.name] += len(lineages) + else: + # basal branch + subtree = sample_coal_tree(k, popsizes[snode.name]) + lineages = [subtree.root] + subtrees[snode] = (subtree, lineages) + + for node in subtree: + recon[node] = snode + + + # stitch subtrees together + tree = treelib.Tree() + + # add all nodes to total tree + for subtree, lineages in subtrees.values(): + tree.merge_names(subtree) + tree.remove(subtree.root) + del recon[subtree.root] + + for snode in subtrees: + if snode not in sleaves: # not snode.is_leaf(): + subtree, lineages = subtrees[snode] + + # get lineages from child subtrees + lineages2 = chain(*[subtrees[child][1] + for child in snode.children]) + + # ensure leaves are randomly attached + leaves = subtree.leaves() + random.shuffle(leaves) + + # stitch leaves of the subtree to children subtree lineages + for leaf, lineage in izip(leaves, lineages2): + tree.add_child(leaf, lineage) + + + # set root + tree.root = subtrees[sroot][0].root + tree.add(tree.root) + recon[tree.root] = sroot + + # name leaves + for node in tree: + if recon[node].is_leaf(): + tree.rename(node.name, namefunc(recon[node].name)) + + #print "HERE" + #treelib.draw_tree_names(tree, maxlen=8) + #print "recon", [(x[0].name, x[1].name) for x in recon.items()] + + + return tree, recon + + +def sample_bounded_multicoal_tree(stree, n, T, leaf_counts=None, namefunc=None, + sroot=None, sleaves=None, stimes=None, + gene_counts=None): + """ + Returns a gene tree from a bounded multi-species coalescence process + + stree -- species tree + n -- population size (int or dict) + If n is a dict it must map from species name to + population size. + T -- deadline for complete coalescence + leaf_counts -- dict of species names to a starting gene count. + Default is 1 gene per extant species. + namefunc -- a function that generates new gene names given a species + name. 
+ sleaves -- you can specify a subtree of the stree by giving the a + list 'sleaves' of leaf nodes of the stree + sroot -- you can specify a subtree of the stree by giving the + subtree root node 'sroot' + """ + + # initialize vector for how many genes per extant species + if sleaves is None: + sleaves = set(stree.leaves()) + if sroot is None: + sroot = stree.root + if leaf_counts is None: + leaf_counts = dict((l.name, 1) for l in sleaves) + + # initialize function for generating new gene names + if namefunc is None: + spcounts = dict((l.name, 1) for l in sleaves) + def namefunc(sp): + name = sp + "_" + str(spcounts[sp]) + spcounts[sp] += 1 + return name + + # initialize population sizes + popsizes = init_popsizes(stree, n) + + # init gene counts + if gene_counts is None: + gene_counts = dict.fromkeys([x.name for x in sleaves], 1) + + # init species tree timestamps + if stimes is None: + stimes = treelib.get_tree_timestamps(stree) + + + # calc table + prob_counts = calc_prob_counts_table(gene_counts, T, stree, popsizes, + sroot, sleaves, stimes) + + # init lineage counts + lineages = {sroot: [None, 1]} + for node in sleaves: + lineages[node] = [gene_counts[node.name], None] + + + # sample lineage counts + sample_lineage_counts(sroot, sleaves, + popsizes, stimes, T, lineages, prob_counts) + + # sample coal times + tree, recon = coal_cond_lineage_counts(lineages, sroot, sleaves, + popsizes, stimes, T, namefunc) + return tree, recon + + +def sample_lineage_counts(node, leaves, + popsizes, stimes, T, lineages, prob_counts): + """ + Sample lineage counts conditioned on counts at root and leaves + of species tree + """ + + a, b = lineages[node] + if node not in leaves: + if len(node.children) == 2: + # two child case + + c1 = node.children[0] + c2 = node.children[1] + probs1 = prob_counts[c1][1] + probs2 = prob_counts[c2][1] + + if b is None: + # special case where no ending count 'b' is conditioned + k1 = stats.sample(probs1) + k2 = stats.sample(probs2) + else: + # condition on ending count 'b' + if node.parent: + t = stimes[node.parent] - stimes[node] + else: + t = T - stimes[node] + n = popsizes[node.name] + + reject = 0 + while True: + k1 = stats.sample(probs1) + k2 = stats.sample(probs2) + if random.random() < prob_coal_counts(k1 + k2, b, t, n): + # accept + break + reject += 1 + + # set linages counts + lineages[node][0] = k1 + k2 + if c1 not in lineages: + lineages[c1] = [None, k1] + else: + lineages[c1][1] = k1 + if c2 not in lineages: + lineages[c2] = [None, k2] + else: + lineages[c2][1] = k2 + + # recurse + sample_lineage_counts(c1, leaves, + popsizes, stimes, T, lineages, prob_counts) + sample_lineage_counts(c2, leaves, + popsizes, stimes, T, lineages, prob_counts) + + elif len(node.children) == 1: + # single child case + + c1 = node.children[0] + probs1 = prob_counts[c1][1] + + if b is None: + # special case where no ending count 'b' is conditioned + k1 = stats.sample(probs1) + else: + # condition on ending count 'b' + if node.parent: + t = stimes[node.parent] - stimes[node] + else: + t = T - stimes[node] + n = popsizes[node.name] + + reject = 0 + while True: + k1 = stats.sample(probs1) + if random.random() < prob_coal_counts(k1, b, t, n): + # accept + break + reject += 1 + + # set linages counts + lineages[node][0] = k1 + if c1 not in lineages: + lineages[c1] = [None, k1] + else: + lineages[c1][1] = k1 + + # recurse + sample_lineage_counts(c1, leaves, + popsizes, stimes, T, lineages, prob_counts) + + else: + # unhandled case + raise Excepiton("not implemented") + + +def 
coal_cond_lineage_counts(lineages, sroot, sleaves, popsizes, stimes, T, + namefunc): + """Sample coalescent times conditioned on lineage counts""" + + # init reconciliation and subtree dicts + recon = {} + subtrees = {} + caps = set() + + # sample coalescent times + queue = MultiPushQueue(sleaves) + + # loop through species tree + for snode in queue: + # simulate population for one branch + a, b = lineages[snode] + + if snode != sroot: + t = stimes[snode.parent] - stimes[snode] + queue.push(snode.parent, len(snode.parent.children)) + else: + t = T - stimes[snode] if T is not None else None + + if t is None: + subtree = sample_coal_tree(a, popsizes[snode.name]) + tops = [subtree.root] + else: + subtree, tops = sample_coal_cond_counts_tree( + a, b, t, popsizes[snode.name], capped=True) + + caps.add(subtree.root) + + subtrees[snode] = (subtree, tops) + for node in subtree: + recon[node] = snode + + tree = join_subtrees(subtrees, recon, caps, sroot) + + # set name leaves + for leaf in tree.leaves(): + tree.rename(leaf.name, namefunc(recon[leaf].name)) + + return tree, recon + + +def join_subtrees(subtrees, recon, caps, sroot): + """Join several subtrees together into one subtree""" + + # stitch subtrees together + tree = treelib.Tree() + + # add all nodes to total tree + for snode, (subtree, tops) in subtrees.iteritems(): + tree.merge_names(subtree) + + # remove cap nodes + for node in caps: + # remove cap node + tree.remove(node) + del recon[node] + + for snode in subtrees: + subtree, tops = subtrees[snode] + + # get lineages from child subtrees + lineages2 = list(chain(*[subtrees[child][1] + for child in snode.children])) + + if len(lineages2) == 0: + # noting to connect + continue + + # ensure leaves are randomly attached + leaves = subtree.leaves() + random.shuffle(leaves) + + # stitch leaves of the subtree to children subtree lineages + for leaf, lineage in izip(leaves, lineages2): + tree.add_child(leaf, lineage) + + + # set root + tree.root = subtrees[sroot][0].root + if tree.root in caps and len(tree.root.children) == 1: + tree.root = tree.root.children[0] + + return tree + + + + +def sample_bounded_multicoal_tree_reject(stree, n, T, leaf_counts=None, + namefunc=None, sleaves=None, + sroot=None): + """ + Returns a gene tree from a bounded multi-species coalescence process + + stree -- species tree + n -- population size (int or dict) + If n is a dict it must map from species name to + population size. + T -- deadline for complete coalescence + leaf_counts -- dict of species names to a starting gene count. + Default is 1 gene per extant species. + namefunc -- a function that generates new gene names given a species + name. 
+ sleaves -- you can specify a subtree of the stree by giving the a + list 'sleaves' of leaf nodes of the stree + sroot -- you can specify a subtree of the stree by giving the + subtree root node 'sroot' + """ + + # initialize vector for how many genes per extant species + if sleaves is None: + sleaves = set(stree.leaves()) + if sroot is None: + sroot = stree.root + if leaf_counts is None: + leaf_counts = dict((l.name, 1) for l in sleaves) + + # initialize function for generating new gene names + if namefunc is None: + spcounts = dict((l.name, 1) for l in sleaves) + def namefunc(sp): + name = sp + "_" + str(spcounts[sp]) + spcounts[sp] += 1 + return name + + # initialize population sizes + popsizes = init_popsizes(stree, n) + + reject = 0 + while True: + queue = MultiPushQueue(sleaves) + + # init gene counts + counts = dict((n.name, 0) for n in stree) + counts.update(leaf_counts) + + # init reconciliation + recon = {} + + # subtrees + subtrees = {} + + # loop through species tree + for snode in queue: + # simulate population for one branch + k = counts[snode.name] + + if snode != sroot: + # non basal branch + subtree, lineages = sample_censored_coal_tree( + k, popsizes[snode.name], snode.dist, capped=True) + queue.push(snode.parent, len(snode.parent.children)) + else: + # basal branch + subtree = sample_coal_tree(k, popsizes[snode.name]) + lineages = subtree.root + subtrees[snode] = (subtree, lineages) + if snode != sroot: + counts[snode.parent.name] += len(lineages) + for node in subtree: + recon[node] = snode + + # stitch subtrees together + tree = treelib.Tree() + + # add all nodes to total tree + for subtree, lineages in subtrees.values(): + tree.merge_names(subtree) + tree.remove(subtree.root) + del recon[subtree.root] + + for snode in subtrees: + if not snode.is_leaf(): + subtree, lineages = subtrees[snode] + + # get lineages from child subtrees + lineages2 = chain(*[subtrees[child][1] + for child in snode.children]) + + # ensure leaves are randomly attached + leaves = subtree.leaves() + random.shuffle(leaves) + + # stitch leaves of the subtree to children subtree lineages + for leaf, lineage in izip(leaves, lineages2): + tree.add_child(leaf, lineage) + + + # set root + tree.root = subtrees[sroot][0].root + tree.add(tree.root) + recon[tree.root] = sroot + + + # reject tree if basal branch goes past deadline + times = treelib.get_tree_timestamps(tree) + if times[tree.root] < T: + break + else: + reject += 1 + #print "reject", reject, times[tree.root], T + + + # name leaves + for leaf in tree.leaves(): + tree.rename(leaf.name, namefunc(recon[leaf].name)) + + return tree, recon + + + + +def make_tree_from_times(times, k=None, t=None, leaves=None, capped=False): + """ + Returns a Tree from a list of divergence times. + + The topology is choosen by randomly choosing pairs of leaves. 
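
    Example sketch (k=5 and n=1000 are arbitrary; this mirrors what
    sample_coal_tree does internally):

        times = [0.0] + sample_coal_times(5, 1000)   # leading 0 plus k-1 times
        tree, remaining = make_tree_from_times(times, k=5)
        # 'remaining' holds the lineages that were never merged; here it is
        # just the root, since all 5 lineages coalesce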
+ """ + + # initialize k + if k is None: + if leaves is not None: + k = len(leaves) + else: + k = len(times) + + tree = treelib.Tree() + + # initialize k children + if leaves is None: + children = set(treelib.TreeNode(tree.new_name()) for i in xrange(k)) + else: + children = set(treelib.TreeNode(name) for name in leaves) + for child in children: + tree.add(child) + child.data["time"] = 0.0 + + # perform random merges + for i in xrange(1, len(times)): + # make new parent and merge children + parent = treelib.TreeNode(tree.new_name()) + parent.data["time"] = times[i] + a, b = random.sample(children, 2) + + tree.add_child(parent, a) + tree.add_child(parent, b) + + # adjust children set + children.remove(a) + children.remove(b) + children.add(parent) + + + # set branch lengths + for node in tree: + if not node.parent: + if t is not None: + node.dist = t - node.data["time"] + else: + node.dist = 0.0 + else: + node.dist = node.parent.data["time"] - node.data["time"] + + # for convenience cap the tree for easy drawing/manipulation + if capped: + tree.make_root() + for node in children: + tree.add_child(tree.root, node) + else: + # set root + if len(children) == 1: + tree.root = list(children)[0] + + # return tree and remaining lineages + return tree, children + + + +#============================================================================= +# popsize inference + +def mle_popsize_coal_times(k, times): + s = 0 + i = k + last = 0 + for t in times: + s += i*(i-1) * (t - last) + i -= 1 + last = t + return s / float(2* k - 2) + + +def mle_popsize_many_coal_times(k, times): + + ntrees = len(times) + s = 0 + + for times2 in times: + i = k + last = 0 + for t in times2: + s += i*(i-1) * (t - last) + i -= 1 + last = t + return s / float(2*ntrees*(k - 1)) + + +def mle_popsize_tree(tree): + timestamps = treelib.get_tree_timestamps(tree) + times = sorted([timestamps[node] for node in tree.postorder() + if len(node.children) == 2]) + k = len(tree.leaves()) + return mle_popsize_coal_times(k, times) + + +#============================================================================= +# helper data structures + +class MultiPushQueue (object): + """ + A queue that requires multiple pushes before item is queued + """ + + def __init__(self, lst): + self._lst = linked_list.LinkedList(lst) + self._count = {} + + def __iter__(self): + return self + + def push(self, item, needed): + count = self._count.setdefault(item, 0) + + # must be queued 'needed' times + if count + 1 == needed: + self._lst.append(item) + else: + self._count[item] += 1 + + def next(self): + if len(self._lst) == 0: + raise StopIteration + else: + return self._lst.pop_front() + + + + + +#============================================================================= +# allele frequency + +def sample_allele_freq(p, n): + """ + Sample a new allele frequency using starting allele frequency p and + population size n + """ + + if p <= 0.0: + return 0.0 + if p >= 1.0: + return 1.0 + + if p < 0.05: + return min(float(stats.poissonvariate(p*n))/n, n) + if p > 0.95: + return 1.0 - min(float(stats.poissonvariate((1-p)*n))/n, n) + + mu = p * n + sigma = sqrt(n * p*(1 - p)) + p1 = random.normalvariate(mu, sigma) / n + + if p1 < 0: + return 0.0 + if p1 > 1: + return 1.0 + return p1 + + +def freq_CDF(p, N, t, T, k=50): + """ + Evaluates the CDF derived from Kimura. + p is initial frequency of the allele in the population + N is the population size + t is time (units?) 
+    T is the upper limit of the CDF (int from 0 to T)
+    k is the approximation for the upper limit in the (supposed to be) infinite sum
+    """
+    return freq_CDF_legs_ends(legendre(1.0-2*p), legendre(1.0-2*T),
+                              N, t, k=k)
+
+
+def freq_CDF_legs_noends(leg_r,leg_T,N,t,k=50):
+    """
+    Evaluates the CDF derived from Kimura using two Legendre polynomials.
+    This does not include the probabilities at 0 and 1 (partial CDF).
+    leg_r is the legendre_lambda associated with r
+    leg_T is the legendre_lambda associated with T (T', really)
+    N is the population size
+    t is the time elapsed
+    k is the upper limit to approximate the infinite sum
+    """
+    s = 0.0
+    expconst = float(t) / 4.0 / N
+    for i in xrange(1,k+1):
+        newterm = .5 * (leg_r(i-1) - leg_r(i+1))
+        newterm *= exp(- i * (i+1) * expconst)
+        newterm *= 1 - leg_T(i)
+        s += newterm
+    return s
+
+
+def freq_CDF_legs_ends(leg_r,leg_T,N,t,k=50):
+    """
+    Evaluates the CDF derived from Kimura using two Legendre polynomials.
+    This includes the probabilities at 0 and 1 (full CDF).
+    leg_r is the legendre_lambda associated with r
+    leg_T is the legendre_lambda associated with T (T', really)
+    N is the population size
+    t is the time elapsed
+    k is the upper limit to approximate the infinite sum
+    """
+    s = prob_fix(1.0-leg_r(True),N,t)  # leg_r(True) currently returns p, so this is probability of extinction
+    expconst = float(t) / 4.0 / N
+    for i in xrange(1,k+1):
+        newterm = .5 * (leg_r(i-1) - leg_r(i+1))
+        newterm *= exp(- i * (i+1) * expconst)
+        newterm *= 1 - leg_T(i)
+        s += newterm
+    return s if leg_T(True) < 1.0 else s + prob_fix(leg_r(True),N,t)  # add fixation probability if T==1
+
+
+def freq_prob_range(p, N, t, T1, T2, k=50):
+    leg_r = legendre(1.0-2*p)
+    leg_T1 = legendre(1.0-2*T1)
+    leg_T2 = legendre(1.0-2*T2)
+    return (freq_CDF_legs_noends(leg_r, leg_T2, N, t, k=k) - \
+            freq_CDF_legs_noends(leg_r, leg_T1, N, t, k=k))
+    # uses noends because probabilities at 0 and 1 may be determined using other methods
+
+
+def sample_freq_CDF(p, N, t):
+    """
+    Takes an allele frequency p, a population size N, and a time period t.
+    Samples from the CDF derived from Kimura to get a new allele frequency.
+    N.B.: The current version fails sometimes (on some N, t pairs), presumably
+    due to errors in freq_CDF_leg.  These need to be fixed.
+    """
+
+    # special cases
+    if p == 0.0:
+        return 0.0
+    elif p == 1.0:
+        return 1.0
+    elif t == 0.0:
+        return p
+
+    y = random.random()
+    leg_r = legendre(1.0-2*p)
+    extinction = prob_fix(1.0-p, N, t)  # probability of allele extinction
+
+    if y < extinction:
+        return 0.0  # sample an extinction event
+    elif y > 1.0 - prob_fix_leg(leg_r, N, t):  #prob_fix(p, N, t):
+        return 1.0  # sample a fixation event
+    else:
+        def f(T):
+            return freq_CDF_legs_noends(leg_r, legendre(1.0-2*T), N, t) \
+                   - y + extinction  # trims extinction probability, assures brentq works
+
+        try:
+            return brentq(f, 0.0, 1.0, disp=False)
+        except:
+            print p, N, t
+            raise
+
+
+
+# new function for determining Legendre polynomial evaluations
+def legendre(r):
+    """
+    Returns a lambda that calculates the Legendre polynomial based on a
+    recursive formula (43) from
+    http://mathworld.wolfram.com/LegendrePolynomial.html.
+    As the value r is constant, results of calls for different n are cached,
+    which reduces runtime for repeated calls.
+    The old legendre_old(n,r) function below is intractable for n>~10.
+    This function can run with n as high as one million in a fraction of a
+    second (using isolated calls, so no caching to build higher values of n).
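+
+    A small usage sketch (values follow the standard identities
+    P_0(x) = 1, P_1(x) = x, P_2(x) = (3*x**2 - 1)/2):
+
+        leg = legendre(0.5)
+        leg(0)     # -> 1.0
+        leg(1)     # -> 0.5
+        leg(2)     # -> -0.125
+        leg(True)  # -> 0.25, i.e. (1 - r)/2, which recovers p when r = 1 - 2*p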
+ """ + def cacheleg(i,d): + if type(i) == bool: + return (1.0-d[1])/2.0 if i else d[1] # utility function; may need to be removed + assert (type(i) == int and i >= 0) # if i is not type bool + m = d['max'] + if i <= m: + return d[i] + x = d[1] + for n in xrange(m+1,i+1): + d[n] = 1.0 * ( (2*n-1)*x*d[n-1] - (n-1)*d[n-2] ) / n + d['max'] = i + return d[i] + d = {0:1.0, 1:r, 'max':1} + assert -1.0 <= r and r <= 1.0 # ensure r in reasonable range + return lambda n: cacheleg(n,d) + + +def gegenbauer(i, r): + return ((i * (i+1)) / 2.0 * hypergeo(i+2, 1 - i, 2, (1 - r) / 2.0)) + +# this should be the fastest gegenbauer method now (21 July 2010) +def gegenbauer2(i, r): + leg = legendre(r) + return ((i * (i+1)) / float((2*i+1)*(1-r*r)) * + (leg(i-1) - leg(i+1))) + +def gegenbauer3(n, a, z): + + tot = 0 + for k in xrange(int(n/2)+1): + tot += ((-1)**k * stats.gamma(n - k + a) / ( + stats.gamma(a) * stats.factorial(k) * stats.factorial(n - 2*k)) + * ((2*z) ** (n - 2*k))) + return tot + + +# TODO: determine proper k and esp values +def prob_fix(p, n, t, k=50, esp=0.000001): + """Probability of fixation""" + r = 1 - 2*p + leg = legendre(r) + prob = p + for i in xrange(1, k+1): + term = (.5 * (-1)**i * (leg(i-1) - leg(i+1)) * + exp(-t * i * (i+1) / (4 * n))) + if term != 0.0 and abs(term) < esp: + return prob + term + prob += term + + return prob + + +# added 02 August 2010 +# saves information to leg_r +def prob_fix_leg(leg_r, n, t, k=50, esp=0.000001): + """Probability of fixation""" + leg = leg_r + prob = leg(True) # gets p + for i in xrange(1, k+1): + term = (.5 * (-1)**i * (leg(i-1) - leg(i+1)) * + exp(-t * i * (i+1) / (4 * n))) + if term != 0.0 and abs(term) < esp: + return prob + term + prob += term + + return prob + + +def hypergeo(a, b, c, z, k=100): + """Hypergeometric function""" + terms = [0.0] + signs = [1.0] + for i in xrange(1, k+1): + term = float((i+a-1)*(i+b-1)*z)/(i+c-1)/i + signs.append(util.sign(term) * signs[-1]) + if term == 0.0: + break + terms.append(log(abs(term)) + terms[i-1]) + return sum(s*exp(i) for s, i in zip(signs, terms)) + + +def loghypergeo(a, b, c, z, k=100): + """ + Hypergeometric function + + Performs computation in log-space + """ + terms = [0.0] + signs = [1.0] + for i in xrange(1, k+1): + term = float((i+a-1)*(i+b-1)*z)/(i+c-1)/i + signs.append(util.sign(term) * signs[-1]) + if term == 0.0: + break + terms.append(log(abs(term)) + terms[i-1]) + + sgn = 1 + tot = -util.INF + + for s, t in zip(signs, terms): + sgn, tot = stats.logadd_sign(sgn, tot, s, t) + return sgn, tot + + +def hypergeo_mult(i, z1, z2, k=100): + + h1 = hypergeo(1-i, i+2, 2, z1, k) + h2 = hypergeo(1-i, i+2, 2, z2, k) + return h1 * h2 + + +def freq_pdf(x, p, n, t, k=8): + + if x > 0.5: + return freq_pdf(1.0-x, 1.0-p, n, t, k) + + q = 1.0 - p + prob = -util.INF + sgn = 1 + t4n = t / (4*n) + + for i in xrange(1, k+1): + #term = (p * q * i * (i+1) * (2*i+1) * + # hypergeo(1-i,i+2,2,p) * hypergeo(1-i,i+2,2,x) * + # exp(-t * i * (i+1) / (4*n))) + + lcoff = log(p * q * i * (i+1) * (2*i+1)) + s1, h1 = loghypergeo(1-i,i+2,2,p, i+2) + s2, h2 = loghypergeo(1-i,i+2,2,x, i+2) + sgn2 = s1 * s2 + term = (lcoff + h1 + h2 - (i * (i+1) * t4n)) + + sgn, prob = stats.logadd_sign(sgn, prob, sgn2, term) + + return sgn * exp(prob) + + + + +#============================================================================= + +if __name__ == "__main__": + from rasmus.common import plotfunc + + if 0: + for i in range(5): + print "P_%d(x) = " % i, legendre_poly(i) + print + + + #======================== + # hypergeo speed + + 
a, b, c, z, k = 30, 20, 12, .3, 40 + + util.tic("hypergeo_fast") + for i in range(100): + hypergeo_fast(a, b, c, z, k) + util.toc() + + + util.tic("hypergeo") + for i in range(100): + hypergeo(a, b, c, z, k) + util.toc() + + util.tic("loghypergeo") + for i in range(100): + loghypergeo(a, b, c, z, k) + util.toc() + + + if 0: + p0 = .5 + k=30 + + p = plotfunc(lambda x: freq_pdf(x, p0, 1000, 100, k=k), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 200, k=k), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 500, k=k), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 1000, k=k), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 2000, k=k), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 3000, k=k), + .01, .99, .01, style="lines") + p.enableOutput(True) + p.replot() + + #p.plotfunc(lambda x: normalPdf(x, (.5, .1135)), + # .01, .99, .01, style="lines") + + + + if 0: + p0 = .1 + + p = plotfunc(lambda x: freq_pdf(x, p0, 1000, 100, k=25), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 200, k=25), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 500, k=25), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 1000, k=25), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 2000, k=25), + .01, .99, .01, style="lines") + p.plotfunc(lambda x: freq_pdf(x, p0, 1000, 3000, k=25), + .01, .99, .01, style="lines") + p.enableOutput(True) + p.replot() + + #p.plotfunc(lambda x: freq_pdf3(x, .5, 1000, 1000/10, k=40), + # .01, .99, .01, style="lines") + + + if 0: + p0 = .5 + k=30 + + p = plotfunc(lambda x: freq_pdf(x, p0, 1000, 30, k=k), + .01, .99, .01, style="lines") + p.enableOutput(True) + p.replot() + + + + +#============================================================================= +# old versions + + +# Legendre polynomial +# this function should be depreciated +def legendre_poly(n): + + """ \frac{1}{2^n n!} d^n/dx^n [(x^2 - 1)^n] """ + + return simplify(('mult', ('scalar', 1.0 / (2 ** n * stats.factorial(n))), + derivate(('power', ('add', ('power', ('var', 'x'), + ('scalar', 2)), + ('scalar', -1)), + ('scalar', n)), + 'x', n))) + + +# this function should be depreciated +def legendre_old(n, r): + l = simplify(assign_vars(legendre_poly(n), {'x': r})) + assert l[0] == 'scalar' + return l[1] + + +### TODO: distribution problems arise from probability masses at 0 and 1 +def freq_CDF_leg_old(leg, N, t, T, k=50): + """ + Evaluates the CDF derived from Kimura. + N.B.: Appears to fail sometimes; this needs to be fixed + leg is a Legendre (lambda) for evaluating the CDF + N is the population size + t is time (units?) 
+ T is the upper limit of the CDF (int from 0 to T) + k is approximation for the upper limit in the (supposed to be) infinite sum + """ + def innersum(i, T, j=0, s=0.0, c=1.0): + if T == 0.0: + return 1.0 + if j > i: + return s + newc = 1.0 if j == 0 else c * (-T) * (i+j) * (i-j+1) / j / j + return innersum(i, T, j+1, s+newc, newc) +# if p == 0.0: # none have the allele +# return 1.0 # all weight is at 0, so CDF is equal to 1 +# if p == 1.0: # all have the allele +# return 1.0 if T == 1.0 else 0.0 + s = 0.0 + for i in xrange(1,k+1): + newterm = leg(i-1) - leg(i+1) + newterm *= exp(- i * (i+1) / 4.0 * t / N) + newterm *= .5 - .5 * innersum(i,T) + s += newterm + return s + + + +def hypergeo_old(a, b, c, z, k=100): + """Hypergeometric function""" + terms = [1.0] + for i in xrange(1, k+1): + terms.append(float((i+a-1)*(i+b-1)*z)/(i+c-1)/i * terms[i-1]) + return sum(terms) + + +# this is depreciated; replaced by an equivalent (but faster) gegenbauer method +def gegenbauer2_old(i, r): + return ((i * (i+1)) / float((2*i+1)*(1-r*r)) * + (legendre_old(i-1, r) - legendre_old(i+1, r))) + + +def freq_pdf_old(x, p, n, t, k=8): + + if x > 0.5: + return freq_pdf2(1.0-x, 1.0-p, n, t, k) + + q = 1.0 - p + prob = -util.INF + sgn = 1 + t4n = t / (4*n) + + for i in xrange(1, k+1): + #term = (p * q * i * (i+1) * (2*i+1) * + # hypergeo(1-i,i+2,2,p) * hypergeo(1-i,i+2,2,x) * + # exp(-t * i * (i+1) / (4*n))) + + lcoff = log(p * q * i * (i+1) * (2*i+1)) + h1 = hypergeo(1-i,i+2,2,p, i+2) + h2 = hypergeo(1-i,i+2,2,x, i+2) + sgn2 = util.sign(h1) * util.sign(h2) + + if sgn2 != 0: + term = (lcoff + log(abs(h1)) + log(abs(h2)) + + (- i * (i+1) * t4n)) + sgn, prob = stats.logadd_sign(sgn, prob, sgn2, term) + + return sgn * exp(prob) + + + +def freq_pdf2(x, p, n, t, k=8): + r = 1 - 2*p + z = 1 - 2*x + + prob = 0.0 + for i in xrange(1, k+1): + term = ((2*i + 1) * (i - r*r) / float(i * (i+1)) * + gegenbauer(i, r) * gegenbauer(i, z) * + exp(-t * i * (i+1) / (4*n))) + print term + prob += term + + return prob + + +def freq_pdf3(x, p, n, t, k=8): + q = 1.0 - p + prob = 0.0 + for i in xrange(1, k+1): + term = (p * q * i * (i+1) * (2*i+1) * + hypergeo(1-i,i+2,2,p,40) * hypergeo(1-i,i+2,2,x,40) * + exp(-t * i * (i+1) / (4*n))) + prob += term + + return prob + + +def freq_pdf4(x, p, n, t, k=8): + q = 1.0 - p + prob = 0.0 + for i in xrange(1, k+1): + term = (p * q * i * (i+1) * (2*i+1) * + hypergeo_mult(i, p, x, 100) * + exp(-t * i * (i+1) / (4*n))) + prob += term + + return prob + + +def cdf_mrca2(t, k, n): + """ + Cumulative probability density of the age 't' of the most recent common + ancestor (MRCA) of 'k' lineages in a population size 'n' + """ + + if k == 1: + return 1.0 + + s = 0.0 + for i in xrange(1, k): + lam = (i+1) * i / 2.0 / n + s += (1 - exp(- lam * t)) * mrca_const(i, 1, k-1) + return s + + + +def prob_multicoal_recon_topology_old(tree, recon, stree, n, + root=None, leaves=None, + lineages=None, top_stats=None): + """ + Returns the log probability of a reconciled gene tree ('tree', 'recon') + from the coalescent model given a species tree 'stree' and + population sizes 'n' + + This definately has a bug, that the current code fixes. 
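+
+    In outline, each non-basal species-tree branch with 'a' incoming and
+    'b' outgoing lineages over time t and population size N contributes
+    log(prob_coal_counts(a, b, t, N)) - log(num_labeled_histories(a, b));
+    the basal branch contributes only the labeled-history term, and a
+    topology correction (num_topology_histories) is then added for each
+    connected subtree of the gene tree that maps to a single species
+    branch.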
+ """ + + popsizes = init_popsizes(stree, n) + if lineages is None: + lineages = count_lineages_per_branch(tree, recon, stree) + if top_stats is None: + top_stats = get_topology_stats(tree, recon, stree) + + # iterate through species tree branches + lnp = 0.0 # log probability + for snode in stree.postorder(): + if snode.parent: + # non root branch + a, b = lineages[snode] + + lnp += log(prob_coal_counts(a, b, snode.dist, + popsizes[snode.name])) + lnp -= log(num_labeled_histories(a, b)) + else: + a = lineages[snode][0] + lnp -= log(num_labeled_histories(a, 1)) + + + # correct for topologies H(T) + # find connected subtrees that are in the same species branch + subtrees = [] + subtree_root = {} + for node in tree.preorder(): + if node.parent and recon[node] == recon[node.parent]: + subtree_root[node] = subtree_root[node.parent] + else: + subtrees.append(node) + subtree_root[node] = node + + # find leaves through recursion + def walk(node, subtree, leaves): + if node.is_leaf(): + leaves.append(node) + elif (subtree_root[node.children[0]] != subtree and + subtree_root[node.children[1]] != subtree): + leaves.append(node) + else: + for child in node.children: + walk(child, subtree, leaves) + + # apply correction for each subtree + for subtree in subtrees: + leaves = [] + for child in subtree.children: + walk(subtree, subtree, leaves) + if len(leaves) > 2: + lnp += log(birthdeath.num_topology_histories(subtree, leaves)) + + return lnp + + +def calc_prob_counts_table_old(gene_counts, T, stree, popsizes, + sroot, sleaves, stimes): + + root_time = T - stimes[sroot] + + # use dynamic programming to calc prob of lineage counts + prob_counts = {} + def walk(node): + if node in sleaves: + # leaf case + M = gene_counts[node.name] + prob_counts[node] = [0.0] * (M+1) + prob_counts[node][M] = 1.0 + return M + + else: + # internal node case + assert len(node.children) == 2 + c1 = node.children[0] + c2 = node.children[1] + ptime = stimes[node] + t1 = ptime - stimes[c1] # c1.dist + t2 = ptime - stimes[c2] # c2.dist + M1 = walk(c1) + M2 = walk(c2) + M = M1 + M2 # max lineage counts in this snode + n1 = popsizes[c1.name] + n2 = popsizes[c2.name] + + prob_counts[node] = [0, 0] + for k in xrange(2, M+1): + prob_counts[node].append(sum( + sum(prob_coal_counts(i, m, t1, n1) * + prob_counts[c1][i] + for i in xrange(m, M1+1)) * + sum(prob_coal_counts(i, k-m, t2, n2) * + prob_counts[c2][i] + for i in xrange(k-m, M2+1)) + for m in xrange(1, k))) + + assert abs(sum(prob_counts[node]) - 1.0) < .001 + return M + M = walk(sroot) + + return prob_counts + + + +def count_lineages_per_branch_old(tree, recon, stree, rev_recon=None): + """ + Returns the count of gene lineages present at each node in the species + tree 'tree' given a gene tree 'tree' and reconciliation 'recon' + """ + + # init reverse reconciliation + if rev_recon is None: + rev_recon = get_rev_recon(tree, recon, stree) + + # init lineage counts + lineages = {} + for snode in stree: + if snode.is_leaf(): + lineages[snode] = [len([x for x in rev_recon[snode] + if x.is_leaf()]), 0] + else: + lineages[snode] = [0, 0] + + # iterate through species tree branches + for snode in stree.postorder(): + if snode.parent: + # non root branch + a = lineages[snode][0] + + # subtract number of coals in branch + b = a - len([x for x in rev_recon.get(snode, []) + if len(x.children) > 1]) + lineages[snode][1] = b + lineages[snode.parent][0] += b + else: + lineages[snode][1] = 1 + + lineages2 = count_lineages_per_branch(tree, recon, stree) + assert lineages == lineages2 + + return 
lineages + + +def get_rev_recon(tree, recon, stree): + """ + Returns a reverse reconciliation + + A reverse reconciliation is a mapping from nodes in the species tree to + lists of nodes in the gene tree. + """ + rev_recon = {} + nodes = set(tree.postorder()) + for node, snode in recon.iteritems(): + if node not in nodes: + raise Exception("node '%s' not in tree" % node.name) + rev_recon.setdefault(snode, []).append(node) + return rev_recon + + +def get_topology_stats_old(tree, recon, stree, rev_recon=None): + """ + The function computes terms necessary for many topology calculations + """ + + nodes_per_species = {} # How many gene nodes per species + descend_nodes = {} # How many descendent nodes recon to the same species + + nodes_per_species = dict.fromkeys(stree, 0) + + # init reverse reconciliation + if rev_recon is None: + rev_recon = get_rev_recon(tree, recon, stree) + + # iterate through species tree + for snode, nodes in rev_recon.iteritems(): + nodes_per_species[snode] = len([x for x in nodes + if len(x.children) > 1]) + + # iterate through tree + for node in tree.postorder(): + if not node.is_leaf() and len(node.children) > 1: + descend_nodes[node] = 1 + sum(descend_nodes.get(child, 0) + for child in node.children + if recon[child] == recon[node]) + + return nodes_per_species, descend_nodes + + +# this is depreciated; replaced by prob_fix method using new legendre method +def prob_fix_old(p, n, t, k=8, esp=0.001): + """Probability of fixation""" + r = 1 - 2*p + + prob = p + for i in xrange(1, k+1): + term = (.5 * (-1)**i * (legendre_old(i-1, r) - legendre_old(i+1, r)) * + exp(-t * i * (i+1) / (4 * n))) + if term != 0.0 and abs(term) < esp: + return prob + term + prob += term + + return prob diff --git a/arghmm/deps/compbio/coal.pyc b/arghmm/deps/compbio/coal.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18c6d76675e2072cee4744757b06cea603bfdbe7 GIT binary patch literal 65385 zcmeIb3zXc~ec$(=nSJl#wOC+xxqJ*ElDpsoB*3=>@dbz!B@&_rkRU+>mxGyE>=3&% zyF0VwLLz8YWJRFS!%bv6iWSF|9lNp^=a&!rcIsNM>%cn_QXea z<2Y&4x}Wd&_n*fu78Ipj+Kyw8bNBxL|NFoH`}nu&$%R~UCg*28$}jXJXL^!_-sDVgl;}+^ z^d+a#vi-@KewXb_E(|0KgUOk}DAk`_7)lm~lQYA~!boyvBw5&zoY|2q>`cz=OfnaD z`X#E`m7F?V+ne-Mb|)_lCH4L!!@nAh`B-@%DGw$W_9P3V$(d2!8S^_s$%Vbi!dP-< zEXka%-PC?_uivcfOUlE^8yP*0T1JvL=znS4?ncjo4#JmJiNq&(@&yOQ#C&fJoer<}Pp zDPQl*ZAtkCXQq?#jn2$y?DhXm$s0_^?XAnWNt#dIU=r>~$~U|GomUk&-~xADb^l$y ze<&&6;ur5u%C|am&s8OFbIHR=dD;bzBpS{c59htkA9Vgc=WloZe&_FS{(+=?r(b+< zea&|zZvaFOt-l%lI+VQ8mz;SxDc|iE-<_22appZq`LHub*S~edRlT=!Q!TvLHy=sL z_xY7alk)w}JeHInaOUx({9safBAElgAJU!IljP%PjwR)X{rGrN=ELuCAKvHuQRkm@ z{=LpW<@_VgzdtEI>KC6*%8xnofu#JnGapRKPdM{TQaB#{XM7k=8GDvOj^fBu35)(o7VBcP3rjQHS75BrgeO=yN()ND4+Jg z7L)SFoH^$q{kX@cpLhNR=jWZDb-tREU-XMFb>K{x zSu~toNXqB@biq$ceq}K!mwmJ5n-$-zyH37%-gCXAM<-{h5aPxkkPHpw^Z8@-;zFfS zs??hK#Y%I&UTzEx4LzQ()ti-k@mzhinO~|etu7Rs)p{-8sD83?D4&12mT%5i8~IYb zOreE^jYS&y6Z!f|zLqbpREAb-rTWTBrPQpH_3B*yVtqAVt5nMQW}O-qmsa&9a_x4^ z@oq6+t}ZRqtL1!SsZy#|8bf+*akW7+R~MQ*yU0h*UCbZJpR4dfCBIZ`G^(}p`D#-Y z7i$;u^*NeZX;kuav_b7}92y#`{t^FXGIYN=KvJ&0Ty5~^#7tJVU`^EB;T z@mzJG+Ps)AS85IYW#;SW8kLoo1rRDUS6u;eS}>;P(-)>)RkbD%D6bY58u=;^yAhUM zwlT>D8wb5P zNR=9iS@rP<7Y%_y(m$FR%9Qk3jlRC?88}Er@B<{bXCSNLXHGO1DTL)jDbukp@XETe zFXkJ?#ifNxdsLwy&FUg!4f-;=jOMjRcDi=e_^FXiHF~93E7und!3<^n<>E@U*sK`O z3ZpzP2nPydO7@X#=O_wKqgNisM|A0N>{~aEy5(_vh72QkK9gL^CTkBFY`K#ap#?3E zpS>j!WM_@9zB>sF2+}jj>)B*n3~~AT86wJWTQ;2n+>&q8OWWvaM$63ol!4U z8toy6##D;uxgkh)68Nckw z9cSu}pRFgC`jS`s{O+ZG)%0q=OYmo6jFP&X((S%vV#>8a+Isa>?hNyE;}^?3@1qTF zjB6bCdk@l#Wlg5;&i?ZA8^R& 
zb6wZKi*dI_*IubM=dYrKEu<|8WQ4AwDfi|X^}EQd-6nc+_NL@gCQYPoQoZtW&9shum9nsw;X!pwl~tC`VYL9PjuyK>BF!(=On z|6%tgdf0wz%bjbgVQumc?OLrh8>hI}(1WCJCUa|M0*o2T+?IK7)>tEnML%QB&yfL5 zz=^e=5R5TlOrap&p#J3q2&5|j;4r_^`;4^!Qg0X5jtS?CCJ)ls%#eslfm;a0k{rYL%IM;8!H6+=^Z5X0l+76-q0ClUmybIpLws`G9 z8n^rg7|OfdY>O@ok0CwKRxt)T)|0$Wqf&PE3k;^Q>um`6>(s`1%PK~%R1%$D3n}wl zrTI#wQd`dgOLgQ^X@wv!sx4Xq2z;)h_t&vT1b?wcHwpF#I-Ad~SBmp!!yUOw{IP}A zk6n$u_w#t|kh$gbdTt4zO~5$KG1GjTYf2Iqm$blkVuoCbyMLUh{2E_PQ8_rJ1h2qi zA#fLF8>w$oBZVPG+qKP5+bZZnaj{rwfLqV>JsdUlG3%h zAYT*P_%j0e!BI5zh5aN!@(=2=J$Hq+m=rV1!>vnr&FiV9n|`St6Ox{+BH}uTvgisivCTMMt&VG|l$hPEvh&W1u2q-t8pF!8T74&u>S^N4X zME&~{k@CSH>X70=i26_$Q6Dxv9qJHuh_nMMUv;kT?jtk;abqRA0C z8f>w!Z$A3ANxkr?a4l3X+^(mh^5zDm4Ya;+2T$G^LN8KYD$m)bL(--TU*djaf+Xnt zc;-kKrO#zP7@Cr3z03}|Jf;5|G6HkEaIRR0StspXMv%FJ6qk`z@J;>?6GTXRC_^dk6YcufN)# zyg87}K`MGBX3ExKy{X)%m&;J3et1#lq_x+^cdLUktDdw$FU+dw$sL0!roByNsB0KZ zLsqIy--(s#+Y++!60rQXrJO&TGSp8q+#4~>LZaxRetYv2xC3qkg8(vK$|QB1+FV@j zO(sq{)1SQ5o7DHP%m9l9sD1VBV?7%659j%7$it>Fdk*f6I0#mdBSA`i#mG5Poif>2hC zf&Hy(8P{!In6^x}%yAHyRM{9cQx_bE);>scmp={1q3_u2JSsC9xD%jdTk?i%t&_up zh>5b5mD7tt@1u5#g!ZrT^4Z!C3eKeqB5yT^2F%apkwdiwYI zHLfQ~GCMMZ{Ptw~t|C+|Oi2p6C>TbmadI0)^8>nfn-Wnpn{c1R^HB*QZ4>#-O$e&o z83^~@_PKjhmF@9|mD31?Yn{zj%!!2`d8#dPCgI#JJg{>)oSX?O;#ULBazzmD&GP}ea zTREH7WrtUDwG9(3*s|!Z7W|s36zC^?eVN^;Ui(qOJh7V8etw?VA19-UfI2nL^87A$TG4`AOOdmDR2lXJ1)#)tN5{TCcb=459V8g2dq_~VJO)W zcyvR>Hy>>SODmIF#7#HbHt--YZwVa*6NEf{4(dT>8{k9$AW2SO@|&vB(p%q-OfN)H zTKfJ>fXsDR=nOIf7fEminH4g@RLA+#@;nPwi_KxqLP(P2!d zVnDd3Li~wZ%mxK1DZ_i18a?wePn#@=!8|U3h}Zz+snrH{C-AmZTqqe3usaF4yQd?0 zES-gKJ~|^l6Ff^jDoiWs(CfS|Z&q@Hl7l4MIP3*ktbU(gL$o028O`)%HP|~c{n=55 zyd9>{9_R}cW6U6mFZBd&V!p}<8M7WpV&^0X@W00!LYmR2Zn!7pf)0mK%rb?e9mdl8 ztyi%edW65gN8c9VZB%UL>(VewP~xTuv3zB&L@w-W64)+ZYTmvwB4vWBVa@nov0lqgf|g=W)D(sf#K#gwKI?u&8z!bkm;Z9|+E9W<#pTB4gwchXc_w{LHBF4sLg`&7V5nZ% zWDd3xdS3T1P|}4{82voMYVd;o7gPG*idyNG4gT0bbUnQ(iOdK>g~|8knf|B9tUV?q zJo|9RSY2kwCWWm4Mglk>l{4Z~42?iy0$szFElQ20PG5MlC64vqI}vnEUdliW8?!~j zj2Iw#l00cK6)W&;HN+4>^9;YO^ON*|L_m?VgeyM7?o?3y+}BT~6vElj~y%BG&J zdxT~Sjrh6bwZ7zv%{uGsEm&YMx#W^LTy8x!x!A@6SwbYFS`06Q8`|X3ubKT6iOE2i zT=HG4sO7stDQHDfwb>AnQQc7)EeSu|R~=&unuWYps27qbF zikY_Jv^P&1zl#g#o#s6(5M0Q}fQ^C7K(_^gyK&DJ2&RJxa1}wCJV3WHj7NtN&9*rwT&X!g>^*;nvnscs+%BO1PU+sGL421TP#_ zB3c#1)Z}qM_vDx=JgUS^n^w*(?P%;GS)JFmOmS>3cucLdYTmY3OhQ>&pqAPwr1CY# zNKysAn?@Ze*m$8nS@tyjjefev(p3xhl7u4CmYjkDzgJH!2aE6)9#bM}<{61MIwRxp zFyZcuQG0l{JRVI|wMhqW(-2vA>EDsr**BCK$%z#24^o`FWvHZCmQ<_Xu{@<8AOkK! 
ziKnoCiT7eV7UKnD(0C!f;1RJl2s#iiC@c4eDT8z@P-$EudAlU%y+^ua-)$p0qAn>A zpwOse5m2S(E!59@&K*g;TB}0(7NdwK!xJQA9!dux>G3JoxqOmOL6=j(_2(Iaw+lYo3_SkqM;e5+u2d9- z?v)5~RiJNaNkKFJN`2)5OpcfSxjQZ7UC%R)dWAil6~zZ4`46b)^O{P7^}EZzbajmxGRpdTl1?hCbf`^><3dT@s|L zIfI!KGa7_209wX9%B3aKQ!A?#NBkNiXM_n|f!9XGHi>nRu#Fmxu%qWyi!gmaIvYUp3H9`VnlLZ~WgW@plKzZ-yI@K~XuG}HiOl}&PQ>1l2#^#W z>AlAHZ<3iujs-Su6rDYOcE4yWh{ONqnRu`zmmFQ*k(^#TE#RrzA9(z1Sd>iyCV2q~ zG9=>qL?Wq)mnNwJ;ywwDelT|wJ-B^8+q(ad@1I_q*QZ-GpHTyNKk8e$17Z>0McQPo zQ_l!F4=nF|>!|E1t;_gC>nf8>stI36L{U;S{}MenzI8`mIi6HTXNAvpj3P@bEUv+n zg{)aAmgZSA#Bv}-5+iuFR_(~GsD&}uR!FDw=P`I{m4$2SejqM`;k#Zdv6LfSWZ~;{ zp#bd)14AU$SZ8pyJ+@l(TQo!H~!T%&!jp;>S- zW!#C7?>?{{-DzqefWd)zRWQ_akWZ+gnH!u&1Ain_b^WY$^$xj=_kZ2MxaYbkezXWD+kOS^u zl8lV3J_H5eojjQAV3>-Ty)7NKhX8^lJ_)zPVq?!Hd@|}vac5kVomlbZ?}OjMl=;cK z!(A2=q-b)2<{C{-0qbqMO}d&52YO{?vB4rYyf%#@+(?StqL!pJ>q~V+or@i_FZN(p z1fFjMa1D?FT+-^=g9h+>x&WL;(%nf30{g&m(~96!jGiGs;_cheuTO-hNGDmzn7&V;I&7J7a;&vKJ~B z8-)+*ky^M=sTH2(Z$T{F=o0dxrY2hd966gl-D<;WN|xM%=%>0E#Xm}J>s)D+S{T^b z)=bZODd1mH_r`dX^o{2Fa>9!J*jvGi%+0xyut55f_|LICCI1e^!3IzPz!NHjy`m?1 zX&|ZZH|dAzFxQkoLczY#@=0`x*i_Of((58%76RZ}J|oTo_6VUyGZ8Bcn$&**Btixq zNG6}>tAhy!*Ne|9c%+IwgNS0{&oH(QBvmrBiOf(kN$@Uc$bzlvh{Rj)ak9EY58Hak zcynjc+?Bpge<(h2X*fAudt8g=H?Q@rJ~!8!JoWz1BfH4QO;=q^VgWg zt5_&VpO*!`wNepgT5R(>JV70PI1`-m&ir(+zk{yJOL)XsTWc8MyNe*>LYt$epq+AC zGXz#G*2;dpYZ;ibXe#Z!Tf_Uk|NuC!zp~ToI z@Lgi5Z>?D{;wotr=PJ_aZX=&!a-$h+skm5)wKdYoDbrp{Q#i$q!g0N^kEFJ`IJ;0h zS6RSc!D?fzx4{WM$dfAcmEU4xMp#o=kMUtd6}NL)@Gwx&WTf2}Q;^zH;lrw+<3~HJ zOH(}+DAk%lDadaFX^=@GoZMQ$tcgTcMdE4GiXRu1IHHdYNJ`zWUk*2$cxZNaHrMk! z#Bo0o>}amPZ)bLA=BCV4c26<_pDk%z6#Jn;H4Ry&G8r<1BJ2P)KzXn6xtdlut^w&0 zdH_gcZdDOxE0k4;MyhOrR$=gsN`Ht-Q77X9Nmi<4Ul?J*yBhUU;wXwwL|+8;7> z&a(AyL15Gmp0x=n$cu9*T0F@!)_>7J_g|fbmi9&G`F@MSg6JZOn2#|$_a~R2Wh03w z95kQ@NnS!dKSnO403T=Qja{c@heah7;%=k>w$XYmdkb^BP#Y=wY6&{U>CM*W8KH&k zdBXPOUN$|6bzu2vUEuQ5qOg#rU#YTaf4)fUK`~6kZcM#YT3zwl(GchI(+#a1pKe)? zTdqx3(MzmH4}`C>4M=4f+WCm7q`^0;rWy zm0~%66(Q_ewb~(D3^g*1>A+dCCNXNYWrRkhX$2LD`sGTa@No(hPAmDClA@C5l}P!s zsm`1m@j6zVHZd|Zn(~#%OFwJejbWT^7SAnIOzsM^Jf9KjdC78DZ^IfXDJ0yw`ie)? 
zf_@_!<@T{EjCHhKgv|Q&LB07Hi92PjU`|&O6S~^6og_*+bt}H2?KLux3ON542D>4O zp6u8GZGt+XJ@@96yDxi>>C{cxU0hG*c0r{|Vgv*H^w(0pJl^xj^mqXf!*K+JBu>~W zx@>PUXWP_gDS)_b?OD{w&ZJoTa-yW^?&GI1*qf(0|3wxwMGziJUg}HgcOnOan6LR(STTVwDuyzhi8A!%aFUzLDIVW6wwyd4SibI4TPGF&5lu1 zE04OBj6zo{$2+aOOcxm@T98>@WH%(Z4DejG+x|1uY9gwsAS7A?*>C?LzN5I9to1No zh*S&%Q$vgm;_XeSX?!iGf$E;ViLr4praFDW5$Gx=C$1P`m|`BSo!X?akE*ed?sP~4 z@PM~|8%YP`s$(ALL{|^H#`L=EX@N_xjXq2!Y9TCx$>A==Xyt_d49ar4#12qI(W$%v z2yO8*>?1|RCufEL*AwfBR(h32*E2|9o}e3e?s(rNU#%Tig=gQZd7?59L;kqit?_X^ z!3T8%ZpL(QV^TV>r)**}ER)w5(nl`2cJg%@7XeKWrtx#GD7$_cS%4K_g0^Y^W+)K|jT zYspsKf7X*(ia9MEaaXB~9e`8?GjA2Vu)>95;g#aTg~Bpdg*hd*0oRpd*i$Fm%es6; z$(oXjNB@oLldIjj9IJ{{b>dFMT{Vk}XMxk<)aHUoOwn@1=-z zaA*@$Rq{VtJcM$<@+~NmK7nNd473yuwazl;Z8OBgTn}W5B)G%sdnc)|X8l zP-GScXlS3!0#r{ykdXUUHz4oPkgwAa#QC^buQTir;#y~b(ptQ3$^Sc2Q-TZ~v$VIP zXYA^J2Mfc_&>4O^d2N@dGc1yK*6xsrph^IS>>cO~yDiY8IqdG}3}KHSLzB1UL8th- zt>>VddIeXbwmk_nzO4?ty2m`6VTL6zq!eR(Y>_BiC3(H4_tL0@0GPa%N2JwC<51ib z>>>+G`F3mEs~XeV=osycnySOeeIFYWVV%QJfHhWm5s zSEsPq;-&q`t2A+6GLF#)`^l?}z*zF?ev3I1!3`T<22ia>$b|ab@h4j$@UIkEmE8`c zOrv0=Ky6%+R6+58NvzkDTAIu1wBs^)TFj@B1+j71Cd$^#6VDzyDC>;OwV`CR`Av>i z?iD(T3^O$FX@F=+=mchjc(uoz7u@2O(F74@ZOWx3)u98!n9RHNbou^uTcGFzHes6b z7RzNDHD^HHFhmqSLM}{|rYn3s{A;TwJ6k6wst{2n-CBoeuszu!2q)cfZ)wH`lGir3 zgrUfsr-1yE6fP){mQz?%vO&{Y(7orCtST|rlCj6Dgw2n`EF>0>vfNlbHzR@;L3aNs zIdduYQ-e#X%&Jwf!UiI3Msuwe7uM@lg@)=k%|5L9HRwx6;?KSD=f3!Jf6#bOmIf?1 zfEu;s39z9pRp+n>om(t@RI~9UbtglpRxq!@%)Y*zJute=^=5J{Wos(82X$){=C&^@ z78tqR%wXhh%8q1i&X!~n5?g4qhJAcMhMNv+fS_8_60H^;izjc3HCWgW)_`dzUQAho z<z(XK!C@#A;V-1jP#LM-pA?!l1hTUcjCd@Jx?KW#* znA@y@@qsnaMOecyL(mI15hsW$Q$o%T8q_aB?wpy!w^iq8i;z<@|99#`SD;bR`qXHD z2{H|F-oul<$#_bcza+?$g{RvGywQAx0lTLC8{4}!+IqAi$_>#?$?&SHp{VG0i}Kp* zTV8e1w@IA;#E5d?4aVYaP~?J0!8Q`)zwsRs}-737P98zGijkLCa zV3`#DGm>|ZvhJpOQ`TDDD;`8#e?TMqZXP59Z;!Z+W_D#eB-I3!Djb(5R(C8DH&L7WC!0R4LUITVcuE35eIY2wcN52Y+SxNW35$spw(2H z5Nll~#F~trjnK2K&{J}|AhJYNzoBuH86!cH(q3ZMb@r7MH}*KZpG-Oq@43b}psafw zWTqn1H53R(UfH|ZXdsHH|L6JX-(w*kSu2(Z5t{oK$rZj^iG_e|9tDR!Km}(v^-FzY zz~G^?;jIX54kDix zkqrt1BJ9U@N_ekbAU40nTjD>w&Ua}Be6J^qCzeZIfIr%kA)gAGoQ<8jH-XO+FN*Y_ zV867RDwq@=+5>DI0iHIFaauf%x2c^kkpU!VPAErW{5dp$0fwdV4@lk)pXv4$m6{E+G~OTacQZem!yI8g%8k&LWRTzyeXWxs?cp;KW@!W7B$tkRUo2k0 z&m?D)U^YBX8%&Ue>-FvpB*7)gY=gofwwMM^s+VrVZd98f?>*}_R~7gS@-ulCsHT&F zQRSFv8F##V17dXi?A=~2>!3$S07AC6yEXQU9>guh)V6oN)x2=q^g?7pgd`htq)#`F zLNd2)jyhov_My&I*O=(V;u@2_*xvs%-QX>S1jQQTz{Snky;pO>T_|LT+_&@8rb-xL_FOq&qI=L6o%M-80E81usSkc7Q> zRF`I^CO6-w%Xb7MC#ZR|SYCDfx;AJI258=uDG8?pANmf>~g#H|xihE;mWI^sCf>BCDmdD!mNzzB-g>M^5 zy(Iox{7*4ro?})e+V+|+vhDt+6gJ{VPC?j$kmg@5Emo}xma+(<{-WkfP@3&Bv z(-YXmICclHRPYE5i!G!_0-kR~P#ek&#wM8kyxNux=DA2A8dv<2DMU-`x1w_V?2&Xk z03;)bEq5&&pqOwCC5~GYf>8F8vZ0)f@l(ni@M}@Q@GJ7)SpD|lx5&z*w*$YMZCqAe zfsqSBHrpLGHZC(X{Ko>;l5K-pZxC?rV{!tL1t49)m4;+X6YrcLA?c(A&XR2O%c6b^Zt$>FGoj@W12!A-eVJ(5=JAiMnSOC=A}Qbn9%W z(-9Eps=S!A76;^^N+E&r$RC z{yXsgQ?xqFE^5D}!fjMatR&C3C#QG6w3%+MQD={co}G^7f3(}gimMd7=naCWo8R|wU@ zi^e2-DHay4bygQZLc0>sqMr2a)<-av5X@@hi%1?s6pM~hd?q*vi19*<9ZJ@IL&lWj zXa8xE+G_9>3=)xsod(+t5-Q6_&)a*VXjUE$%Lfn{0#&JmCS(+|&Rs}uyskfZ$=SGp z5cgc>d0M$c(XBEDxemTj5}oFyovMV@VzqZs4_=GzdelAaLil6|+M>PoweSXBMcI&s zlcQ@BdTduH{uP=`-*&OW=BG2Km;VqdKblncYWi@YtUa!ms$;r5e)e7m-AAi-C)Itb zMgxT(ql%N+pY$~q`r^IIznwX~_G7B!_}Q;|xcFRe@)DtEU*tWv$BP?WDLc%a*Papd zh#sqn`x5!qCknmMIMmw>z12TD{(HK~L0OL^pa! 
zEv!03@g{ft?0b?+45mkNvU-EYtA0z23~LtgyHhVUrOz?E1rRLv9f3*e?}oJr`I#=# zb3ivjnb9!bO`adz` z`VL1=VN9r1UWuCYMkC)^PnlM>SzlE!K2lDLH9}6}3-_e)g=L2Wyq$%U zFh@M~+sFjg2L*nCRz`4KCopn*8eaG+4S&bl{TDZBw?vD*dHQoF8AcnPGmgY?*%ykR zb?mUOL~)T7ptTYq9Qkp=BRy-IqWwUKA=K4eF+X;MLSxBB<9Irtr%Ms3?rv^9NI_M$ zVDvAkgR;CN1kLTvTn$wRcoYFJXU9^g!gRPfV5ypP7pNlHkT3+6+bvRNxfQV=e7 zcF<_dug=Y}Kq0G|MBmY5XRXABNBbGw*A%j93-S+JwB zFP}tQ0V-C{iOM%DKQ6oTAJ(ffi^QUbC=hOwf*v`V$YD8_y4BW}3VtM98;#b>>3u_V zxI=%0M_Y&OeX7M|iZtUv?zNTIcKz z{=13wS+`PRKOs50vswW?+B3px&k_6_y9vzqav=w#Po%;ihK9u3*OqldUX{ok6%fCp zVj=6ci(T0u<%T#dV<_#jbb;84P?1)m2p%j_>4GriQYum~9P(HQsfytFWo^2LSS%Sb z^<_F@Mm?1;GnSf$U?1*K1xq5-AY1ZPzQ^a0Xo*5%W!6YCj||Q@u+^At3J)NduH71+ zChil-lvf=iI!Q5=L2%uPB>DtaT?>k;j9M*{>*0th&PW92BEHprOtbqfpzd-=Dbm>P zkf?-NjY*)#{J0fH)4l6KDp9QqP5iSfS)J;VizHdGyi3SzmA2(E>3G~qAtrVapg)`9 zSh@ntW5cNEfZGy2Ad7Ny7xTq@N={pmu_0G;<}E^>gim>f;*`g(;Z;^=%(8fo0c6#i z_mA$rOK2@8bm6TD4Pthi3k~n6WS2vXyQW0? z?Fc=8LhbM0r2SizObb86RCEN>!Vhy9GTdJ$*O5sJV%6Iti&`2PP*+4|Lk4vVPRxj9 zMm+K+l3T=Kd`v}(N^FQIas5NDl`pgE$?xr9X=iS)w|ALRtN?-xyN34jmZUsMfyeUH^C*>xU$Xd5h9#IYoY!)KOffg+ z(!HLUIJ6%G60N=bY4(CJG|4+4St2e7BT%H28FR!Z!Kt}o>j~^_wt}^Vjsx)Ou+b7= zi!F-4rMQHT+cZ`CQEVi(^=ZYNEF`{|j2xj(yOLV?(z@=1w1FryiG_;BVJ}*nl|)-V z{jI(y3C7XIY4p@i!UAoRK~})i(ef?Z*2rQKR<2X|@^9j04$(wgjMGG^ghxZ%(fEzw z#6vH6*Sbd4 zw&}Cw_iUI<7AP{AcWj)@_4C)BF-dvrXRP+6ZJMHIJVnQ^`oP9H`PTFCnQY9*cRUvw zz-tYz2avcqubshDAXs2_PudOMcpi*0%RFfnrRAVRT+(`e{;Z2kN$+AZGT+sX03imr ziY{xG5Z?E&W`Y6yf{bsr`ZA4U>nN>OjzxW~Lk3X^#nzPdNuIr>7og`gPOlf_dZSHPb%Q7ak!H`;^PA2Q1G^!DOGT_ zWnd6f-rl;p= zEhDD@v;q?K(qKd)fF~|CIiV%qqL@^|vI|O}*xd$3qF|v@MWt*vB>fNl$!2T$5OycE zhbaZtgyzNT?XqwSbhX!e?6C;d3y%Ti{5EaMkPB5zq&D?G>nF`{N0f#^2$$dJ(h;G(bsXpaOF(1;MSyDTEu=X0Fn&4o6-PWyy(z`0NZ zHVNDTR_frx-Qm_yTi(wPSI=cK9wVlrA!a9P>&%jPQgl2GHomMg*UQCw$;sK&REb#=F zL3qnU-g*|q;eQaS;1i+rS;T7Vt&f7Yq;e3wl)?g4RW z3a{JJq6US0H*WG0cWI+24DVX>(J?7Grl4^*$p?T`Vtd%gl{m;2#Nv5jcNPUW;vcGW zWG^djsjR`L@{{wos0t#sosmOiWd#v+;cf?s`IIWfe--%=t} ziaG`Nu}$1RQqGuSh&J>K+zbD;l7f=9IkWKhbm{5^`Gub$*-r5PMe6%mf&V>Z5}O)F zGkbD?I6yyabK`DU(;eBp+5Wb{aRP(mLD-I2i2{*E_~r7{!j3G0x76XSedd5zP+_BM z9b4o^*LrG#Yp6d@{8=id~NN^Q5;wjpP>mUQ;P(#Em6?TC4wY#bg`y)(&XKnsv?sv4)4Bn+1 z`ZE8g?s+Ogmk9SfJ(EAL!|dn)*e+)Gk70KI56o^)QXMrdfX6*9+z;P-%h?_b6;6jI z+ilK>4dS|nyQbb~D)hyBm;di@wvRA)Uu^{e;Nm8lP0n^V-gM2Cww&$ZL{}3bYWA0_ z}(&kvz_ud+ey>z!P)7dua9c9ZqNl^y@aoy|NUT6w5}Bw{I}_= z7wJ3*L@sl6Iu#yLrh)K4Yj_XG{J0(S7LLeIiY%wX!%3X-2oJ+uPWeAKArlW3zUx0X zA$N?ta@obG{J9BPyo+B${ObUjx9fLC%KH;BA!Bv?588yhN&68%wi4X%CuTxcSNdps zYC=}7B(RYD_Z`8MYS`Q@v6m;?DEO_9W!Tr5lTIwdiJ7+yV+doo6~rH-ZH2EXvHI8v zNU)k|g|nk;w&N2D7O?-OUTX{3u@$q1a8qd-c`RM?XuZ=h@XzU{srmjY8&UR^S%{fH5wh2b|P~kyrs3I$}yGjiLAAJ zZ-TA&5FL4gw~^sH*X(k|wX}33Z3}a)3!WgU>Ip3l3&8S!Pb25misNTrOoLNVhnJ2} zE~1hdXpV~D0y!|+G&w+#pkd^u`Y{0%m7)X@^?eb%KgL(7t&$w4%YTMQU4&F-<{PLA81Fj7WzW6;AX zEX}QTZT3uKg)XxPEMZLS8XKmt_9`<4Y^Gb}6Es3l;J?PE zK@42w6~|t6#i>nmJHnAfa`d^aQ<3!q{^bsVFMUe}75xkRx~gYlZDAkz6}8ou<`W|Q zZu&dX{VU)IR-wrA2ZKBV#X%0YRb&R zy&hRN*#o(L^13sH!*)DZ(jfPxzhimoFL8lwjJpEb+FQfd|H12`}?Q^34G=A4Hjzh6K z!2nPC`FZ>dhvt~#Cs1%w_LTaF*xT{5&uUaOu&@&zaOBrR8a)pzMkJ3LLl*o zZ+l=FYux4>HGTxcs|MF+QE6~FftGsx;#&-`raKIr9%Z#WAY7wt=U7c%z(zp>?rV?0 zkpT4nPQ|x-+^49td)##>O4s23qy}H%7706-_1S*eJq0@>)_5HVU!-tK^1=@8650=fG{BpwAL2Mab+Zc zU#6Q<+k+ZAhMHI{s!3`jt0|k6vJXkXJ4gh6-f}z)$RaHkQ9Wtf z3H`gNxqB7_1;41FS2GfJ! 
zL*Z9qyLi(w0l0p~MX|AN-p9PnW@X@2hQ@W+dk-ZG!UhlfGu7IKO8FVw0_G^Q3uH8T z5&dHp=?9I7re^kzw zO18{^Ea}iDj@g)<{by>EMmdRt($on3+@cSY5rSBz0*OE>@IoxX;4ltq3CRdoQ7Nkj zlmfRhwe+5;C$pKZc_^!UIcn`<7qLmjKjWq+j78t&SXWc{AGlW#O^G5J2G5lMCJ0=m z!U}Gb0svX@65BXP1*ymXT!W(-Ofvh}g_v_VS`(%b^AH^-16->A?s-agh7hUQARNVs z6P4p(kgAg&0=*tX5M8qlB6R!PN|E2mX z>`3sb4rSw@rhs4>C1+}5DbG7V$R$|M-H^_D7!?`U( zu1VIfs*xgP3eF$w8O#k1np_A9)OJtHH_0&UnhiEjXTdJH)?wB!aT1d5XkD+$5YU9F zpQFH-M^vLwb9n$L>-b0hCpRU)ZoNTZ7rGbvGj(*oq@XQMmE7z_&Q(sUgmH=QP^*Nf zc%ya7YiN@ezWA6vD;WcliuC&LbF9;I6$2A-n# zX#tVf4=QN<7`rWV-XJ=7ITZ5`vIEjeF*N8UaoTh>dKWmLv^(BBzy$xit~xXCubj}U z>+iFpR=O=n9E5|6lb_*y-)7^{SlfB3_0B5WA*~SIw#a(U^kZtLSA%-B#64Pjc`Dwz zptsmSX{FBQO>tVJHt5o~yX#4tIn&@KhJ># zlQ~OUg0?MG&NpV+A-~~gT#;X?bf69b0qKsNY91oH-O^j(cd2w3Iu?S$;7rD#GsxXG zar-hC00@eQI8RO#3h0>vM5d&Mtci*Ru#m>tBPAKhkk>?m$b{Ztqc4t|K@4^oUw%VU<+}hR62FYaAh_^GfgWo_>e(Gn`wdZ5Mtza5;cIQH>k1cLi3_W_K4aY5rh*v6Cy>co)b81>U z&Oj!l$bcwd3;+*m2-u|*f;fa@fJBkM z3W<<$O>w^yBm&dEBP2Qw62So4qGT|A;gw9yLibh|+C5-jXQSNWJA`u8z_gU~UlY~l zRylO+KSn%5@&Um3cNLy#TFgegfhN0UMaZVbpY@nm_%p{=KOoso z1QQOeM=+t+ogMVLej|E`3kkj2f=rEzK(8(l20qlemr5~m(-;j^t|ufL*b2Z7laceK zE#=^e=&_fA`xs=_-%q8VuAP)60|p;b<8ytgh!?xIB=YC{7KkMotsG|p0G5{)wdvD8L#iKIK-&gFS7r~vCBU(&TV zs50ptDJGXKHKt{F!J8JLy1(uN*CY~z2$>3*SC}c^XLm>I{s<&rmhe3)5J@>v_5FJ710Ov#m zsIyYTp;g<1QU!&!o)ZoU?~`mnD0U+3uJ#p~ z68s@IS>TPL3GbSP3Tr3}La)NTBw>I#tjkW5Rord7W`_#@A4Rr@D5Sb&Yo?!8hi;?x zWU!?|O1b2~t5m7Hfvc&W9A?}c>SlX-HBTlO@@6rsZ`e^kZ`?(Mc|bIzCwYJ$IQYO7 zrXA|ap?ug!Vn+dRoQox`hfid>$^=&9{Uvhqv$EU`P;;Y?!8S^E7QUzAb8L~zS(>X0 zZDsVCtL6FpxuTRc@F88^%vqtFOsaE3(ekuIbAIN&dk-2v{h|DPv$^!{yYEI7oM)5L zh4P_S>I-u##l=IcF2B3=h3DD|!=d@+;zHmArk?(6iR=KOBEte?{5|LGLR?P=jMfU0>~y*ACy)s)pz7H%gE|J{E@ntLNwSS0VF@oc!uNjd{ZOsYZqweLEfLh7Kg=?A5UTR2y|o6uV)o`*%W~eCP)~ z_WE;_>D4Oj*9XwuW0Mk9R!eoMw%M&8xHy>yHjHa#R|we<7jdJTZ2+xvj1oWzF9*yf zCjiJPMB2ED?e*+}fhFmk6d++ky#bIp1Ek8g&+SA27VHjEJ@vANvR$8?bZ5)rj%oHg+{HE``qs$q-NQH|-c4Mun_i7BV!hb^jl%qe^Bv za;n2|7%O;;EU7_X^?vY=y~=553OY!^{dJM86Iqe1qd}t{xvZ{noqkj!9iM-{a(=MB z4O{-aS!7+GRXdX5!I3G()VQNB_47NH=c$;Hz{d z+gO+aPjqYe$zuUw0v1o}JP<}WPCZ7~(A4QIg|>l@iJhJoojX@tt*rb7^;VrrqMrhM zZbd%}WYjV1xZ%ZZ^*Mx8uEyx4`%Jg7!cWKqCHqzsl#R4~5w@$AS4n-2-g_1;0N6dQ zCWH#5u7{_uay}-U8{04hin8kr!I3X&4Akj_-GUB=TrY)CK;ZfQ9x`-O#OksJgwCsb znpY@GY@&bs?1G~Vn0EUC_Cg==BT)`QBtL_CKbGF~fGY2ztp~RGQ7bQ4!W)~?MLWd6 zxAzk9kxQ0ub`V0C6H*7AM$xcU^iB>$!cRmZ`hz;X|jb?y67DOo;G4Y=0PeMaGhLPkb+AlWM`DSvrv&}>}< zXfh)V_b|`Lq71l=)s%VDM2~3^Am_WpG9>o(W~?I6keQJZCjN#FJcuZsW*($f%l9!? 
z{Vgp?!tQfz%gjabXyjZQ&SUa4jVW#Ztw%kT07?56)1fO9s+ir~x}^V!k2dbOTRMh7 zPicjH>4Z*&LGaGZOfq&@1XbfUn*$3HFaDb5ct4qhmDW1VbdN5DmxWzoyPC4?58I-&-b(vL z#{)x5!5R)>1xqKC6+DWmhRIr)VP6R|Kgl#=Kw$R z9%0shun-N}z_;K51hEfm0}9TA>xS*Kw;T3`y&|368(7l{nVjtq?Dl0MknxrY>Z5*!R80re17HRJCiuZ2$0ajyBbU7H z=-^GCQzV{46Jd?L(pH4Hp{;b3l!DMF%yt?d1SXI>O!S`&6TNTE$F2EowaRPGS|jBB zLBFA4Qs0wo9+`qwB`H7B^ytE4@&ql`NWc+@!r??{4FLU>j^>d`J&g350XnHbA%@Em zW=33N815w`f(wNdi9jwMwDvCq89d=d(vUWP#mHviLW2mp=%>e)B)N1Rh5{-=^z`~5 zGewzRW{S~grs9IeJWm8)Am(Nz>DIcEu~!6Wy8}^`b?e~p2zy*fd-9p%VzRhkEXcVVsJe?3?PxnVof&uC*?Ua zhr)iLj?vesI;l(5SQuB%49(Jwc;coqEX9)Lkai}>8z-Ch)r_TP9t)-V+?;{0aky~3 ziU>9vM+!G6uLgp`M~u+hh`EKEsQq^YhelT-iEbjMXb4$%XBM{7JCnJc|AsO*vipgL zg@`=Px0#)_CBt`?7V1se3|W)TAdV@KUUdFm|NHevt7@*JtJ=Unr$~HZVnG9G$%4Q3 zLmA{hp*IZj05c#aX%~kA8Xl8GPBS_M5f}UdS>DmM%g>b9z|7E`}P6-bnsEYTkLpZ34wIN)ko|oqtOUp&yM!a7*C?6;K zj@gi_Lw?DQTk<`)M{qC5q3EX^F5^?Z{27MRfq0#uteLQ);=#b@QHiL-)?T&5e)z4o z5$^P-G)%AxM6}E#?aVhHHRuSE^xK;UbYwLS?2)sg9fND!O5NhjCyKoZrOMz`1JG&& z8A}q%B*R+a!((`-|4smUVF zri%5~H*haxO({yIpSZS59kx-eaS#O#X}`T?^IYLx0QQYGiLxprb?7}AH}SlhsIZwd zLu6W(>yEu57Twq)@q2n;X5J3EjE+p}K_@DP58~D(-E!mE!)JYHjNEshRA;1GCPNd$ zVS2iepx>!h+!>EMa`PG&5Ez*%pe5%_O8^nOnSz$EgbZ4O86gO>>53%l3CT|~Sa?xh zYOvr|$lVw$L{1CDQL3<4 ztLu=j^$K<JkOsYBZTpgkB_y* z@l8DmqRuWT6FOwKeLWu66PqY122GF4VZiGhbfqYWp<~lNBCz|aMnKYL(lRBe&;UR8 zeT4=^f3{f}*&%DRw23Zdb)wL{1R{=MmMn)eQOV{tN7YCZor z)D;D&NBhD?Sx{j-y>@wBQ}M7WZz(S0Yy^hv#>N4u;u2fptyf$!%c=5${^RWlfDk6- zZ744$$+k3jB~MF(IkKq=WAWR@2j=rCWk!=8&fmkzZ7|>Ffr5@MQ@B}|Le0Vf<*rvE zt)-w<1KX{!IKHOUH9_o;3StlN^<)$6<(AAXnd`a#E<##v3yb|ax6I$;wUEp}N4sUx zD?4c*dRCm*zL=^bf==eu(%C2Nn1RZok?JOYMJUmlC&q;3T-r}FZjYj*VHc3-m0oyf zh%r&p+y@fuB-fq`QoACS$1sP>#NtO+Gk!Zg1?eab{?y1J`2 zzf7l#N#;@u74Wg#!c4US3|lf(JBXKwYR{dO5Ti?&%lIezwO zhXDkkgWk#2*wPdjZt4(m>jeWKkr1c}O@m9v7bdW?gpZ&7ka;1jy--*FZ*O^PK2QX2en=uugfv1ez{vQ=)%eKG*tf{3oC*JRl$4Vev+^pTRD(u z9(?f@<))R~p(6aY%g`WTE0yPF7gtKfBmZZ6zO@PvGx~1aKKof;U(!6QO|seK^5Ez1 z06jY9l+W_nOtN-CM(pEf zOWKND?LjHK*;}3J?tX2|dYP|cw&s6`Sq`1*%bXj<{;OP>5lOtx)5hp2o215Gt~_>U zHDJrHXEOZc(@I0w|2F`@u-%H674G1qT8C*MNl~~|$7H)eL?)3`1cGa8rI~5{VM*Dv zA5X`cvUcox*RJEN%C0PIJTfvWNzW3#)|%c%Uijxm#wxoC3n;N-vLDktk)=ffLC+Jl zuA?f|_*STUNyq-kT$HZRV^HH?Rey}ir5rhFE;7H_cEZGX`n@ngQK5ob#@zZv zn%9{+cEJ`TD1?Hogf+WyAAoD!zp4UEEU_OH6?j@%wm|h@4yiFKvs_J1!%$kzC>j)v z4dNnA(z_0LZ=jIa3cB~xN*0tT!rB_3<=%9k>RInQ1Wa!ep((hkfi`Dn{~zp_6PZ2P zA!v-P(XFokf=M-4m+nLrR`gVts^=rF4)!pj<`CoDu=Y#?9n^O7}Wr>tW?q zs$Cxyf7c*v-IjARwK)90a{{2Tsna!dmVe*Rr=HOFhj(GCVrxE5e&9quMjPiSdohh= zZXo992Jb_!y9YA+&|-GxMu)YF5T!`Rh?r&TJ*x65sXqRi`m0_b@Uo4`oVwui9Imi& zUa!~8WD@Y=P1eu!By%V@-be2WkZUm-;sdj|tR?x-!-eRPBgFE2HN+NHX| zPw?x^Gn;lHYY8gCZ>IY{2o91VJ}{C=O*D3u&p--k~c{i;aHy3@1fqQ zL1w7u@J;oJH>fHa4$o^x<JhkDX+;wFQp2)}Z_Li4&^Bsj24Q9#$WO(3Nz z=efl{ENz-RdbGj0c5fcA`wVy2KBaLy+tdglD5L;E2WU5EIWq={n;T;mvS6pldV|GR zgMM!)c@v1@C0hv9M_ytswA71wQf--@@P8{FNK#=>t8r?oVAqYiPmR0FjbpFt2qkJw zoaT|$LS8T=NSbU_w;{afHtLvBtr1={txNHQLJy#ev`P~0{o3Kecj-R;*(O5T7^JGB z;;3s&E03HNMb-BPv%#T+OzXNU3o|>AYc6i@>MlLT*eY-ZQs^%r4WNIUUDlXJZ_~Ah z>VDzVt-JV=->H7T*y;C)n24i%)jI8rhQRX@;hx9TrpJ^h%%LDVc0t-o;c;EIXD6Zt zKh1-vKwa4~slL`^^_~CsU;DjZd-mLOkA_#EsP=qZ$+Jr2kqQ0W_G@>I2}~o}%wk?s zk^e;ZGzoQr^I{0J(BfQJGt)oC6XpY0A;b@%w$#WVsK zDomH^A8@2Kw7Ewv7&sb??fBUnZS?3Db5o(=&~bExav7#NI@0Ldbnm&TV!uF`Hit5W=}#-s-yHBfjeZ1>87+Gf7OA~T;@ zsx_|CDY{R%x84(KPg|YoC`9Q4E$?k9Ol;Yqb#AGK6ov%a6;l0)AZn729q_`EUiN5y znIGC#>}hQ}@<9s3{-39mGg+}bE!q;hh^`lS1C81>S>FAyUVX2UgGwZz6&_QvQ;7`Q zfoWv6PBNM|P>aWUwl&7Htn`^Z$KvwRywGV;=ybb2)*r&G1l1iV5l6CPJ|by|gUa5O zIRs^y%8q77wZ&OZrTJ-kjczZK5iYU8qNPmOCddJ(3A;`URsvmQrFtF<0|9{sU;sy_ 
z6KZ#1o2Yg21+Ic(X=q69MW0_B_rk-bi9g;_p0q8O2Cw{M)AO;CLx_pA|x2qyw zqLZ?MeZtBH*Q~sNht!_}6dhWzuGYh*gJ82{W@}*zPs>TxzRbJr1JcKl|G05<`Rjc9 z>p&<%AZmn`GD?zdX#NZ}r=Vw@1-p(;2Fx6YPNWUi(MECQqC;r3K`KEx>6X}Hi#Efu zug(S{R$}-qr3x$W&GWTK$gDkRDV3dkEsjeTrGbok+iGSNiu*twv*ZnooXPu7oy=!?nIXbHXIJH9P-ZDaW#&Y=o%IoRQn{%tp)fIN$6tspMuWekL ztQ>E|v3J603>sQ!-kgs;6oH{JfH1RPxJ8zDLQgD)}c${waw^v3^cR z(0Xgq!oSj!-&OK&l>A#I|F05lz!K{#2b3F9vQx<)>ggXT>DQ&$Y{8CA#ef(7B@ZmL z##Cv)3*2mlX+5)Sxl?Z*Qqo!Vl+#Phmza{b845p`lyn3!l6on^a*a6msjmB#JfP$u zB?<*7Jgns1O5UU7Q6=wF@_r=}9|}(^`2dOLvn$y;^%M{ujcsNhkviD;*Cc(tyE*2N zooP{mcjpddb5e=3y}8}F9JF(1X3x;z$lx&l?H(K*+{@p6g9it9klQo3YjA9EdT{69 zjf2+>PVTyQFtclB@I$0e@jE)$H#p$X>01VeMs|(t8`(cHHn=;N8Le~PXlCTt;ERKI z4IauVxcZa(-bcN=28Z__7@Qi}F*3$~gM+sXUe89fy@SKM_Uzip-SLtBksEhpb`Q%w vEK4yv%xee#$aU@=?ir 0: + self.read(* args, **keywords) + + + def read(self, filename, keyfunc=firstword, valuefunc = lambda x: x, + errors=True, useIndex=False): + """Read sequences from a Fasta file""" + + if isinstance(filename, str) and useIndex and has_fasta_index(filename): + newkeys = self.index.read(filename) + + # store None's for when indexing should be used + for key in newkeys: + if key not in self: + self.names.append(key) + dict.__setitem__(self, key, None) + else: + for key, seq in iter_fasta(filename, keyfunc, valuefunc): + self.add(key, seq, errors) + + + def write(self, filename=sys.stdout, names=None, width=80): + """Write sequences in Fasta format""" + + out = util.open_stream(filename, "w") + + if names is None: + names = self.names + + for key in names: + print >>out, ">" + key + util.printwrap(self[key], width, out=out) + + + def __getitem__(self, key): + """Get a sequence by key""" + + val = SeqDict.__getitem__(self, key) + + if val is None: + # if val == None, then we are using fasta indexing + val = self.index.get(key) + + # cache value + self[key] = val + return val + else: + return val + + + def getseq(self, key, start=1, end=None, strand=1): + """Get a sequence (or subsequence) by key""" + + val = SeqDict.__getitem__(self, key) + + if val is None: + # if val == None, then we are using fasta indexing + return self.index.get(key, start, end, strand) + + else: + start = util.clamp(start, 1, None) + end = util.clamp(end, 1, None) + val = val[start-1:end] + + # reverse complement if needed + if strand == -1: + val = _revcomp(val) + + return val + + +#============================================================================= +# Convenience functions for input/output +# + + +def read_fasta(filename, keyfunc=firstword, valuefunc = lambda x: x, + errors=True, useIndex=True): + """Read a FASTA file into a sequence dictionary""" + + fa = FastaDict() + fa.read(filename, keyfunc, valuefunc, errors, useIndex=useIndex) + return fa + + +def write_fasta(filename, seqs, order = None, width=None): + """Write a FASTA dictionary into a file""" + + out = util.open_stream(filename, "w") + seqs.write(filename, order, width) + + +def write_fasta_ordered(filename, names, seqs, width=None): + """Write a FASTA in array style to a file""" + + out = util.open_stream(filename, "w") + + for name, seq in izip(names, seqs): + print >>out, ">%s" % name + util.printwrap(seq, width, out=out) + + +def iter_fasta(filename, keyfunc=firstword, valuefunc = lambda x: x): + """Iterate through the sequences of a FASTA file""" + key = "" + value = "" + + for line in util.open_stream(filename): + if len(line) > 0 and line[0] == ">": + if key != "": + yield (key, valuefunc(value)) + key = keyfunc(line[1:].rstrip()) + value = "" + elif key != "": + value 
+= line.rstrip() + if key != "": + yield (key, valuefunc(value)) + + +# DNA complements +_comp = {"A":"T", "C":"G", "G":"C", "T":"A", "N":"N", + "a":"t", "c":"g", "g":"c", "t":"a", "n":"n", + "R":"Y", "Y":"R", "S":"W", "W":"S", "K":"M", "M":"K", + "r":"y", "y":"r", "s":"w", "w":"s", "k":"m", "m":"k", + "B":"V", "V":"B", "D":"H", "H":"D", + "b":"v", "v":"b", "d":"h", "h":"d"} + +def _revcomp(seq): + """Reverse complement a sequence""" + + seq2 = [] + for i in xrange(len(seq)-1, -1, -1): + seq2.append(_comp[seq[i]]) + return "".join(seq2) + + + +#============================================================================= +# Rasmus FASTA Indexing +# + +def make_fasta_index(filename): + """I also have a faster C program called formatfa""" + + infile = util.open_stream(filename) + + index = {} + + for line in util.SafeReadIter(infile): + if line.startswith(">"): + index[line[1:].rstrip()] = infile.tell() + + return index + + +def has_fasta_index(fasta_file): + """Check to see if fasta_file has an index""" + + return os.path.exists(fasta_file + ".index") + + +def guess_fasta_width(fastaFile): + fafile = util.open_stream(fastaFile, "rb") + + numlines = 5 + lineno = 0 + width = -1 + width2 = -1 + maxwidth = 0 + + for line in fafile: + if len(line) != 0 and line[0] != ">": + lineno += 1 + width3 = len(line.rstrip()) + maxwidth = max(maxwidth, width3) + + if width == -1: + # first line + width = width3 + + elif width3 > width: + # widths cannot get bigger + return -1 + + elif width3 == width: + return width + + elif width2 == -1: + # this should be last line in sequence + width2 = width3 + return width + else: + # width got smaller twice + return -1 + else: + # previous sequence had only one line + # rest widths for next sequence + if width2 != -1: + width2 = -1 + else: + width = -1 + + return maxwidth + + + +class FastaIndex: + def __init__(self, *filenames): + self.filelookup = {} + self.index = {} + + for fn in filenames: + self.read(fn) + + + def read(self, filename): + # open fasta + infile = util.open_stream(filename, "rb") + + # estimate column width + self.width = guess_fasta_width(filename) + if self.width == -1: + raise Exception("lines do not have consistent width") + + # read index + keys = [] + for key, start, end in util.DelimReader(filename + ".index", delim="\t"): + keys.append(key) + self.index[key] = (int(start), int(end)) + self.filelookup[key] = infile + + # return keys read + return keys + + + def get(self, key, start=1, end=None, strand=1): + """Get a sequence by key + coordinates are 1-based and end is inclusive""" + + assert start > 0, Exception("must specify coordinates one-based") + assert key in self.index, Exception("key '%s' not in index" % key) + + if end != None and end < start: + return "" + + # must translate from one-based to zero-based + # must account for newlines + filestart, fileend = self.index[key] + start -= 1 + seek = filestart + start + (start // self.width) + + # if seek is past sequence then return empty sequence + if seek >= fileend: + return "" + + # seek to beginning + infile = self.filelookup[key] + infile.seek(seek) + + # read until end of sequence + seq = [] + if end == None: + lenNeeded = util.INF + else: + lenNeeded = end - start + + len2 = 0 + while len2 < lenNeeded: + line = infile.readline() + if line.startswith(">") or len(line) == 0: + break + seq.append(line.rstrip()) + len2 += len(seq[-1]) + if len2 > lenNeeded: + seq[-1] = seq[-1][:-int(len2 - lenNeeded)] + break + seq = "".join(seq) + + # reverse complement if needed + if strand == -1: 
+ seq = _revcomp(seq) + + return seq + + + + + + +#============================================================================= +# FASTA BLAST Indexing +# + +def fasta_get(fasta_file, key, start=0, end=0, strand=1): + """Get a sequence from a fasta file that has been indexed by 'formatdb'""" + + stream = os.popen("fastacmd -d %s -s %s -L %d,%d 2>/dev/null" % + (fasta_file, key, start, end)) + + # remove key + val = stream.read() + if val == "": + raise Exception("no such sequence") + else: + seq = val.split("\n")[1:] + seq = "".join(seq) + + if strand == -1: + seq = _revcomp(seq) + + return seq + + +def has_blast_index(fasta_file): + """Check to see if fasta_file has a formatdb fasta index""" + + return os.path.exists(fasta_file + ".psd") and \ + os.path.exists(fasta_file + ".psi") + + diff --git a/arghmm/deps/compbio/fasta.pyc b/arghmm/deps/compbio/fasta.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8ec7c925510a9836b9ce4195768c81a426e6890 GIT binary patch literal 10403 zcmcgy&u<(@c78p-$Qf}&niM5bvdq>GT9j>xw&Wez8(F)uXxU2Ew9|A}Fk@oUobDl; zoar8R^-yF$B1k0o5@fT;B?u7YzQ;WUIph)`ryz%5v42C($tinEzVB7f4CNr$g9jnm zudA!7tE=Ao-j904fBVmR?Snu1_up?x{&NQRALBB=aYO=<4Nymdg6ui6fr2d;WTT+v zqU;r=Q<9C6IBYA)URgR7*{I;TEI~!~CZsbd8o+srcK2VdOs)aKW z)FiA*8{?c|%?pvQH>M;+!xW#xI{$*Yyu=63a#H*|D=#sf87)uCOHA`MePxEP1g~l9 ztn9oY!K?&x(g@B;Fpt~3pwvJO>Knm%!Cxb|Ai+i479_ZY+a*D%fg02|g3E%xM)0}> zS8%%`C^Z5MyNKJOpwtLl39jOHRZwaK*Ce=(+jT*y5!{eq3AZIdsS&&>!A;z53QCP& zS%O=*-4c`MW^qv;5s=Hdi zg6?SnOL|{|4<+~kAC;$AP(%M9q5JZ(C>vPt2ihA;F39@w4+(*{L25M@Kc6Q-zZ1Ic zBz2!B>0T6XyU(L+$9?i>{i{cAJL-g{R*U`{KWodtjbtT|=#Qh`=&7GYorrC=s)CXU z_dC&+eV)McdbqzDwXzm_@hakajLXb`9Am%%7GH3RY+cLP@2w0ZAD82#VXx!2LOXC7 z6$gH&A1-Sr67vzrN;l3{wxaD7KW*(q2jNODO?JapW>B%c)9ns!LD(}Zt)$!AijtMK zZ!-T*?_G*=w>@$Y?PXe ztPNb>Xw6=Dgyjxa#YLE2C(7)axO8+F>x!xvQ>VdMT;%t3L4}MBPq=QC4IcD|Gx`{p zX@Z16Ae0tVtpWjAl(0l7p&XVatVmE)wVjY~Qlct#K0t@Eq7MpQQS?noFriJLunqub zUA*;Wa@+h9$aIsUK%y7`Rz77%z%dRL1Y(i)j$1x$ur$zt;=> z)Xh6kKknqyP<&8+yuRSNRzUz@=mt>o!VxDcP)qU9EK-q<9oE9Q?v^AuCl0YxX6}!vLN*D~Y({aEHJgfBWx~@q2oE#LFwzpccEVWIHVyqil}n#Y z*lBA4T48iF@PrUC<3yiV#{`ye8O~RndSR+CH_|04`D3=0K(PMx(YLKau*2b~atsR2 zc|7R&-L1fXc!!_HB`xxm`asU5(ua)$&QNAiZ1y>E3~3S18JUA z2uX>HvioMRE2&l4tRKaSrbZ1Z(kd zTx8Sg=AwL4;J7FHPIN{;fO5&c`B(A`gXDI|#S%Qo+=V&rhjA-3ZaYo7F0_nVuhxBS zd5R0A9!4gLQH_2XQDd?+o5p>|H_bNt18cyR$2#vd5Jg!G6)_pPN6m->MHH1bW4zI9 znlOv9unUM%!fQz!+HYlHYDd*?`9ZKu?t8N!ibZM`#|C@UBDVW+OAVGT}`+@P82Bv6g}tg1u5NdW{VS2aq7-F=W=0Arkvjc<=buG#6q$KqLqD~4zcpNs^xVm{qPGA~JQiA0L3@;R({w{~X zt3Xa!7Jv`fVZxx`IQ#bigA!msG@C_`XDOs`3J0)XIR=un>u2h0pDTT+>`MCCn!fE( z0aca>#M=ZYLci<1flBWZTPU?&8pYZ3)bA~GnJPo?>aYjr@H`5#9q%pHQ6l4%gre{{ zm-i!#Z0M7vI9sU0(oi60off?v0nJNg@EHoZ3{=4>18`M69A$8IAcK)sYcpBx$OCj- zcSuH>#xUX2FmwGOi`}gw`k0)F{B*7m+XIELOr$m`$5F4g*|KklTflvs-11iNy`SJR zRC{GqVYXOv$_0(!S`-$AI5dNQgNN_(3P417Sya4UdX-mT3y??{0)7r;Lu03mMSmDD z@HwvpUZDWC5^rEk?RF8bU$syN(syBZXVgZb11u&Qs1$fu$E)khR4?-;$ak2(jBwWU zw+43aRxS-)V;;;-;w^Qg%94>Vym#54s3#Lz@Dx4kYt!(6(c&qPtCZLa)-lagj5L8{ zAQZ)q1MfW)$0&QcJ@Gpxw3eRuHAmLYn=F8JuneOGwEVk|(rBSt`u@J! 
zBS;xdt6Epem14D8b*5+L>(!YSMKrint^bb@!lQDIrZ$FIoyUL&oE0t^sth@*^SFfq zROW@K$%YDO{5N#tfW7L6qcFBRhIjCadg@mKU|7|}^+zUU6bpMxctKxP7O9XdF4gW{ z<8aqWZji7uLQm`h{BiO@9XVr4>&TTOguaxj&g+i4KI#U=a+)ue9pFOl`W%U1G+|vzfab0WHei9QNH@F_rX`L9kqTq(!7zMXR_15tsQ(x6 zm@HBUh=@N$28#T&@EaLCir*tH6@Hy49&2{UV=S#gW$-J7ic7m({Y>eq(gUSiN_Uh7swqS%CGi-S`2u7cPRx|ciwI<+~@12%eeFFXiSy$53(aCQ)pi08P%o;NR5+M&qs z;O`RDa*#1Kbazi(1Z|1C;~#|JC%5g$T|IV@0o_jhuG{iE9VEJt?Zu|A?T>WL>Jkm5 z+W3rt*Sg;h8I(|w)z@p3YnUEZ)@EU+<2^w~>maV8q(X{f)o9HVe9C5AV>A5}eb?U$ zPp2gQ9(DP~Z@OR=a>ltFho+H0Ym>Y zE}PXY>Z+@F9>!f$;KLAtgci2DP-5NmLObaRN=!nK>b{OStY0(sSl#?r=9=h)G+~9_ zbTfYqTq0RjgcB-j#kbalROCJev%N=-W|E4?@MKcItlHWTgmn|^o)agKUz$W|KEsnHDQRbg_v{f}$Odlkd8_{tXwTk*QO4s>~qtpnviBQdvKXWuSvJK}_l*5*e1ns~`CAnY^Y+pFe>gH$i4Rl^rom&a`8`Vg1 z5J>?j5K1gmSU?1c&oVz<(!3WRs`{|+YebkUlgA)e706seNkQ)hi5n-G^+l~DHn4_t z^NPDu22_1`5O$)jq2#K1CUi>i@J+j`P*HuN$2T|;(sbG?U!#u-QyUKuJ=hmoNuS`s z?opT~v5{gIB{kE-^sqjY z1CZ>w20;jz*eSqRBdHBY0Y)b^iD|iYJ{R$XMV_3I!@redoMhB=ia$fU9wffNC{uDg zB@y~59D8#Z2py-0P4dUOhA%ZISe3~3t_$8o2M1%aH_js`gz@Nod@BDAyv#Lpuua+7 zy!s+KopEjLgD?0014acGQ|Zw($3ZjK1(ll^rY2`b4f!hulHv2}Z-`qk~@Oxg6pR@6RvWe6Ov z*Z~ZaUVylJbIIIPHN=MBp5Lg$qo!G3XOH&vJcs7|kuiw&pkJTxWX1ba_R~mCQ~uo7 zc~mP^A;d^~s;KG`pRPSoXU2>W4S=`D&cje$BSjCA5fAVXji8r_`%%Jsq={Yw4MWpx zv#Yd~wB=xn_buP+lH440gau#2A#P|-pzw6p`xI?s)#-OJ;5>Eu0SZ#O;@l`)a>}JS zc*=PsUdy=Z&~TnT)Zi(LQg^1D>rUC3!?94^xq^Pvd4iS!qW1yGha}1f3c==8`JvIw zweN&-fnxIKEa8i%i;m<*=D}t862+-od8Wg2nQVRo%0V4qm zLLG(L~N?r`hO}hb_M;`&TyBi?y5H0t6Eo+kac{g#{)BU16GR| zlb?b<`R%17nz9%=Aq#yDZ5Yy-I2`N^c^TjD(b*3=K z*bny>Z3V9)E=9-NC|GpRhiDp$+bI4E!zR|I`55UQfJe_;Nu^(e9!s9A^9N#g{MR_BnVg=O cs?E;62^;tpeqVPM3)QputyFK%J)1rEKhQWh?EnA( literal 0 HcmV?d00001 diff --git a/arghmm/deps/compbio/phylo.py b/arghmm/deps/compbio/phylo.py new file mode 100644 index 00000000..04734aeb --- /dev/null +++ b/arghmm/deps/compbio/phylo.py @@ -0,0 +1,2769 @@ +# +# Phylogeny functions +# Matt Rasmussen 2006-2012 +# + + +# python imports +import math +import os +import random +import sys + + +# rasmus imports +from rasmus import stats +from rasmus import treelib +from rasmus import util + +# compbio imports +from . import fasta + + + +#============================================================================= +# Gene to species mapping functions + + +def gene2species(genename): + """Default gene2species mapping""" + return genename + + +def make_gene2species(maps): + """Returns a function that maps gene names to species names + + maps -- a list of tuples [(gene_pattern, species_name), ... ] + """ + + # find exact matches and expressions + exacts = {} + exps = [] + for mapping in maps: + if "*" not in mapping[0]: + exacts[mapping[0]] = mapping[1] + else: + exps.append(mapping) + + # create mapping function + def gene2species(gene): + # eval expressions first in order of appearance + for exp, species in exps: + if exp[-1] == "*": + if gene.startswith(exp[:-1]): + return species + elif exp[0] == "*": + if gene.endswith(exp[1:]): + return species + + if gene in exacts: + return exacts[gene] + + raise Exception("Cannot map gene '%s' to any species" % gene) + return gene2species + + +def read_gene2species(* filenames): + """ + Reads a gene2species file + + Returns a function that will map gene names to species names. 
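+
+    Each line of the mapping file pairs a gene-name pattern with a species
+    name (typically tab-delimited); patterns may start or end with '*'.
+    For example, given a hypothetical file 'example.smap' containing
+
+        ENSG00000*      human
+        ENSMUSG*        mouse
+
+    one can write
+
+        gene2species = read_gene2species("example.smap")
+        gene2species("ENSG00000139618")   # -> "human"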
+ """ + + for filename in filenames: + maps = [] + for filename in filenames: + maps.extend(util.read_delim(util.skip_comments( + util.open_stream(filename)))) + return make_gene2species(maps) + + +#============================================================================= +# Reconciliation functions +# + + +def reconcile(gtree, stree, gene2species=gene2species): + """ + Returns a reconciliation dict for a gene tree 'gtree' and species tree 'stree' + """ + + recon = {} + + # determine the preorder traversal of the stree + order = {} + def walk(node): + order[node] = len(order) + node.recurse(walk) + walk(stree.root) + + + # label gene leaves with their species + for node in gtree.leaves(): + recon[node] = stree.nodes[gene2species(node.name)] + + # recurse through gene tree + def walk(node): + node.recurse(walk) + + if not node.is_leaf(): + # this node's species is lca of children species + recon[node] = reconcile_lca(stree, order, + util.mget(recon, node.children)) + walk(gtree.root) + + return recon + + +def reconcile_lca(stree, order, nodes): + """Helper function for reconcile""" + + # handle simple and complex cases + if len(nodes) == 1: + return nodes[0] + if len(nodes) > 2: + return treelib.lca(nodes) + + # 2 node case + node1, node2 = nodes + index1 = order[node1] + index2 = order[node2] + + while index1 != index2: + if index1 > index2: + node1 = node1.parent + index1 = order[node1] + else: + node2 = node2.parent + index2 = order[node2] + return node1 + + +def reconcile_node(node, stree, recon): + """Reconcile a single gene node to a species node""" + return treelib.lca([recon[x] for x in node.children]) + + +def label_events(gtree, recon): + """Returns a dict with gene node keys and values indicating + 'gene', 'spec', or 'dup'""" + events = {} + + def walk(node): + events[node] = label_events_node(node, recon) + node.recurse(walk) + walk(gtree.root) + + return events + + +def label_events_node(node, recon): + if not node.is_leaf(): + if recon[node] in map(lambda x: recon[x], node.children): + return "dup" + else: + return "spec" + else: + return "gene" + + +def find_loss_node(node, recon): + """Finds the loss events for a branch in a reconciled gene tree""" + loss = [] + + # if not parent, then no losses + if not node.parent: + return loss + + # determine starting and ending species + sstart = recon[node] + send = recon[node.parent] + + # determine species path of this gene branch (node, node.parent) + ptr = sstart + spath = [] + while ptr != send: + spath.append(ptr) + ptr = ptr.parent + + # determine whether node.parent is a dup + # if so, send (species end) is part of species path + if label_events_node(node.parent, recon) == "dup": + spath.append(send) + + # go up species path (skip starting species) + # every node on the list is at least one loss + for i, snode in enumerate(spath[1:]): + for schild in snode.children: + if schild != spath[i]: + loss.append([node, schild]) + + return loss + + +def find_loss_under_node(node, recon): + loss = [] + snodes = {} + internal = {} + species1 = recon[node] + + # walk from child species to parent species + for child in node.children: + ptr = recon[child] + snodes[ptr] = 1 + while ptr != species1: + ptr = ptr.parent + snodes[ptr] = 1 + internal[ptr] = 1 + + # foreach internal node in partial speciation tree, all children + # not in speciation are loss events + for i in internal: + for child in i.children: + if child not in snodes: + loss.append([node,child]) + return loss + + +def find_loss(gtree, stree, recon, node=None): + """Returns a 
list of gene losses in a gene tree""" + loss = [] + + def walk(node): + loss.extend(find_loss_node(node, recon)) + node.recurse(walk) + if node: + walk(node) + else: + walk(gtree.root) + + return loss + + +def count_dup(gtree, events, node=None): + """Returns the number of duplications in a gene tree""" + var = {"dups": 0} + + def walk(node): + if events[node] == "dup": + var["dups"] += len(node.children) - 1 + node.recurse(walk) + if node: + walk(node) + else: + walk(gtree.root) + + return var["dups"] + + +def count_dup_loss(gtree, stree, recon, events=None): + """Returns the number of duplications + losses in a gene tree""" + if events is None: + events = label_events(gtree, recon) + + nloss = len(find_loss(gtree, stree, recon)) + ndups = count_dup(gtree, events) + return nloss + ndups + + +def find_species_roots(tree, stree, recon): + """Find speciation nodes in the gene tree that reconcile to the + species tree root""" + + roots = [] + def walk(node): + found = False + for child in node.children: + found = walk(child) or found + if not found and recon[node] == stree.root: + roots.append(node) + found = True + return found + walk(tree.root) + return roots + + +def find_orthologs(gtree, stree, recon, events=None, counts=True): + """Find all ortholog pairs within a gene tree""" + + if events is None: + events = label_events(gtree, recon) + orths = [] + + for node, event in events.items(): + if event == "spec": + leavesmat = [x.leaves() for x in node.children] + sp_counts = [util.hist_dict(util.mget(recon, row)) + for row in leavesmat] + + for i in range(len(leavesmat)): + for j in range(i+1, len(leavesmat)): + for gene1 in leavesmat[i]: + for gene2 in leavesmat[j]: + if gene1.name > gene2.name: + g1, g2 = gene2, gene1 + a, b = j, i + else: + g1, g2 = gene1, gene2 + a, b = i, j + + if not counts: + orths.append((g1.name, g2.name)) + else: + orths.append((g1.name, g2.name, + sp_counts[a][recon[g1]], + sp_counts[b][recon[g2]])) + + return orths + + + +def subset_recon(tree, recon, events=None): + """Ensure the reconciliation only refers to nodes in tree""" + + # get all nodes that are walkable + nodes = set(tree.postorder()) + for node in list(recon): + if node not in nodes: + del recon[node] + if events: + for node in list(events): + if node not in nodes: + del events[node] + + + +#============================================================================= +# Reconciliation Input/Output + +def write_recon(filename, recon): + """Write a reconciliation to a file""" + util.write_delim(filename, [(str(a.name), str(b.name)) + for a,b in recon.items()]) + + +def read_recon(filename, tree1, tree2): + """Read a reconciliation from a file""" + recon = {} + for a, b in util.read_delim(filename): + if a.isdigit(): a = int(a) + if b.isdigit(): b = int(b) + recon[tree1.nodes[a]] = tree2.nodes[b] + return recon + + +def write_events(filename, events): + """Write events data structure to file""" + util.write_delim(filename, [(str(a.name), b) for a,b in events.items()]) + + +def read_events(filename, tree): + """Read events data structure from file""" + events = {} + for name, event in util.read_delim(filename): + if name.isdigit(): name = int(name) + events[tree.nodes[name]] = event + return events + + +def write_recon_events(filename, recon, events=None, noevent=""): + """Write a reconciliation and events to a file""" + + if events is None: + events = dict.fromkeys(recon.keys(), noevent) + + util.write_delim(filename, [(str(a.name), str(b.name), events[a]) + for a,b in recon.items()]) + + +def 
read_recon_events(filename, tree1, tree2): + """Read a reconciliation and events data structure from file""" + + recon = {} + events = {} + for a, b, event in util.read_delim(filename): + if a.isdigit(): a = int(a) + if b.isdigit(): b = int(b) + node1 = tree1.nodes[a] + recon[node1] = tree2.nodes[b] + events[node1] = event + return recon, events + + +#============================================================================ +# duplication loss counting +# + + +def init_dup_loss_tree(stree): + # initalize counts to zero + def walk(node): + node.data['dup'] = 0 + node.data['loss'] = 0 + node.data['appear'] = 0 + node.data['genes'] = 0 + node.recurse(walk) + walk(stree.root) + + +def count_dup_loss_tree(tree, stree, gene2species, recon=None): + """count dup loss""" + + if recon is None: + recon = reconcile(tree, stree, gene2species) + events = label_events(tree, recon) + losses = find_loss(tree, stree, recon) + + dup = 0 + loss = 0 + appear = 0 + + # count appearance + recon[tree.root].data["appear"] += 1 + appear += 1 + + # count dups + for node, event in events.iteritems(): + if event == "dup": + recon[node].data['dup'] += 1 + dup += 1 + elif event == "gene": + recon[node].data['genes'] += 1 + + # count losses + for gnode, snode in losses: + snode.data['loss'] += 1 + loss += 1 + + return dup, loss, appear + + +def count_ancestral_genes(stree): + """count ancestral genes""" + def walk(node): + if not node.is_leaf(): + counts = [] + for child in node.children: + walk(child) + counts.append(child.data['genes'] + - child.data['appear'] + - child.data['dup'] + + child.data['loss']) + assert util.equal(* counts), str(counts) + node.data['genes'] = counts[0] + walk(stree.root) + + +def count_dup_loss_trees(trees, stree, gene2species): + """ + Returns new species tree with dup,loss,appear,genes counts in node's data + """ + + stree = stree.copy() + init_dup_loss_tree(stree) + + for tree in trees: + count_dup_loss_tree(tree, stree, gene2species) + count_ancestral_genes(stree) + + return stree + + +def write_event_tree(stree, out=sys.stdout): + labels = {} + for name, node in stree.nodes.iteritems(): + labels[name] = "[%s]\nD=%d,L=%d;\nG=%d;" % \ + (str(name), + node.data['dup'], node.data['loss'], + node.data['genes']) + + treelib.draw_tree(stree, labels=labels, minlen=15, spacing=4, + labelOffset=-3, + out=out) + + +def dup_consistency(tree, recon, events): + """ + Calculate duplication consistency scores for a reconcilied tree + + See Vilella2009 (Ensembl) + """ + + if len(tree.leaves()) == 1: + return {} + + spset = {} + def walk(node): + for child in node.children: + walk(child) + if node.is_leaf(): + spset[node] = set([recon[node]]) + elif len(node.children) == 1: + pass + elif len(node.children) == 2: + spset[node] = (spset[node.children[0]] | + spset[node.children[1]]) + else: + raise Exception("too many children (%d)" % len(node.children)) + walk(tree.root) + + conf = {} + for node in tree: + if events[node] == "dup": + conf[node] = (len(spset[node.children[0]] & + spset[node.children[1]]) / + float(len(spset[node]))) + + return conf + + +#============================================================================= +# tree rooting + + +def recon_root(gtree, stree, gene2species = gene2species, + rootby = "duploss", newCopy=True): + """Reroot a tree by minimizing the number of duplications/losses/both""" + + # make a consistent unrooted copy of gene tree + if newCopy: + gtree = gtree.copy() + + if len(gtree.leaves()) == 2: + return + + treelib.unroot(gtree, newCopy=False) + 
treelib.reroot(gtree, + gtree.nodes[sorted(gtree.leaf_names())[0]].parent.name, + onBranch=False, newCopy=False) + + + # make recon root consistent for rerooting tree of the same names + # TODO: there is the possibility of ties, they are currently broken + # arbitrarily. In order to make comparison of reconRooted trees with + # same gene names accurate, hashOrdering must be done, for now. + hash_order_tree(gtree, gene2species) + + # get list of edges to root on + edges = [] + def walk(node): + edges.append((node, node.parent)) + if not node.is_leaf(): + node.recurse(walk) + edges.append((node, node.parent)) + for child in gtree.root.children: + walk(child) + + + # try initial root and recon + treelib.reroot(gtree, edges[0][0].name, newCopy=False) + recon = reconcile(gtree, stree, gene2species) + events = label_events(gtree, recon) + + # find reconciliation that minimizes loss + minroot = edges[0] + rootedge = sorted(edges[0]) + if rootby == "dup": + cost = count_dup(gtree, events) + elif rootby == "loss": + cost = len(find_loss(gtree, stree, recon)) + elif rootby == "duploss": + cost = count_dup_loss(gtree, stree, recon, events) + else: + raise "unknown rootby value '%s'" % rootby + mincost = cost + + + # try rooting on everything + for edge in edges[1:]: + if sorted(edge) == rootedge: + continue + rootedge = sorted(edge) + + node1, node2 = edge + if node1.parent != node2: + node1, node2 = node2, node1 + assert node1.parent == node2, "%s %s" % (node1.name, node2.name) + + # uncount cost + if rootby in ["dup", "duploss"]: + if events[gtree.root] == "dup": + cost -= 1 + if events[node2] == "dup": + cost -= 1 + if rootby in ["loss", "duploss"]: + cost -= len(find_loss_under_node(gtree.root, recon)) + cost -= len(find_loss_under_node(node2, recon)) + + # new root and recon + treelib.reroot(gtree, node1.name, newCopy=False) + + recon[node2] = reconcile_node(node2, stree, recon) + recon[gtree.root] = reconcile_node(gtree.root, stree, recon) + events[node2] = label_events_node(node2, recon) + events[gtree.root] = label_events_node(gtree.root, recon) + + if rootby in ["dup", "duploss"]: + if events[node2] == "dup": + cost += 1 + if events[gtree.root] == "dup": + cost += 1 + if rootby in ["loss", "duploss"]: + cost += len(find_loss_under_node(gtree.root, recon)) + cost += len(find_loss_under_node(node2, recon)) + + # keep track of min cost + if cost < mincost: + mincost = cost + minroot = edge + + + # root tree by minroot + if edge != minroot: + node1, node2 = minroot + if node1.parent != node2: + node1, node2 = node2, node1 + assert node1.parent == node2 + treelib.reroot(gtree, node1.name, newCopy=False) + + return gtree + + +def midroot_recon(tree, stree, recon, events, params, generate): + + node1, node2 = tree.root.children + + specs1 = [] + specs2 = [] + + # find nearest specs/genes + def walk(node, specs): + if events[node] == "dup": + for child in node.children: + walk(child, specs) + else: + specs.append(node) + #walk(node1, specs1) + #walk(node2, specs2) + specs1 = node1.leaves() + specs2 = node2.leaves() + + def getDists(start, end): + exp_dist = 0 + obs_dist = 0 + + sstart = recon[start] + send = recon[end] + while sstart != send: + exp_dist += params[sstart.name][0] + sstart = sstart.parent + + while start != end: + obs_dist += start.dist + start = start.parent + + return exp_dist, obs_dist / generate + + diffs1 = [] + for spec in specs1: + if events[tree.root] == "spec": + exp_dist1, obs_dist1 = getDists(spec, tree.root) + else: + exp_dist1, obs_dist1 = getDists(spec, node1) + 
diffs1.append(obs_dist1 - exp_dist1) + + diffs2 = [] + for spec in specs2: + if events[tree.root] == "spec": + exp_dist2, obs_dist2 = getDists(spec, tree.root) + else: + exp_dist2, obs_dist2 = getDists(spec, node2) + diffs2.append(obs_dist2 - exp_dist2) + + totdist = (node1.dist + node2.dist) / generate + + left = node1.dist - stats.mean(diffs1) + right = totdist - node2.dist + stats.mean(diffs2) + + #print diffs1, diffs2 + #print stats.mean(diffs1), stats.mean(diffs2) + + mid = util.clamp((left + right) / 2.0, 0, totdist) + + node1.dist = mid * generate + node2.dist = (totdist - mid) * generate + + + +def stree2gtree(stree, genes, gene2species): + """Create a gene tree with the same topology as the species tree""" + + tree = stree.copy() + + for gene in genes: + tree.rename(gene2species(gene), gene) + return tree + + +#============================================================================= +# relationships +# encoded using gene name tuples + + +def get_gene_dups(tree, events): + """Returns duplications as gene name tuples""" + return set(tuple(sorted([tuple(sorted(child.leaf_names())) + for child in node.children])) + for node, kind in events.iteritems() + if kind == "dup") + +def get_speciations(tree, events): + """Returns speciations as gene name tuples""" + return set(tuple(sorted([tuple(sorted(child.leaf_names())) + for child in node.children])) + for node, kind in events.iteritems() + if kind == "spec") + + +def get_gene_losses(tree, stree, recon): + """Returns losses as gene name, species name tuples""" + return set((loss[0].name, loss[1].name) + for loss in find_loss(tree, stree, recon)) + + +def get_orthologs(tree, events): + """Returns orthologs as gene name pairs""" + + specs = [sorted([sorted(child.leaf_names()) + for child in node.children]) + for node in events + if events[node] == "spec"] + + return set(tuple(sorted((a, b))) + for x in specs + for a in x[0] + for b in x[1]) + + +#============================================================================= +# Tree hashing +# + +def hash_tree_compose(child_hashes, node=None): + return "(%s)" % ",".join(child_hashes) + + +def hash_tree(tree, smap=lambda x: x, compose=hash_tree_compose): + def walk(node): + if node.is_leaf(): + return smap(node.name) + else: + child_hashes = map(walk, node.children) + child_hashes.sort() + return compose(child_hashes, node) + + if isinstance(tree, treelib.Tree) or hasattr(tree, "root"): + return walk(tree.root) + elif isinstance(tree, treelib.TreeNode): + return walk(tree) + else: + raise Exception("Expected Tree object") + + +def hash_order_tree(tree, smap = lambda x: x): + def walk(node): + if node.is_leaf(): + return smap(node.name) + else: + child_hashes = map(walk, node.children) + ind = util.sortindex(child_hashes) + child_hashes = util.mget(child_hashes, ind) + node.children = util.mget(node.children, ind) + return hash_tree_compose(child_hashes) + walk(tree.root) + + +#============================================================================= +# branch-based reconciliations +# useful for modeling HGT + +''' + +brecon = {node: branch_path, ...} +branch_path = [(snode, event), ...] + +The path is from parent to node, so that the last pair is the actual +node reconciliation (mapping). 
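+
+For example (illustrative only; the event names are defined below), a gene
+branch that passes through a speciation at which the sibling lineage is lost
+and then ends in a duplication could be recorded as:
+
+    brecon[node] = [(snode1, "specloss"), (snode2, "dup")]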
+ +event is one of the following: + gene -- leaf node + dup -- duplication + spec -- speciation + specloss -- speciation point but only one lineage survives (other is lost) + trans -- transfer event, one child horizontally transferred + transloss -- transfer event, native copy is lost + + +These events happen at the end of a branch_path: + gene, dup, spec, trans + +These events happen everywhere else: + specloss, transloss + +''' + + +def brecon2recon_events(brecon): + """ + Returns 'recon' and 'events' data structures from a branch reconciliation + """ + recon = {} + events = {} + + for node, branch_path in brecon.iteritems(): + recon[node] = branch_path[-1][0] + events[node] = branch_path[-1][1] + + return recon, events + + +def recon_events2brecon(recon, events): + """ + Returns a branch reconciliation from 'recon' and 'events' data structures + """ + + brecon = {} + for node, snode in recon.iteritems(): + branch = [] + if node.parent: + sparent = recon[node.parent] + if sparent != snode: + if events[node.parent] == "dup": + branch.append((sparent, "specloss")) + + losses = [] + ptr = snode.parent + while ptr != sparent: + losses.append((ptr, "specloss")) + ptr = ptr.parent + + branch.extend(reversed(losses)) + + branch.append((snode, events[node])) + brecon[node] = branch + + return brecon + + +def subtree_brecon_by_leaves(tree, brecon, leaves): + """ + Find a subtree of tree and branch reconciliation 'brecon' + + tree -- tree to subset + brecon -- branch reconciliation + leaves -- leaf nodes to keep in tree + """ + + # record orignal parent pointers + parents = dict((node, node.parent) for node in tree) + + # NOTE: calculating doomed nodes requires single children + nnodes = len(tree.nodes) + treelib.subtree_by_leaves(tree, leaves, keep_single=True) + doomed = nnodes - len(tree.nodes) + + # now remove single children + treelib.remove_single_children(tree) + + # modify brecon structure + for node in tree: + # find path for branch + ptr = node + path = [] + while ptr != node.parent: + path.append(ptr) + ptr = parents[ptr] + path.reverse() + + # concatenate brecon info + if len(path) == 1: + continue + else: + branch_path = [] + for node2 in path: + size = len(brecon[node2]) + for i, (snode, event) in enumerate(brecon[node2]): + if node2 == node and i == size - 1: + # last event does not need editing + branch_path.append((snode, event)) + elif event == "trans": + branch_path.append((snode, "transloss")) + elif event == "spec": + branch_path.append((snode, "specloss")) + elif event == "dup": + # skip these events, they're "doomed" + continue + elif event in ("transloss", "specloss"): + # these events don't need editing + branch_path.append((snode, event)) + else: + raise Exception("unknown event '%s'" % + str((snode, event))) + + # post process path: remove transloss where destination lineage + # is lost. 
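+ # The scan below runs from the end of the path backwards so that deleting
+ # an index never shifts the (smaller) indices still left to delete.
+ # The final entry of a branch_path should always be a terminal event
+ # (gene/dup/spec/trans, per the format notes above), never "transloss",
+ # so the branch_path[i+1] lookup stays in bounds.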
+ remove = [i for i in xrange(len(branch_path)-1, -1, -1) + if (branch_path[i][1] == "transloss" and + branch_path[i][0] == branch_path[i+1][0])] + for i in remove: + del branch_path[i] + + brecon[node] = branch_path + + # remove unused nodes from brecon + for node in brecon.keys(): + if node.name not in tree: + del brecon[node] + + return doomed + + +def add_implied_spec_nodes_brecon(tree, brecon): + """ + adds speciation nodes to tree that are implied but are not present + because of gene losses + """ + + for node, events in brecon.items(): + for sp, event in events: + if event == "specloss": + parent = node.parent + children = parent.children + node2 = tree.new_node() + + node2.parent = parent + children[children.index(node)] = node2 + + node.parent = node2 + node2.children.append(node) + + brecon[node2] = [[sp, "spec"]] + + elif event == "transloss": + + parent = node.parent + children = parent.children + node2 = tree.new_node() + + node2.parent = parent + children[children.index(node)] = node2 + + node.parent = node2 + node2.children.append(node) + + brecon[node2] = [[sp, "trans"]] + + + brecon[node] = events[-1:] + + + +def write_brecon(out, brecon): + """ + Writes a branch reconciliation to file + """ + + for node, branch_path in brecon.iteritems(): + out.write(str(node.name)) + for snode, event in branch_path: + out.write("\t" + str(snode.name) + "\t" + event) + out.write("\n") + + +def read_brecon(infile, tree, stree): + """ + Reads branch reconciliation from file + """ + + brecon = {} + + for line in infile: + tokens = line.rstrip().split("\t") + + # parse node + node_name = tokens[0] + if node_name.isdigit(): + node_name = int(node_name) + node = tree[node_name] + + events = [] + for i in range(1, len(tokens), 2): + snode_name = tokens[i] + event = tokens[i+1] + + if snode_name.isdigit(): + snode_name = int(snode_name) + snode = stree[snode_name] + + events.append([snode, event]) + + brecon[node] = events + + return brecon + + +def find_bevents(brecon): + """ + Iterates over branch events (bevents) implied by a branch reconciliation + + Events have the format + (gene_node, 'v'|'e', event, details) + where gene_node is the vertex ('v') or edge ('e') where the event occurs + and (event, details) are one of the following + + 'spec', snode = speciation event at species node snode + 'gene', snode = extant gene (leaf) at species node snode + 'dup', snode = duplication event along species branch snode + 'loss', snode = loss event occuring along species branch snode + 'trans', (src, dst) = horizontal transfer event from source species src + to destination species dst + + """ + + for node, branch_path in brecon.iteritems(): + for i, (snode, event) in enumerate(branch_path): + if event == "dup": + yield (node, "v", "dup", snode) + elif event == "spec": + yield (node, "v", "spec", snode) + elif event == "gene": + yield (node, "v", "gene", snode) + elif event == "specloss": + yield (node, "e", "spec", snode) + + # mark the species branch in which we expected a gene lineage + # but it is absent + next_snode = branch_path[i+1][0] + for schild in snode.children: + if schild != next_snode: + yield (node, "e", "loss", schild) + elif event == "trans": + # the target species is the species that one of the children + # map to + assert len(node.children) == 2, len(node.children) + starget = brecon[node.children[0]][0][0] + if starget == snode: + starget = brecon[node.children[1]][0][0] + assert starget != snode + yield (node, "v", "trans", (snode, starget)) + + elif event == "transloss": + # the 
gene is lost in this species + yield (node, "e", "loss", snode) + + # it transfers to the next species + yield (node, "e", "trans", (snode, branch_path[i+1][0])) + + else: + raise Exception("unknown event '%s'" % event) + + +def write_bevents(out, bevents): + """ + Writes branch events to file + """ + + for node, kind, event, details in bevents: + if event == "trans": + out.write("%s\t%s\t%s\t%s\t%s\n" % + (str(node.name), kind, event, str(details[0].name), + str(details[1].name))) + else: + out.write("%s\t%s\t%s\t%s\n" % + (str(node.name), kind, event, str(details.name))) + + + + +#============================================================================= +# add implied speciation nodes to a gene tree + + + +def add_spec_node(node, snode, tree, recon, events): + """ + insert new speciation node above gene node 'node' from gene tree 'tree' + + new node reconciles to species node 'snode'. Modifies recon and events + accordingly + """ + + newnode = treelib.TreeNode(tree.new_name()) + parent = node.parent + + # find index of node in parent's children + nodei = parent.children.index(node) + + # insert new node into tree + tree.add_child(parent, newnode) + parent.children[nodei] = newnode + parent.children.pop() + tree.add_child(newnode, node) + + # add recon and events info + recon[newnode] = snode + events[newnode] = "spec" + + return newnode + + +def add_implied_spec_nodes(tree, stree, recon, events): + """ + adds speciation nodes to tree that are implied but are not present + because of gene losses + """ + + added_nodes = [] + + for node in list(tree): + # process this node and the branch above it + + # handle root node specially + if node.parent is None: + # ensure root of gene tree properly reconciles to + # root of species tree + if recon[node] == stree.root: + continue + tree.root = treelib.TreeNode(tree.new_name()) + tree.add_child(tree.root, node) + recon[tree.root] = stree.root + events[tree.root] = "spec" + added_nodes.append(tree.root) + + # determine starting and ending species + sstart = recon[node] + send = recon[node.parent] + + # the species path is too short to have implied speciations + if sstart == send: + continue + + parent = node.parent + + # determine species path of this gene branch (node, node->parent) + snode = sstart.parent + + while snode != send: + added_nodes.append(add_spec_node(node, snode, tree, recon, events)) + node = node.parent + snode = snode.parent + + + # determine whether node.parent is a dup + # if so, send (a.k.a. 
species end) is part of species path + if events[parent] == "dup": + added_nodes.append(add_spec_node(node, send, tree, recon, events)) + + return added_nodes + + +#============================================================================= +# reconciliation rearrangements + +def change_recon_up(recon, node, events=None): + """ + Move the mapping of a node up one branch + """ + + if events is not None and events[node] == "spec": + # promote speciation to duplication + # R'(v) = e(R(u)) + events[node] = "dup" + else: + # R'(v) = p(R(u)) + recon[node] = recon[node].parent + + +def change_recon_down(recon, node, schild, events=None): + """ + Move the mapping of a node down one branch + """ + + if events is not None and recon[node] == schild: + events[node] = "spec" + else: + recon[node] = schild + + +def can_change_recon_up(recon, node, events=None): + """Returns True is recon can remap node one 'step' up""" + + if events is not None and events[node] == "spec" and not node.is_leaf(): + # promote speciation to duplication + return True + else: + # move duplication up one branch + rnode = recon[node] + prnode = rnode.parent + + # rearrangement is valid if + return (not node.is_leaf() and + prnode is not None and # 1. there is parent sp. branch + (node.parent is None or # 2. no parent to restrict move + rnode != recon[node.parent] # 3. not already matching parent + )) + + +def enum_recon(tree, stree, depth=None, + step=0, preorder=None, + recon=None, events=None, + gene2species=None): + """ + Enumerate reconciliations between a gene tree and a species tree + """ + + if recon is None: + recon = reconcile(tree, stree, gene2species) + events = label_events(tree, recon) + + if preorder is None: + preorder = list(tree.preorder()) + + # yield current recon + yield recon, events + + if depth is None or depth > 0: + for i in xrange(step, len(preorder)): + node = preorder[i] + if can_change_recon_up(recon, node, events): + schild = recon[node] + change_recon_up(recon, node, events) + + # recurse + depth2 = depth - 1 if depth is not None else None + for r, e in enum_recon(tree, stree, depth2, + i, preorder, + recon, events): + yield r, e + + change_recon_down(recon, node, schild, events) + + + + +#============================================================================= +# local rearrangements + + +def perform_nni(tree, node1, node2, change=0, rooted=True): + """Proposes a new tree using Nearest Neighbor Interchange + + Branch for NNI is specified by giving its two incident nodes (node1 and + node2). Change specifies which subtree of node1 will be swapped with + the uncle. See figure below. 
+ + node2 + / \ + uncle node1 + / \ + child[0] child[1] + + special case with rooted branch and rooted=False: + + node2 + / \ + node2' node1 + / \ / \ + uncle * child[0] child[1] + + """ + + if node1.parent != node2: + node1, node2 = node2, node1 + + # try to see if edge is one branch (not root edge) + if not rooted and treelib.is_rooted(tree) and \ + node2 == tree.root: + # special case of specifying root edge + if node2.children[0] == node1: + node2 = node2.children[1] + else: + node2 = node2.children[0] + + # edge is not an internal edge, give up + if len(node2.children) < 2: + return + + if node1.parent == node2.parent == tree.root: + uncle = 0 + + if len(node2.children[0].children) < 2 and \ + len(node2.children[1].children) < 2: + # can't do NNI on this branch + return + else: + assert node1.parent == node2 + + # find uncle + uncle = 0 + if node2.children[uncle] == node1: + uncle = 1 + + # swap parent pointers + node1.children[change].parent = node2 + node2.children[uncle].parent = node1 + + # swap child pointers + node2.children[uncle], node1.children[change] = \ + node1.children[change], node2.children[uncle] + + + +def propose_random_nni(tree): + """ + Propose a random NNI rearrangement + """ + + nodes = tree.nodes.values() + + # find edges for NNI + while True: + node1 = random.sample(nodes, 1)[0] + if not node1.is_leaf() and node1.parent is not None: + break + + node2 = node1.parent + #a = node1.children[random.randint(0, 1)] + #b = node2.children[1] if node2.children[0] == node1 else node2.children[0] + #assert a.parent.parent == b.parent + + return node1, node2, random.randint(0, 1) + + + + +def perform_spr(tree, subtree, newpos): + """ + Proposes new topology using Subtree Pruning and Regrafting (SPR) + + a = subtree + e = newpos + + BEFORE + .... + f d + / \ + c e + / \ ... + a b + ... ... + + AFTER + + f d + / \ + b c + ... / \ + a e + ... ... + + Requirements: + 1. a (subtree) is not root or children of root + 2. e (newpos) is not root, a, descendant of a, c (parent of a), or + b (sibling of a) + 3. tree is binary + +""" + a = subtree + e = newpos + + c = a.parent + f = c.parent + bi = 1 if c.children[0] == a else 0 + b = c.children[bi] + ci = 0 if f.children[0] == c else 1 + d = e.parent + ei = 0 if d.children[0] == e else 1 + + d.children[ei] = c + c.children[bi] = e + f.children[ci] = b + b.parent = f + c.parent = d + e.parent = c + + + + +def propose_random_spr(tree): + """ + What if e == f (also equivalent to NNI) this is OK + + BEFORE + + d + / \ + e ... + / \ + c ... + / \ + a b + ... ... + + AFTER + d + / \ + c + / \ + a e + ... / \ + b ... + ... + + What if d == f (also equivalent to NNI) this is OK + + BEFORE + + f + / \ + c e + / \ ... + a b + ... ... + + AFTER + + f + / \ + b c + ... / \ + a e + ... ... + + Requirements: + 1. a (subtree) is not root or children of root + 2. e (newpos) is not root, a, descendant of a, c (parent of a), or + b (sibling of a) + 3. 
tree is binary + """ + + assert len(tree.nodes) >= 5, "Tree is too small" + + # find subtree (a) to cut off (any node that is not root or child of root) + nodes = tree.nodes.values() + while True: + a = random.sample(nodes, 1)[0] + if (a.parent is not None and a.parent.parent is not None): + break + subtree = a + + # find sibling (b) of a + c = a.parent + bi = 1 if c.children[0] == a else 0 + b = c.children[bi] + + # choose newpos (e) + e = None + while True: + e = random.sample(nodes, 1)[0] + + # test if e is a valid choice + if e.parent is None or e == a or e == c or e == b: + continue + + # also test if e is a descendent of a + under_a = False + ptr = e.parent + while ptr is not None: + if ptr == a: + under_a = True + break + ptr = ptr.parent + if under_a: + continue + + break + newpos = e + + return subtree, newpos + + +#============================================================================= +# tree search + +class TreeSearch (object): + + def __init__(self, tree): + self.tree = tree + + def __iter__(self): + return self + + def set_tree(self, tree): + self.tree = tree + + def get_tree(self): + return self.tree + + def propose(self): + raise + + def revert(self): + raise + + def reset(self): + pass + + def next(self): + return self.propose() + + +class TreeSearchNni (TreeSearch): + + def __init__(self, tree): + TreeSearch.__init__(self, tree) + self.set_tree(tree) + + def set_tree(self, tree): + self.tree = tree + self.node1 = None + self.node2 = None + self.child = None + + def propose(self): + self.node1, self.node2, self.child = propose_random_nni(self.tree) + perform_nni(self.tree, self.node1, self.node2, self.child) + return self.tree + + def revert(self): + if self.node1 is not None: + perform_nni(self.tree, self.node1, self.node2, self.child) + return self.tree + + def reset(self): + self.node1 = None + self.node2 = None + self.child = None + + +class TreeSearchSpr (TreeSearch): + + def __init__(self, tree): + TreeSearch.__init__(self, tree) + self.set_tree(tree) + + def set_tree(self, tree): + self.tree = tree + self.node1 = None + self.node2 = None + + def propose(self): + + # choose SPR move + self.node1, node3 = propose_random_spr(self.tree) + + # remember sibling of node1 + p = self.node1.parent + self.node2 = (p.children[1] if p.children[0] == self.node1 + else p.children[0]) + + # perform SPR move + perform_spr(self.tree, self.node1, node3) + return self.tree + + def revert(self): + if self.node1 is not None: + perform_spr(self.tree, self.node1, self.node2) + return self.tree + + def reset(self): + self.node1 = None + self.node2 = None + + +class TreeSearchMix (TreeSearch): + + def __init__(self, tree): + TreeSearch.__init__(self, tree) + self.total_weight = 0.0 + self.last_propose = 0 + self.methods = [] + self.set_tree(tree) + + def set_tree(self, tree): + self.tree = tree + + for method in self.methods: + method[0].set_tree(tree) + + def add_proposer(self, proposer, weight): + self.total_weight += weight + self.methods.append((proposer, weight)) + + def propose(self): + # randomly choose method + choice = random.random() * self.total_weight + s = self.methods[0][1] + i = 0 + while i < len(self.methods)-1 and s < choice: + i += 1 + s += self.methods[i][1] + + # make proposal + self.last_propose = i + return self.methods[i][0].propose() + + def revert(self): + return self.methods[self.last_propose][0].revert() + + def reset(self): + for method in self.methods: + method[0].reset() + + +class TreeSearchUnique (TreeSearch): + """ + Propose unique tree topologies + """ 
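+
+ # A minimal usage sketch (assuming an existing treelib.Tree named 'tree'):
+ #
+ #   mix = TreeSearchMix(tree)
+ #   mix.add_proposer(TreeSearchNni(tree), .5)
+ #   mix.add_proposer(TreeSearchSpr(tree), .5)
+ #   search = TreeSearchUnique(tree, mix)
+ #   tree2 = search.propose()  # proposes a topology not yet seen (in place)
+ #   search.revert()           # undo the move if the proposal is rejected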
+ + def __init__(self, tree, search, tree_hash=None, maxtries=5, + auto_add=True): + TreeSearch.__init__(self, tree) + self.search = search + self.seen = set() + self._tree_hash = tree_hash if tree_hash else hash_tree + self.maxtries = maxtries + self.auto_add = auto_add + + def set_tree(self, tree): + self.tree = tree + self.search.set_tree(tree) + + + def propose(self): + + for i in xrange(self.maxtries): + if i > 0: + self.search.revert() + tree = self.search.propose() + top = self._tree_hash(tree) + if top not in self.seen: + #util.logger("tried", i, len(self.seen)) + break + else: + #util.logger("maxtries", len(self.seen)) + pass + + if self.auto_add: + self.seen.add(top) + self.tree = tree + return self.tree + + + def revert(self): + self.tree = self.search.revert() + return self.tree + + + def reset(self): + self.seen.clear() + self.search.reset() + + + def add_seen(self, tree): + top = self._tree_hash(tree) + self.seen.add(top) + + +class TreeSearchPrescreen (TreeSearch): + + def __init__(self, tree, search, prescreen, poolsize): + TreeSearch.__init__(self, tree) + self.search = TreeSearchUnique(tree, search, auto_add=False) + self.prescreen = prescreen + self.poolsize = poolsize + self.oldtree = None + + + def set_tree(self, tree): + self.tree = tree + self.search.set_tree(tree) + + + def propose(self): + + # save old topology + self.oldtree = self.tree.copy() + + pool = [] + best_score = self.prescreen(self.tree) + total = -util.INF + + # TODO: add unique filter + + # make many subproposals + self.search.reset() + for i in xrange(self.poolsize): + self.search.propose() + score = self.prescreen(self.tree) + tree = self.tree.copy() + + # save tree and logl + pool.append((tree, score)) + total = stats.logadd(total, score) + + if score > best_score: + # make more proposals off this one + best_score = score + else: + self.search.revert() + + # propose one of the subproposals + choice = random.random() + partsum = -util.INF + + for tree, score in pool: + partsum = stats.logadd(partsum, score) + if choice < math.exp(partsum - total): + # propose tree i + treelib.set_tree_topology(self.tree, tree) + break + + + self.search.add_seen(self.tree) + + + def revert(self): + if self.oldtree: + treelib.set_tree_topology(self.tree, self.oldtree) + + + def reset(self): + self.oldtree = None + self.search.reset() + + + + +#============================================================================= +# Phylogenetic reconstruction: Neighbor-Joining +# + +def neighborjoin(distmat, genes, usertree=None): + """Neighbor joining algorithm""" + + tree = treelib.Tree() + leaves = {} + dists = util.Dict(dim=2) + restdists = {} + + + # initialize distances + for i in range(len(genes)): + r = 0 + for j in range(len(genes)): + dists[genes[i]][genes[j]] = distmat[i][j] + r += distmat[i][j] + restdists[genes[i]] = r / (len(genes) - 2) + + # initialize leaves + for gene in genes: + tree.add(treelib.TreeNode(gene)) + leaves[gene] = 1 + + # if usertree is given, determine merging order + merges = [] + newnames = {} + if usertree != None: + def walk(node): + if not node.is_leaf(): + assert len(node.children) == 2, \ + Exception("usertree is not binary") + + for child in node: + walk(child) + merges.append(node) + newnames[node] = len(merges) + else: + newnames[node] = node.name + walk(usertree.root) + merges.reverse() + + # join loop + while len(leaves) > 2: + # search for closest genes + if not usertree: + low = util.INF + lowpair = (None, None) + leaveslst = leaves.keys() + + for i in range(len(leaves)): + for j 
in range(i+1, len(leaves)): + gene1, gene2 = leaveslst[i], leaveslst[j] + dist = dists[gene1][gene2] - restdists[gene1] \ + - restdists[gene2] + if dist < low: + low = dist + lowpair = (gene1, gene2) + else: + node = merges.pop() + lowpair = (newnames[node.children[0]], + newnames[node.children[1]]) + + # join gene1 and gene2 + gene1, gene2 = lowpair + parent = treelib.TreeNode(tree.new_name()) + tree.add_child(parent, tree.nodes[gene1]) + tree.add_child(parent, tree.nodes[gene2]) + + # set distances + tree.nodes[gene1].dist = (dists[gene1][gene2] + restdists[gene1] - + restdists[gene2]) / 2.0 + tree.nodes[gene2].dist = dists[gene1][gene2] - tree.nodes[gene1].dist + + # gene1 and gene2 are no longer leaves + del leaves[gene1] + del leaves[gene2] + + gene3 = parent.name + r = 0 + for gene in leaves: + dists[gene3][gene] = (dists[gene1][gene] + dists[gene2][gene] - + dists[gene1][gene2]) / 2.0 + dists[gene][gene3] = dists[gene3][gene] + r += dists[gene3][gene] + leaves[gene3] = 1 + + if len(leaves) > 2: + restdists[gene3] = r / (len(leaves) - 2) + + # join the last two genes into a tribranch + gene1, gene2 = leaves.keys() + if type(gene1) != int: + gene1, gene2 = gene2, gene1 + tree.add_child(tree.nodes[gene1], tree.nodes[gene2]) + tree.nodes[gene2].dist = dists[gene1][gene2] + tree.root = tree.nodes[gene1] + + # root tree according to usertree + if usertree != None and treelib.is_rooted(usertree): + roots = set([newnames[usertree.root.children[0]], + newnames[usertree.root.children[1]]]) + newroot = None + for child in tree.root.children: + if child.name in roots: + newroot = child + + assert newroot != None + + treelib.reroot(tree, newroot.name, newCopy=False) + + return tree + + + + +#============================================================================= +# Phylogenetic reconstruct: Least Square Error + +def least_square_error(tree, distmat, genes, forcePos=True, weighting=False): + """Least Squared Error algorithm for phylogenetic reconstruction""" + + # use SCIPY to perform LSE + import scipy + import scipy.linalg + + def makeVector(array): + """convience function for handling different configurations of scipy""" + if len(array.shape) == 2: + if array.shape[0] == 1: + return array[0] + else: + return scipy.transpose(array)[0] + else: + return array + + + if treelib.is_rooted(tree): + rootedge = sorted([x.name for x in tree.root.children]) + treelib.unroot(tree, newCopy=False) + else: + rootedge = None + + # create pairwise dist array + dists = [] + for i in xrange(len(genes)): + for j in xrange(i+1, len(genes)): + dists.append(distmat[i][j]) + + # create topology matrix + topmat, edges = make_topology_matrix(tree, genes) + + # setup matrix and vector + if weighting: + topmat2 = scipy.array([[util.safediv(x, math.sqrt(dists[i]), 0) + for x in row] + for i, row in enumerate(topmat)]) + paths = scipy.array(map(math.sqrt, dists)) + else: + topmat2 = scipy.array(topmat) + paths = scipy.array(dists) + + + # solve LSE + edgelens, resids, rank, singlars = scipy.linalg.lstsq(topmat2, paths) + + # force non-negative branch lengths + if forcePos: + edgelens = [max(float(x), 0) for x in makeVector(edgelens)] + else: + edgelens = [float(x) for x in makeVector(edgelens)] + + # calc path residuals (errors) + paths2 = makeVector(scipy.dot(topmat2, edgelens)) + resids = (paths2 - paths).tolist() + paths = paths.tolist() + + # set branch lengths + set_branch_lengths_from_matrix(tree, edges, edgelens, paths, resids, + topmat=topmat, rootedge=rootedge) + + return util.Bundle(resids=resids, + 
paths=paths, + edges=edges, + topmat=topmat) + + +def make_topology_matrix(tree, genes): + + # find how edges split vertices + network = treelib.tree2graph(tree) + splits = find_all_branch_splits(network, set(genes)) + edges = splits.keys() + + # create topology matrix + n = len(genes) + ndists = n*(n-1) / 2 + topmat = util.make_matrix(ndists, len(edges)) + + vlookup = util.list2lookup(genes) + n = len(genes) + for e in xrange(len(edges)): + set1, set2 = splits[edges[e]] + for gene1 in set1: + for gene2 in set2: + i, j = util.sort([vlookup[gene1], vlookup[gene2]]) + index = i*n-i*(i+1)/2+j-i-1 + topmat[index][e] = 1.0 + + return topmat, edges + + +def set_branch_lengths_from_matrix(tree, edges, edgelens, paths, resids, + topmat=None, rootedge=None): + # recreate rooting branches + if rootedge != None: + # restore original rooting + if tree.nodes[rootedge[0]].parent == tree.nodes[rootedge[1]]: + treelib.reroot(tree, rootedge[0], newCopy=False) + else: + treelib.reroot(tree, rootedge[1], newCopy=False) + + # find root edge in edges + for i in xrange(len(edges)): + if sorted(edges[i]) == rootedge: + break + + edges[i] = [rootedge[0], tree.root.name] + edges.append([rootedge[1], tree.root.name]) + edgelens[i] /= 2.0 + edgelens.append(edgelens[i]) + resids[i] /= 2.0 + resids.append(resids[i]) + paths[i] /= 2.0 + paths.append(paths[i]) + + if topmat != None: + for row in topmat: + row.append(row[i]) + + # set branch lengths + for i in xrange(len(edges)): + gene1, gene2 = edges[i] + if tree.nodes[gene2].parent == tree.nodes[gene1]: + gene1, gene2 = gene2, gene1 + tree.nodes[gene1].dist = edgelens[i] + + + + +def tree2distmat(tree, leaves): + """Returns pair-wise distances between leaves of a tree""" + + # TODO: not implemented efficiently + mat = [] + for i in range(len(leaves)): + mat.append([]) + for j in range(len(leaves)): + mat[-1].append(treelib.find_dist(tree, leaves[i], leaves[j])) + + return mat + + + +#============================================================================ +# branch splits +# + +def find_splits(tree, rooted=False): + """ + Find branch splits for a tree + + If 'rooted' is True, then orient splits based on rooting + """ + + all_leaves = set(tree.leaf_names()) + nall_leaves = len(all_leaves) + + # find descendants + descendants = {} + def walk(node): + if node.is_leaf(): + s = descendants[node] = set([node.name]) + else: + s = set() + for child in node.children: + s.update(walk(child)) + descendants[node] = s + return s + for child in tree.root.children: + walk(child) + + # left child's descendants immediately defines + # right child's descendants (by complement) + if len(tree.root.children) == 2: + # in order to work with rooted, be consistent about which descendents + # to keep + a, b = tree.root.children + if len(descendants[a]) < len(descendants[b]): + del descendants[a] + elif len(descendants[b]) < len(descendants[a]): + del descendants[b] + elif min(descendants[a]) < min(descendants[b]): + del descendants[a] + else: + del descendants[b] + + # build splits list + splits = [] + for leaves in descendants.itervalues(): + if 1 < len(leaves) and (rooted or len(leaves) < nall_leaves - 1): + set1 = tuple(sorted(leaves)) + set2 = tuple(sorted(all_leaves - leaves)) + if not rooted and min(set2) < min(set1): + set1, set2 = set2, set1 + + splits.append((set1, set2)) + + return splits + + +def split_string(split, leaves=None, leafDelim=" ", splitDelim="|"): + """ + Returns a string representing a split + + If leaves are specified, leaf names will be displayed in that order. 
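+
+    For example (illustrative, using the default delimiters):
+
+        split_string((("a", "b"), ("c", "d")))  ->  "a b|c d"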
+ """ + + if leaves is not None: + lookup = util.list2lookup(leaves) + split = (sorted(split[0], key=lambda x: lookup[x]), + sorted(split[0], key=lambda x: lookup[x])) + + return leafDelim.join(split[0]) + splitDelim + leafDelim.join(split[1]) + + +def split_bit_string(split, leaves=None, char1="*", char2=".", nochar=" "): + """Returns a bit string representation of a split""" + + if leaves is None: + leaves = split[0] + split[1] + set1, set2 = map(set, split) + + chars = [] + for leaf in leaves: + if leaf in set1: + chars.append(char1) + elif leaf in set2: + chars.append(char2) + else: + chars.append(nochar) + + return "".join(chars) + + +def robinson_foulds_error(tree1, tree2, rooted=False): + """ + Returns RF error + + This definition of RF error is the fraction of branches in larger + tree that are not present in the smaller tree. + + Of course, trees can be the same size as well. + """ + splits1 = find_splits(tree1, rooted=rooted) + splits2 = find_splits(tree2, rooted=rooted) + + overlap = set(splits1) & set(splits2) + + #assert len(splits1) == len(splits2) + + denom = float(max(len(splits1), len(splits2))) + + if denom == 0.0: + return 0.0 + else: + return 1 - (len(overlap) / denom) + + +#============================================================================= +# consensus methods + + +def consensus_majority_rule(trees, extended=True, rooted=False): + """ + Performs majority rule on a set of trees + + extended -- if True, performs the extended majority rule + rooted -- if True, assumes trees are rooted + """ + + # consensus tree + contree = treelib.Tree() + + nleaves = len(trees[0].leaves()) + ntrees = len(trees) + split_counts = util.Dict(default=0) + + # handle special cases + if not rooted and nleaves == 3: + leaves = trees[0].leaf_names() + root = tree.make_root() + n = tree.add_child(root, treelib.TreeNode(tree.new_name())) + tree.add_child(n, treelib.TreeNode(leaves[0])) + tree.add_child(n, treelib.TreeNode(leaves[1])) + tree.add_child(root, treelib.TreeNode(leaves[2])) + return tree + + elif nleaves == 2: + leaves = trees[0].leaf_names() + root = tree.make_root() + tree.add_child(root, treelib.TreeNode(leaves[0])) + tree.add_child(root, treelib.TreeNode(leaves[1])) + return tree + + + # count all splits + for tree in trees: + for split in find_splits(tree, rooted): + split_counts[split] += 1 + contree.nextname = max(tree.nextname for tree in trees) + + #util.print_dict(split_counts) + + # choose splits + pick_splits = 0 + rank_splits = split_counts.items() + rank_splits.sort(key=lambda x: x[1], reverse=True) + + # add splits to the contree in increasing frequency + for split, count in rank_splits: + if not extended and count <= ntrees / 2.0: + continue + + # choose split if it is compatiable + if _add_split_to_tree(contree, split, count / float(ntrees), rooted): + pick_splits += 1 + + # stop if enough splits are choosen + if ((rooted and pick_splits >= nleaves - 2) or + (not rooted and pick_splits >= nleaves - 3)): + break + + # add remaining leaves and remove clade data + _post_process_split_tree(contree) + + return contree + + +def splits2tree(splits, rooted=False): + """ + Builds a tree from a set of splits + + Silently reject splits that are in conflict. Process splits in order. 
+ + splits -- iterable of splits + rooted -- if True treat splits as rooted/polarized + """ + + tree = treelib.Tree() + for split in splits: + _add_split_to_tree(tree, split, 1.0, rooted) + _post_process_split_tree(tree) + return tree + + +def _add_split_to_tree(tree, split, count, rooted=False): + """ + Add split to tree + private method + """ + + split = (set(split[0]), set(split[1])) + + # init first split + if len(tree) == 0: + root = tree.make_root() + root.data["leaves"] = split[0] | split[1] + + if len(split[0]) == 1: + node = tree.add_child(root, treelib.TreeNode(list(split[0])[0])) + node.data["leaves"] = split[0] + node.data["boot"] = count + else: + node = tree.add_child(root, treelib.TreeNode(tree.new_name())) + node.data["leaves"] = split[0] + node.data["boot"] = count + + if len(split[1]) == 1: + node = tree.add_child(root, treelib.TreeNode(list(split[1])[0])) + node.data["leaves"] = split[1] + node.data["boot"] = count + + return True + + def walk(node, clade): + if node.is_leaf(): + # make new child + if len(clade) == 1: + name = list(clade)[0] + else: + name = tree.new_name() + child = tree.add_child(node, treelib.TreeNode(name)) + child.data["leaves"] = clade + child.data["boot"] = count + return True + + # which children intersect this clade? + intersects = [] + for child in node: + leaves = child.data["leaves"] + intersect = clade & leaves + + if len(clade) == len(intersect): + if len(intersect) < len(leaves): + # subset, recurse + return walk(child, clade) + else: + # len(intersect) == len(leaves), split is already present + return True + + elif len(intersect) == 0: + continue + + elif len(intersect) == len(leaves): + # len(clade) > len(leaves) + # superset + intersects.append(child) + else: + # conflict + return False + + # insert new node + if len(clade) == 1: + name = list(clade)[0] + else: + name = tree.new_name() + new_node = tree.add_child(node, treelib.TreeNode(name)) + new_node.data["leaves"] = clade + new_node.data["boot"] = count + for child in intersects: + tree.remove(child) + tree.add_child(new_node, child) + + return True + + # try to place split into tree + if rooted: + walk(tree.root, split[0]) + else: + if walk(tree.root, split[0]): + return True + else: + return walk(tree.root, split[1]) + + # split is in conflict + return False + + +def _post_process_split_tree(tree): + """ + Post-process a tree built from splits + private method + """ + + for node in list(tree): + if len(node.data["leaves"]) > 1: + for leaf_name in node.data["leaves"]: + for child in node: + if leaf_name in child.data.get("leaves", ()): + break + else: + child = tree.add_child(node, treelib.TreeNode(leaf_name)) + else: + assert node.name == list(node.data["leaves"])[0], node.name + + # remove leaf data and set root + for node in tree: + if "leaves" in node.data: + del node.data["leaves"] + + + +def ensure_binary_tree(tree): + """ + Arbitrarly expand multifurcating nodes + """ + + # first tree just rerooting root branch + if len(tree.root.children) > 2: + treelib.reroot(tree, tree.root.children[0].name, newCopy=False) + + multibranches = [node for node in tree + if len(node.children) > 2] + + for node in multibranches: + children = list(node.children) + + # remove children + for child in children: + tree.remove(child) + + # add back in binary + while len(children) > 2: + left = children.pop() + right = children.pop() + newnode = treelib.TreeNode(tree.new_name()) + newnode.data['boot'] = 0 + tree.add_child(newnode, left) + tree.add_child(newnode, right) + children.append(newnode) + + 
# add last two to original node + tree.add_child(node, children.pop()) + tree.add_child(node, children.pop()) + + + +#============================================================================= +# simulation + +def make_jc_matrix(t, a=1.): + """ + Returns Juke Cantor transition matrix + + t -- time span + a -- mutation rate (sub/site/time) + """ + eat = math.exp(-4*a/3.*t) + r = .25 * (1 + 3*eat) + s = .25 * (1 - eat) + + return [[r, s, s, s], + [s, r, s, s], + [s, s, r, s], + [s, s, s, r]] + + +def make_hky_matrix(t, bgfreq=(.25,.25,.25,.25), kappa=1.0): + """ + Returns HKY transition matrix + + Assume base order A,C,G,T. + + t -- time span + bgfreq -- background base frequency + kappa -- transition/transversion ratio + """ + + # bases = "ACGT" + # pi = bfreq + + pi_r = bgfreq[0] + bgfreq[2] + pi_y = bgfreq[1] + bgfreq[3] + rho = pi_r / pi_y + + + # convert the usual ratio definition (kappa) to Felsenstein's + # definition (R) + ratio = (bgfreq[3]*bgfreq[1] + bgfreq[0]*bgfreq[2]) * kappa / (pi_y*pi_r) + + + # determine HKY parameters alpha_r, alpha_y, and beta + b = 1.0 / (2.0 * pi_r * pi_y * (1.0+ratio)) + a_y = ((pi_r*pi_y*ratio - bgfreq[0]*bgfreq[2] - bgfreq[1]*bgfreq[3]) / + (2.0*(1+ratio)*(pi_y*bgfreq[0]*bgfreq[2]*rho + + pi_r*bgfreq[1]*bgfreq[3]))) + a_r = rho * a_y + + + # make transition probability P(j | i, t) + + mat = [[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]] + + for i in (0, 1, 2, 3): + for j in (0, 1, 2, 3): + # convenience variables + # NOTE: it is ok to assign pi_ry, because it is only used when + # dnatype[i] == dnatype[j] + if i in (0, 2): # purine + a_i = a_r + pi_ry = pi_r + else: # prymidine + a_i = a_y + pi_ry = pi_y + delta_ij = int(i == j) + e_ij = int((i in (0, 2)) == (j in (0, 2))) + + ait = math.exp(-a_i*t) + ebt = math.exp(-b*t) + + mat[i][j] = (ait*ebt * delta_ij + + ebt * (1.0 - ait) * (bgfreq[j]*e_ij/pi_ry) + + (1.0 - ebt) * bgfreq[j]) + + return mat + + +def sim_seq_branch(seq, time, matrix_func): + """Simulate sequence evolving down one branch""" + + matrix = matrix_func(time) + bases = "ACGT" + lookup = {"A": 0, "C": 1, "G": 2, "T": 3} + + seq2 = [] + for a in seq: + seq2.append(bases[stats.sample(matrix[lookup[a]])]) + + return "".join(seq2) + + +def sim_seq_tree(tree, seqlen, matrix_func=make_jc_matrix, + bgfreq=[.25,.25,.25,.25], rootseq=None, + keep_internal=False): + """Simulate the evolution of a sequence down a tree""" + + bases = "ACGT" + + # make root sequence + if rootseq is None: + rootseq = [bases[stats.sample(bgfreq)] + for i in xrange(seqlen)] + + # leaf sequences + seqs = fasta.FastaDict() + + # evolve sequences down tree + def walk(node, seq): + if node.is_leaf(): + # save sequence + seqs[node.name] = seq + elif keep_internal: + seqs[node.name] = seq + + # recurse + for child in node.children: + seq2 = sim_seq_branch(seq, child.dist, matrix_func) + walk(child, seq2) + walk(tree.root, rootseq) + + return seqs + + + +#============================================================================= +# gene tree simulation + + +def sample_dlt_gene_tree(stree, duprate, lossrate, transrate, + genename=lambda sp, x: sp + "_" + str(x), + removeloss=True): + """Simulate a gene tree within a species tree with dup, loss, transfer""" + + # TODO: return brecon instead of (recon, events) + + stimes = treelib.get_tree_timestamps(stree) + + # initialize gene tree + tree = treelib.Tree() + tree.make_root() + recon = {tree.root: stree.root} + events = {tree.root: "spec"} + losses = set() + + totalrate = duprate + lossrate + transrate + + 
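+ # The simulation below treats duplication, loss and transfer as competing
+ # exponential clocks: sim_branch draws a waiting time from an exponential
+ # with rate totalrate (when that rate is positive); if the time exceeds the
+ # remaining branch length, the lineage simply survives to the next
+ # speciation, otherwise one event is chosen with probability proportional
+ # to its rate (dup : loss : trans).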
+ def sim_branch(node, snode, dist): + + # sample next event + if totalrate > 0.0: + time = random.expovariate(totalrate) + else: + time = dist + + if time >= dist: + # no events just evolve to end of species branch + node = tree.add_child(node, tree.new_node()) + node.dist = dist + recon[node] = snode + events[node] = "spec" + sim_spec(node, snode) + else: + # event occurs, choose event + pick = random.random() + if pick <= duprate / totalrate: + # dup occurs + node = tree.add_child(node, tree.new_node()) + node.dist = time + recon[node] = snode + events[node] = "dup" + + # recurse + sim_branch(node, snode, dist - time) + sim_branch(node, snode, dist - time) + + elif pick <= (duprate + lossrate) / totalrate: + # loss occurs + node = tree.add_child(node, tree.new_node()) + node.dist = time + recon[node] = snode + events[node] = "loss" + losses.add(node) + + else: + # transfer occurs + node = tree.add_child(node, tree.new_node()) + node.dist = time + recon[node] = snode + events[node] = "trans" + + # choose destination species + age = stimes[snode] + dist - time + + others = [] + for snode2, sage in stimes.iteritems(): + if sage < age < sage + snode2.dist and snode2 != snode: + others.append(snode2) + + assert len(others) > 0, (age, stimes) + + dest = random.sample(others, 1)[0] + + # recurse + sim_branch(node, snode, dist - time) + sim_branch(node, dest, age - stimes[dest]) + + + def sim_spec(node, snode): + if snode.is_leaf(): + # leaf in species tree, terminal gene lineage + tree.rename(node.name, genename(snode.name, node.name)) + events[node] = "gene" + else: + # speciation in species tree, follow each branch + for schild in snode.children: + sim_branch(node, schild, schild.dist) + + sim_spec(tree.root, stree.root) + + + if removeloss: + keep = [node for node in tree.leaves() if node not in losses] + treelib.subtree_by_leaves(tree, keep, keep_single=False) + + + brecon = recon_events2brecon(recon, events) + + return tree, brecon + + + +def sample_dltr_gene_tree(stree, duprate, lossrate, transrate, recombrate, + genename=lambda sp, x: sp + "_" + str(x), + removeloss=True): + """Simulate a gene tree within a species tree with dup, loss, transfer""" + + stimes = treelib.get_tree_timestamps(stree) + spec_times = sorted((x for x in stimes.values() if x > 0.0), reverse=True) + spec_times.append(0.0) + + # initialize gene tree + tree = treelib.Tree() + tree.make_root() + times = {tree.root: stimes[stree.root]} + brecon = {tree.root: [(stree.root, "spec")]} + losses = set() + + totalrate = duprate + lossrate + transrate + recombrate + + class Lineage (object): + def __init__(self, node, snode): + self.node = node + self.snode = snode + + lineages = set() + for schild in stree.root: + lineages.add(Lineage(tree.root, schild)) + age = stimes[stree.root] + i = 1 + + while len(lineages) > 0: + if totalrate > 0.0: + age -= random.expovariate(totalrate * len(lineages)) + else: + age = 0.0 + + if age <= spec_times[i]: + if spec_times[i] == 0.0: + # create leaves + for l in lineages: + child = tree.add_child(l.node, tree.new_node()) + tree.rename(child.name, genename(l.snode.name, child.name)) + child.dist = times[l.node] + times[child] = 0.0 + brecon[child] = [(l.snode, "gene")] + break + else: + # speciation + age = spec_times[i] + i += 1 + for l in list(lineages): + if stimes[l.snode] == age: + lineages.remove(l) + child = tree.add_child(l.node, tree.new_node()) + child.dist = times[l.node] - age + times[child] = age + brecon[child] = [(l.snode, "spec")] + for schild in l.snode.children: + 
lineages.add(Lineage(child, schild)) + continue + + # choose event type and lineage + lineage = random.sample(lineages, 1)[0] + node, snode = lineage.node, lineage.snode + pick = stats.sample((duprate, lossrate, transrate, recombrate)) + + if pick == 0: + # duplication + child = tree.add_child(node, tree.new_node()) + child.dist = times[node] - age + times[child] = age + brecon[child] = [(snode, "dup")] + lineages.remove(lineage) + lineages.add(Lineage(child, snode)) + lineages.add(Lineage(child, snode)) + + elif pick == 1: + # loss + child = tree.add_child(node, tree.new_node()) + child.dist = times[node] - age + times[child] = age + brecon[child] = [(snode, "loss")] + losses.add(child) + lineages.remove(lineage) + + elif pick == 2: + # transfer + # choose destination species + others = [] + for snode2, sage in stimes.iteritems(): + if sage < age < sage + snode2.dist and snode2 != snode: + others.append(snode2) + dest = random.sample(others, 1)[0] + + # make transfer node + child = tree.add_child(node, tree.new_node()) + child.dist = times[node] - age + times[child] = age + brecon[child] = [(snode, "trans")] + lineages.remove(lineage) + lineages.add(Lineage(child, dest)) + lineages.add(Lineage(child, snode)) + + elif pick == 3: + # recomb + # choose destination species + others = [] + for snode2, sage in stimes.iteritems(): + if sage < age < sage + snode2.dist and snode2 != snode: + others.append(snode2) + dest = random.sample(others, 1)[0] + + # find gene to replace + genes = [] + for l in lineages: + if l.snode == dest: + genes.append(l) + if len(genes) == 0: + # nothing to replace, no recombination + continue + else: + gene = random.sample(genes, 1)[0] + + # make transfer node + child = tree.add_child(node, tree.new_node()) + child.dist = times[node] - age + times[child] = age + brecon[child] = [(snode, "trans")] + lineages.remove(lineage) + lineages.add(Lineage(child, dest)) + lineages.add(Lineage(child, snode)) + + # mark gene as loss + child2 = tree.add_child(gene.node, tree.new_node()) + child2.dist = times[gene.node] - age + times[child2] = age + brecon[child2] = [(gene.snode, "loss")] + lineages.remove(gene) + + if removeloss: + keep = [x for x in tree.leaves() if isinstance(x.name, str)] + subtree_brecon_by_leaves(tree, brecon, keep) + + return tree, brecon + + + + + + +#============================================================================= +# file functions + +def phylofile(famdir, famid, ext): + """ + Creates a filename using my gene family format + + famdir/famid/famid.ext + """ + return os.path.join(famdir, famid, famid + ext) + + + + + diff --git a/arghmm/deps/compbio/phylo.pyc b/arghmm/deps/compbio/phylo.pyc new file mode 100644 index 0000000000000000000000000000000000000000..962fd17b240197d4c6e42ca9c52b8babfc9937e3 GIT binary patch literal 71729 zcmd?S3!Ge6Uf+4Esvqi>TF;hRZOe99vfV9@ELpP0_ITunr1oPxGaAp8XEft+(^IM{ zsmp40b#;~O_F$V~Z-%)$Z*gzyOLE|3qfyPpgsn`}PWKtgyfknjl;LU@H|vcQm# zkPYO+et*CJy;aq6dxnWiko~N!x_$0B=bq>Peg5a1pZ#|e;}3lNw?0@+vj4X6_fdZB z7lx9g#=k;xF-e~7h6~BFg*?8PJX_4erR3R?!^Py{P_i_fJUg5e^nJ%H#z>+q;|6d`<>wy7kD74 zl^uCoQoGfG+mm`JY21<2_B+E&Qaj-ISr5W(ZrJU~YmE58q;`js?o4Vk4!k3&%{p)> zsU39SuB3LS1Ba8^I~+LT+7G$#yOY{oimVej?A9JlYDW}PyW1I#CAFje>Yk)_%z=B8 z+C2`uGpXI{z<5(QhS#J?@nqDI&j?0c(-eL$l>D- zKkV>B4nN{>#4vsF?~&v+!0FMX_OOTIQ8)WUQaj;dPbRgK4xCDAryQ6|YI8~bvE)3T zKc<)~N%GNWA5UtJC-ui2`$RYPiKO;q9{Zkd?2}3DsXX?*-PrdewfE(*@9)MwmDEn> zu^;HhzSoU^+MVK~>7@Str1n9#{B%-FmG9XPq;@u` zeZ(+u#xWmFYR@J$;y;$WHk3U3j6ZqKpL{5I`v~o?UG;XF66nScHVjBlR8PY-oFb&YL9w%!7cf+q_(J>!ls75T}W!r=P?%@ 
zTOFi&F@3Y-|3IH9{2`^hJ$GuZQ=?UM!#pbn@;`MG39iN0fGq+)_#(p6M^Wwe+*=uU zkeKYbzt+EIK1}>Fapb}2r%p)1IZpEYP~5#bYI(tl_Fc5JdkD8pkpMY(^I;w4oRC3- zq<@WCY@ja}bD~N^-)IG9*l8niKQPDRaxyw!P9m6>@;#J2qa@j`!lyVvirWDE+9xSm zXz8?%Q!3`}2FW{9Cdx`AQhK@=B+{;gAf0|CFx~!Gg;V87+rE>z*9nyfL0oUUhLLjI zE){s0^%y3|v5CPZ{FZs{dmsZC%}rds!W$@Qi!7YjI8?bY%o7R(@pFJl9XAP~HH^O% zjExdn2Q;OQ_*Xqvmov0bSduVeI#GV(eTWiv7fsb;Cg_-qYm*QD6bZx&zb08gMlU>j z-mJy@i0}h%+oO<8i_Caiy)K>0`xGpk@KSt>sJDS1fuDe%foFITm0`rw!snT(EiJC1 z?kG;e7E9*{Ta%T@G^Z1xW)p1kxiS)H&a0ZrnP5{i+as;eTH)8156dK@vjYQ-Txvv; zJH~F9Zj{&~QG(ErYypwuv0oZcoRToBb%dN(Wj|k#s8lj}KW}0aYYd0l&hs5PTaj(j zeX1a9`?i>E_q=sfuN3cc*C;(eYurM*k#<3@9CykFvDrzWBcT4wF{_t!yefbW!^VI` zN%y{^s6C>HHha4dBX_Aak5*1%ZO@XudHifuX3u#o=*gqA)3O1Q9|X;1c?%C+nwmbn znLAS+H)+|0z0TUdn{Rh#}wUsu*E%kG^VZl`Mz^rZFTx+ZFauqo3QTH*TF0hN(0nI+y}5% zOuP}O0QUmhfUUrO-~jM2a1eM1I0Q@r)-_ENngy84isyl|z|(+K!a;lycoujbcma4x zP*QhMM7r*-0sA$0dD|r0hfVy zfGfaN;9cN7;1l2m@FigLaNiN)0~=?7NRV_)7FvehsarX}-|r#6lW^?=0k&ZJ&0q`G zo9@MV{D>O}D{fGsoXfhZ8w{hcH_WzxG3xTU611Rx+u!I4e3<$q}%Hv zZF$zMIDKKJFCUF-56`kY;Kp3&DsG1>xRRR)SB5=df7*P`h3li;uIMJ*= len(self[key]): + dict.__setitem__(self, key, value) + else: + self.names.append(key) + dict.__setitem__(self, key, value) + + + def get(self, keys, new=None): + """Return a subset of the sequences""" + + if new == None: + new = type(self)() + + for key in keys: + if key in self: + new[key] = self[key] + + return new + + + def alignlen(self): + """ + If this SeqDict is an alignment, this function + will return its length + """ + + return len(self.values()[0]) + + + # The following methods keep names in sync with dictionary keys + def __setitem__(self, key, value): + if key not in self: + self.names.append(key) + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + self.names.remove(key) + + def update(self, dct): + for key in dct: + if key not in self.names: + self.names.append(key) + dict.update(self, dct) + + def setdefault(self, key, value): + if key not in self.names: + self.names.append(key) + dict.setdefault(self, key, value) + + def clear(self): + self.names = [] + dict.clear(self) + + # keys are always sorted in order added + def keys(self): + return list(self.names) + + def iterkeys(self): + return iter(self.names) + + def values(self): + return [self[key] for key in self.iterkeys()] + + def itervalues(self): + def func(): + for key in self.iterkeys(): + yield self[key] + return func() + + def iteritems(self): + def func(): + for key in self.iterkeys(): + yield (key, self[key]) + return func() + + def __iter__(self): + return iter(self.names) + + def __len__(self): + return len(self.names) + + + +#-------------------------------------------------------------------------------- +# Constants +#-------------------------------------------------------------------------------- + + +# standard codon table +CODON_TABLE = { + "TTT": "F", "CTT": "L", "ATT": "I", "GTT": "V", + "TTC": "F", "CTC": "L", "ATC": "I", "GTC": "V", + "TTA": "L", "CTA": "L", "ATA": "I", "GTA": "V", + "TTG": "L", "CTG": "L", "ATG": "M", "GTG": "V", + + "TCT": "S", "CCT": "P", "ACT": "T", "GCT": "A", + "TCC": "S", "CCC": "P", "ACC": "T", "GCC": "A", + "TCA": "S", "CCA": "P", "ACA": "T", "GCA": "A", + "TCG": "S", "CCG": "P", "ACG": "T", "GCG": "A", + + "TAT": "Y", "CAT": "H", "AAT": "N", "GAT": "D", + "TAC": "Y", "CAC": "H", "AAC": "N", "GAC": "D", + "TAA": "*", "CAA": "Q", "AAA": "K", "GAA": "E", + "TAG": "*", "CAG": "Q", "AAG": "K", "GAG": "E", + + "TGT": "C", "CGT": "R", "AGT": "S", "GGT": "G", + "TGC": "C", "CGC": "R", "AGC": "S", "GGC": 
"G", + "TGA": "*", "CGA": "R", "AGA": "R", "GGA": "G", + "TGG": "W", "CGG": "R", "AGG": "R", "GGG": "G", + + "---": "-" +} + +# codon table specific to the Candida species +CANDIDA_CODON_TABLE = copy.copy(CODON_TABLE) +CANDIDA_CODON_TABLE["CTG"] = "S" # originally L + + +# make reverse codon table +REV_CODON_TABLE = {} +for key,val in CODON_TABLE.items(): + REV_CODON_TABLE.setdefault(val, []).append(key) + + +# make degenerate counts +# +# example: +# +# CGT => "R" +# CGC => "R" +# CGA => "R" +# CGG => "R" +# +# CODON_DEGEN["R"] = [1, 1, 4] +# CODON_DEGEN["CGT"] = [1, 1, 4] +# +CODON_DEGEN = {} +AA_DEGEN = {} +for aa, lst in REV_CODON_TABLE.items(): + folds = map(lambda x: len(util.unique(x)), zip(* lst)) + for codon in lst: + AA_DEGEN[aa] = folds + CODON_DEGEN[codon] = folds + + +# substitution types +SUB_NONE = 0 # none +SUB_TSIT = 1 # tranSition +SUB_TVER = 2 # transVersion +SUB_INS = 3 # insert +SUB_DEL = 4 # del +SUBSTITUTION_TYPES = { + "AA": SUB_NONE, "AC": SUB_TVER, "AG": SUB_TSIT, "AT": SUB_TVER, + "CA": SUB_TVER, "CC": SUB_NONE, "CG": SUB_TVER, "CT": SUB_TSIT, + "GA": SUB_TSIT, "GC": SUB_TVER, "GG": SUB_NONE, "GT": SUB_TVER, + "TA": SUB_TVER, "TC": SUB_TSIT, "TG": SUB_TVER, "TT": SUB_NONE, + + "A-": SUB_DEL, "C-": SUB_DEL, "G-": SUB_DEL, "T-": SUB_DEL, + "-A": SUB_INS, "-C": SUB_INS, "-G": SUB_INS, "-T": SUB_INS, + + "--": SUB_NONE, "NN": SUB_NONE, + "NA": SUB_NONE, "NC": SUB_NONE, "NT": SUB_NONE, "NG": SUB_NONE, + "AN": SUB_NONE, "CN": SUB_NONE, "TN": SUB_NONE, "GN": SUB_NONE, + "N-": SUB_NONE, "N-": SUB_NONE, "N-": SUB_NONE, "N-": SUB_NONE, + "-N": SUB_NONE, "-N": SUB_NONE, "-N": SUB_NONE, "-N": SUB_NONE +} + + +# hydrophobic / hydrophilic +def hydrophobic(aa): + if aa in 'VILMFWC': return 2.0 + if aa in 'AYHTSPG': return 1.0 + if aa in 'RK': return 0.5 + return 0.0 + + +AA_PROPERTY = {'A': 'weakly hydrophobic', + 'R': 'charged', + 'N': 'polar', + 'D': 'charged', + 'C': 'polar', + 'E': 'charged', + 'Q': 'polar', + 'G': 'turn', + 'H': 'charged', + 'I': 'hydrophobic', + 'L': 'hydrophobic', + 'K': 'polar', + 'M': 'met', + 'F': 'hydrophobic', + 'P': 'hydrophobic', + 'S': 'polar', + 'T': 'polar', + 'W': 'hydrophobic', + 'Y': 'polar', + 'V': 'hydrophobic', + 'U': 'polar', + '*': 'stop', + '-': 'gap'} + + + +BLOSUM62 = \ + {'A': {'A': 4, 'R':-1, 'N':-2, 'D':-2, 'C': 0, 'Q':-1, 'E':-1, 'G': 0, 'H':-2, 'I':-1, 'L':-1, 'K':-1, + 'M':-1, 'F':-2, 'P':-1, 'S': 1, 'T': 0, 'W':-3, 'Y':-2, 'V': 0, 'B':-2, 'Z':-1, 'X': 0, '*':-4}, + 'R': {'A':-1, 'R': 5, 'N': 0, 'D':-2, 'C':-3, 'Q': 1, 'E': 0, 'G':-2, 'H': 0, 'I':-3, 'L':-2, 'K': 2, + 'M':-1, 'F':-3, 'P':-2, 'S':-1, 'T':-1, 'W':-3, 'Y':-2, 'V':-3, 'B':-1, 'Z': 0, 'X':-1, '*':-4}, + 'N': {'A':-2, 'R': 0, 'N': 6, 'D': 1, 'C':-3, 'Q': 0, 'E': 0, 'G': 0, 'H': 1, 'I':-3, 'L':-3, 'K': 0, + 'M':-2, 'F':-3, 'P':-2, 'S': 1, 'T': 0, 'W':-4, 'Y':-2, 'V':-3, 'B': 3, 'Z': 0, 'X':-1, '*':-4}, + 'D': {'A':-2, 'R':-2, 'N': 1, 'D': 6, 'C':-3, 'Q': 0, 'E': 2, 'G':-1, 'H':-1, 'I':-3, 'L':-4, 'K':-1, + 'M':-3, 'F':-3, 'P':-1, 'S': 0, 'T':-1, 'W':-4, 'Y':-3, 'V':-3, 'B': 4, 'Z': 1, 'X':-1, '*':-4}, + 'C': {'A': 0, 'R':-3, 'N':-3, 'D':-3, 'C': 9, 'Q':-3, 'E':-4, 'G':-3, 'H':-3, 'I':-1, 'L':-1, 'K':-3, + 'M':-1, 'F':-2, 'P':-3, 'S':-1, 'T':-1, 'W':-2, 'Y':-2, 'V':-1, 'B':-3, 'Z':-3, 'X':-2, '*':-4}, + 'Q': {'A':-1, 'R': 1, 'N': 0, 'D': 0, 'C':-3, 'Q': 5, 'E': 2, 'G':-2, 'H': 0, 'I':-3, 'L':-2, 'K': 1, + 'M': 0, 'F':-3, 'P':-1, 'S': 0, 'T':-1, 'W':-2, 'Y':-1, 'V':-2, 'B': 0, 'Z': 3, 'X':-1, '*':-4}, + 'E': {'A':-1, 'R': 0, 'N': 0, 'D': 2, 'C':-4, 'Q': 2, 'E': 5, 
'G':-2, 'H': 0, 'I':-3, 'L':-3, 'K': 1, + 'M':-2, 'F':-3, 'P':-1, 'S': 0, 'T':-1, 'W':-3, 'Y':-2, 'V':-2, 'B': 1, 'Z': 4, 'X':-1, '*':-4}, + 'G': {'A': 0, 'R':-2, 'N': 0, 'D':-1, 'C':-3, 'Q':-2, 'E':-2, 'G': 6, 'H':-2, 'I':-4, 'L':-4, 'K':-2, + 'M':-3, 'F':-3, 'P':-2, 'S': 0, 'T':-2, 'W':-2, 'Y':-3, 'V':-3, 'B':-1, 'Z':-2, 'X':-1, '*':-4}, + 'H': {'A':-2, 'R': 0, 'N': 1, 'D':-1, 'C':-3, 'Q': 0, 'E': 0, 'G':-2, 'H': 8, 'I':-3, 'L':-3, 'K':-1, + 'M':-2, 'F':-1, 'P':-2, 'S':-1, 'T':-2, 'W':-2, 'Y': 2, 'V':-3, 'B': 0, 'Z': 0, 'X':-1, '*':-4}, + 'I': {'A':-1, 'R':-3, 'N':-3, 'D':-3, 'C':-1, 'Q':-3, 'E':-3, 'G':-4, 'H':-3, 'I': 4, 'L': 2, 'K':-3, + 'M': 1, 'F': 0, 'P':-3, 'S':-2, 'T':-1, 'W':-3, 'Y':-1, 'V': 3, 'B':-3, 'Z':-3, 'X':-1, '*':-4}, + 'L': {'A':-1, 'R':-2, 'N':-3, 'D':-4, 'C':-1, 'Q':-2, 'E':-3, 'G':-4, 'H':-3, 'I': 2, 'L': 4, 'K':-2, + 'M': 2, 'F': 0, 'P':-3, 'S':-2, 'T':-1, 'W':-2, 'Y':-1, 'V': 1, 'B':-4, 'Z':-3, 'X':-1, '*':-4}, + 'K': {'A':-1, 'R': 2, 'N': 0, 'D':-1, 'C':-3, 'Q': 1, 'E': 1, 'G':-2, 'H':-1, 'I':-3, 'L':-2, 'K': 5, + 'M':-1, 'F':-3, 'P':-1, 'S': 0, 'T':-1, 'W':-3, 'Y':-2, 'V':-2, 'B': 0, 'Z': 1, 'X':-1, '*':-4}, + 'M': {'A':-1, 'R':-1, 'N':-2, 'D':-3, 'C':-1, 'Q': 0, 'E':-2, 'G':-3, 'H':-2, 'I': 1, 'L': 2, 'K':-1, + 'M': 5, 'F': 0, 'P':-2, 'S':-1, 'T':-1, 'W':-1, 'Y':-1, 'V': 1, 'B':-3, 'Z':-1, 'X':-1, '*':-4}, + 'F': {'A':-2, 'R':-3, 'N':-3, 'D':-3, 'C':-2, 'Q':-3, 'E':-3, 'G':-3, 'H':-1, 'I': 0, 'L': 0, 'K':-3, + 'M': 0, 'F': 6, 'P':-4, 'S':-2, 'T':-2, 'W': 1, 'Y': 3, 'V':-1, 'B':-3, 'Z':-3, 'X':-1, '*':-4}, + 'P': {'A':-1, 'R':-2, 'N':-2, 'D':-1, 'C':-3, 'Q':-1, 'E':-1, 'G':-2, 'H':-2, 'I':-3, 'L':-3, 'K':-1, + 'M':-2, 'F':-4, 'P': 7, 'S':-1, 'T':-1, 'W':-4, 'Y':-3, 'V':-2, 'B':-2, 'Z':-1, 'X':-2, '*':-4}, + 'S': {'A': 1, 'R':-1, 'N': 1, 'D': 0, 'C':-1, 'Q': 0, 'E': 0, 'G': 0, 'H':-1, 'I':-2, 'L':-2, 'K': 0, + 'M':-1, 'F':-2, 'P':-1, 'S': 4, 'T': 1, 'W':-3, 'Y':-2, 'V':-2, 'B': 0, 'Z': 0, 'X': 0, '*':-4}, + 'T': {'A': 0, 'R':-1, 'N': 0, 'D':-1, 'C':-1, 'Q':-1, 'E':-1, 'G':-2, 'H':-2, 'I':-1, 'L':-1, 'K':-1, + 'M':-1, 'F':-2, 'P':-1, 'S': 1, 'T': 5, 'W':-2, 'Y':-2, 'V': 0, 'B':-1, 'Z':-1, 'X': 0, '*':-4}, + 'W': {'A':-3, 'R':-3, 'N':-4, 'D':-4, 'C':-2, 'Q':-2, 'E':-3, 'G':-2, 'H':-2, 'I':-3, 'L':-2, 'K':-3, + 'M':-1, 'F': 1, 'P':-4, 'S':-3, 'T':-2, 'W':11, 'Y': 2, 'V':-3, 'B':-4, 'Z':-3, 'X':-2, '*':-4}, + 'Y': {'A':-2, 'R':-2, 'N':-2, 'D':-3, 'C':-2, 'Q':-1, 'E':-2, 'G':-3, 'H': 2, 'I':-1, 'L':-1, 'K':-2, + 'M':-1, 'F': 3, 'P':-3, 'S':-2, 'T':-2, 'W': 2, 'Y': 7, 'V':-1, 'B':-3, 'Z':-2, 'X':-1, '*':-4}, + 'V': {'A': 0, 'R':-3, 'N':-3, 'D':-3, 'C':-1, 'Q':-2, 'E':-2, 'G':-3, 'H':-3, 'I': 3, 'L': 1, 'K':-2, + 'M': 1, 'F':-1, 'P':-2, 'S':-2, 'T': 0, 'W':-3, 'Y':-1, 'V': 4, 'B':-3, 'Z':-2, 'X':-1, '*':-4}, + 'B': {'A':-2, 'R':-1, 'N': 3, 'D': 4, 'C':-3, 'Q': 0, 'E': 1, 'G':-1, 'H': 0, 'I':-3, 'L':-4, 'K': 0, + 'M':-3, 'F':-3, 'P':-2, 'S': 0, 'T':-1, 'W':-4, 'Y':-3, 'V':-3, 'B': 4, 'Z': 1, 'X':-1, '*':-4}, + 'Z': {'A':-1, 'R': 0, 'N': 0, 'D': 1, 'C':-3, 'Q': 3, 'E': 4, 'G':-2, 'H': 0, 'I':-3, 'L':-3, 'K': 1, + 'M':-1, 'F':-3, 'P':-1, 'S': 0, 'T':-1, 'W':-3, 'Y':-2, 'V':-2, 'B': 1, 'Z': 4, 'X':-1, '*':-4}, + 'X': {'A': 0, 'R':-1, 'N':-1, 'D':-1, 'C':-2, 'Q':-1, 'E':-1, 'G':-1, 'H':-1, 'I':-1, 'L':-1, 'K':-1, + 'M':-1, 'F':-1, 'P':-2, 'S': 0, 'T': 0, 'W':-2, 'Y':-1, 'V':-1, 'B':-1, 'Z':-1, 'X':-1, '*':-4}, + '*': {'A':-4, 'R':-4, 'N':-4, 'D':-4, 'C':-4, 'Q':-4, 'E':-4, 'G':-4, 'H':-4, 'I':-4, 'L':-4, 'K':-4, + 'M':-4, 'F':-4, 'P':-4, 'S':-4, 'T':-4, 'W':-4, 'Y':-4, 
'V':-4, 'B':-4, 'Z':-4, 'X':-4, '*': 1}} + + +BASE2INT = { + "A": 0, + "C": 1, + "G": 2, + "T": 3 +} + +INT2BASE = ["A", "C", "G", "T"] + + + +#============================================================================= +# Sequence functions +# + +class TranslateError (Exception): + def __init__(self, msg, aa=None, dna=None, a=None, codon=None): + Exception.__init__(self, msg) + self.aa = aa + self.dna = dna + self.a = a + self.codon = codon + + + +def translate(dna, table=CODON_TABLE): + """Translates DNA (with gaps) into amino-acids""" + + aa = [] + + assert len(dna) % 3 == 0, "dna sequence length is not a multiple of 3" + + for i in xrange(0, len(dna), 3): + codon = dna[i:i+3].upper() + if "N" in codon: + aa.append("X") # unkown aa + else: + aa.append(table[codon]) + return "".join(aa) + + +def revtranslate(aa, dna, check=False): + """Reverse translates aminoacids (with gaps) into DNA + + Must supply original ungapped DNA. + """ + + # trim stop codon + #if aa[-1] in "*X" and CODON_TABLE.get(dna[-3:], "") == "*": + # aa = aa[:-3] + # dna = dna[:-3] + + a = len(aa.replace("-", "")) * 3 + b = len(dna.replace("-", "")) + + if a != b: + raise TranslateError( + "sequences have wrong lengths (pep %d != dna %d)" % + (a, b), aa, dna, None, None) + + seq = [] + i = 0 + for a in aa: + if a == "-": + seq.append("---") + else: + codon = dna[i:i+3] + if check and a != CODON_TABLE.get(codon, "X"): + raise TranslateError("bad translate", aa, dna, a, codon) + seq.append(codon) + i += 3 + return "".join(seq) + +_comp = {"A":"T", "C":"G", "G":"C", "T":"A", "N":"N", + "a":"t", "c":"g", "g":"c", "t":"a", "n":"n", + "R":"Y", "Y":"R", "S":"W", "W":"S", "K":"M", "M":"K", + "r":"y", "y":"r", "s":"w", "w":"s", "k":"m", "m":"k", + "B":"V", "V":"B", "D":"H", "H":"D", + "b":"v", "v":"b", "d":"h", "h":"d"} + +def revcomp(seq): + """Reverse complement a sequence""" + + seq2 = [] + for i in xrange(len(seq)-1, -1, -1): + seq2.append(_comp[seq[i]]) + return "".join(seq2) + + +def gcContent(seq): + hist = util.hist_dict(seq) + total = hist["A"] + hist["C"] + hist["T"] + hist["G"] + + return (hist["C"] + hist["G"]) / float(total) + + +#============================================================================= +# Kimura sequence mutation model +# +# TODO: maybe move to phylo module + + +KIMURA_MATRIX = [ + ['r', 's', 'u', 's'], + ['s', 'r', 's', 'u'], + ['u', 's', 'r', 's'], + ['s', 'u', 's', 'r'] +] + + +def evolveKimuraSeq(seq, time, alpha=1, beta=1): + probs = { + 's': .25 * (1 - math.e**(-4 * beta * time)), + 'u': .25 * (1 + math.e**(-4 * beta * time) + - 2*math.e**(-2*(alpha+beta)*time)) + } + probs['r'] = 1 - 2*probs['s'] - probs['u'] + + seq2 = [] + + for base in seq: + cdf = 0 + row = KIMURA_MATRIX[BASE2INT[base]] + pick = random.random() + + for i in range(4): + cdf += probs[row[i]] + if cdf >= pick: + seq2.append(INT2BASE[i]) + break + + assert len(seq2) == len(seq), "probabilities do not add to one" + + return "".join(seq2) + + +def evolveKimuraBase(base, time, alpha, beta): + probs = { + 's': .25 * (1 - math.e**(-4 * beta * time)), + 'u': .25 * (1 + math.e**(-4 * beta * time) + - 2*math.e**(-2*(alpha+beta)*time)) + } + probs['r'] = 1 - 2*probs['s'] - probs['u'] + + cdf = 0 + row = KIMURA_MATRIX[BASE2INT[base]] + pick = random.random() + + for i in range(4): + cdf += probs[row[i]] + if cdf >= pick: + return INT2BASE[i] + + assert False, "probabilities do not add to one" + diff --git a/arghmm/deps/compbio/seqlib.pyc b/arghmm/deps/compbio/seqlib.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..82f5001a59dcc36202f58c709ee5065ba18d023e GIT binary patch literal 17163
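The codon-table utilities in seqlib.py above are easiest to follow with a small round trip. A minimal sketch, assuming the bundled compbio package is importable (for example after the deps directory has been added to sys.path); the sequences are made up for illustration:

    from compbio import seqlib

    dna = "ATGAAATAG"                          # Met, Lys, stop
    pep = seqlib.translate(dna)                # -> "MK*"

    # revtranslate needs the original ungapped DNA and restores
    # gaps codon by codon from the gapped peptide
    back = seqlib.revtranslate("M-K*", dna)    # -> "ATG---AAATAG"

    seqlib.revcomp("ATGC")                     # -> "GCAT"
    seqlib.gcContent("ATGC")                   # -> 0.5

translate maps each codon through CODON_TABLE (codons containing N become "X"), and revtranslate raises TranslateError when the peptide and DNA lengths disagree.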
zsPffO*r?cGEB>=-Gi)}jlI@kf?c$k0#e**>&?*kTM6jkbFi?*GIXPt5k{Pt$hxg+wuKPxmF_LJ7EbDOG<#eaG#mZ?fJbwv$L8ZBL|7M`?D&l&4wVVr#k%BtcYesu zcUhe8Vw#qw<;~EfOLBZZcIk{A^s=<=xR?6758?rD(JXf_Ok%Gs<*hc;x{m3yBZY-0 zmNRt1(rag;88R<&{|7^1g?V@JGM2SEl#Fv@#e8n~daXU1gYNM|T*$PWxyoFv(L7SA z*238PU+TD6TPq0BIX_O0Ms>2<+L|n7CiYF1V2N zYxjg^UT((q^k)xd_rieP98zx0a={|PY2E8FCTyp$pIH{C7B%*_DPa0{aF8z@Z{F29 zCRmfs1alkn0BnIE%UOJS+r=uT61TSe-Rg7i%Gd>g2*;A{b2t0l(g(D{$1c5?To%x? zZWcrqu3{idzbDJ{WIz+{X1F!(fnhfXM3Lqj$OoLiw&5!Q{PPXm1W}TW>O<t@I3tE?2^Wfr4z&YR1i-S1kp7v2_xP2dh>ENFUlF?y6FfVY*nq(EQ7A_Hd)nZ(y81GKkvKl4?w2o~bF-Ce5%Q(cmM(-(ewO|kaK#i`sgKcl1 z*;aI|GaqL}_xs3n>-S~$WH)Ad_|0YeGyQhLE8=g>G(RoA?1btT_cqDM!yFf zzfras7R<702R5xj9(c{P4Xt$t(yw3l28tw$-hx{PM95v!P{sxm{;(m zf<*#Ft~|pm+gxWveI=WxLMnd z?e_I{P?4{P>2*%yyqoR4zIVqSMF!knlk40G@7R}~_b$C9|BmCwQ)7K`x*tN{Tqo1v z*>ABX_@~HmId8ts!@Q1NdW*X`BtK2NFW|X{DE~$|dj1CO0)Ll(FZ4S|B(;{u`Gv2r zM!w=MF1*NxOsbjZR>YRs@+YZkaQ%G!ald4=Ne=#$4F73P z`|04%Nd|wSJRbkyF208t-)EVsOx3tet3`1xZ2D~lSL$p@xS#ebPm^SXnX9zDqlnt- z_^I);7lM5GY`zqXKVzp+C-W1hj*s(cOIr`#G+}ej2xj{Xzq` z#(pi54*UO=#1D}DqB<*R*?mKGXxgFrfLl6|rutFGN`Qo-I?HXHQJo}z14i{hHZWc> zky-d8%pUL?@^u{^>obvFf_IR4sP;-Rnck4pZX zf^RAKdjKBB>{YB{0zRAopnTvPqc}GuAf@P>@+qzvt8@6o7J&-w+In3|A%stt` Q!NE 1: + lookup[node] = basenode = basetree.new_node(node.name) + for basechild in basechildren: + basetree.add_child(basenode, basechild) + else: + lookup[node] = basechildren[0] + basetree.root = lookup[nodes[-1]] + + # assign layout based on basetree layout + # layout leaves + return dict((arg[name], i) for i, name in enumerate(basetree.leaf_names())) + + +def layout_arg(arg, leaves=None, yfunc=lambda x: x): + """Layout the nodes of an ARG""" + + layout = {} + + # layout leaves + if leaves is None: + leafx = layout_arg_leaves(arg) + else: + leafx = util.list2lookup(leaves) + + for node in arg.postorder(): + if node.is_leaf(): + layout[node] = [leafx[node], yfunc(node.age)] + else: + layout[node] = [ + stats.mean(layout[child][0] for child in node.children), + yfunc(node.age)] + + return layout + + +def map_layout(layout, xfunc=lambda x: x, yfunc=lambda x: x): + + for node, (x, y) in layout.items(): + layout[node] = [xfunc(x), yfunc(y)] + + return layout + + + + + +def get_branch_layout(layout, node, parent, side=0, recomb_width=.4): + """Layout the branches of an ARG""" + + nx, ny = layout[node] + px, py = layout[parent] + + if node.event == "recomb": + if len(node.parents) == 2 and node.parents[0] == node.parents[1]: + step = recomb_width * [-1, 1][side] + else: + step = recomb_width * [-1, 1][node.parents.index(parent)] + return [nx+step, ny, nx+step, py] + else: + return [nx, ny, nx, py] + + + +def show_arg(arg, layout=None, leaves=None, mut=None, recomb_width=.4, + win=None): + """Visualize an ARG""" + + if win is None: + win = summon.Window() + else: + win.clear_groups() + + # ensure layout + if layout is None: + layout = layout_arg(arg, leaves) + + # callbacks + def branch_click(node, parent): + print node.name, parent.name + + # draw ARG + win.add_group(draw_arg(arg, layout, recomb_width=recomb_width, + branch_click=branch_click)) + + # draw mutations + if mut: + g = group() + for node, parent, pos, t in mut: + x1, y1, x2, y2 = get_branch_layout(layout, node, parent) + g.append(group(draw_mark(x1, t, col=(0,0,1)), color(1,1,1))) + win.add_group(g) + return win + + +def draw_arg(arg, layout, recomb_width=.4, branch_click=None): + + def branch_hotspot(node, parent, x, y, y2): + def func(): + branch_click(node, parent) + return hotspot("click", x-.5, y, x+.5, y2, func) + + # draw branches + g 
= group(color(1,1,1)) + for node in layout: + if not node.is_leaf(): + x, y = layout[node] + for i, child in enumerate(node.children): + cx, cy = layout[child] + x1, y1, x2, y2 = get_branch_layout( + layout, child, node, i, recomb_width=recomb_width) + g.append(line_strip(x, y, x2, y2, x1, y1, cx, cy)) + if branch_click: + g.append(branch_hotspot(child, node, x1, y1, y2)) + + # draw recomb + for node in layout: + if node.event == "recomb": + x, y = layout[node] + g.append(draw_mark(x, y, col=(1, 0, 0))) + + return g + + + +def show_marginal_trees(arg, mut=None): + + win = summon.Window() + x = 0 + step = 2 + treewidth = len(list(arg.leaves())) + step + + def trans_camera(win, x, y): + v = win.get_visible() + win.set_visible(v[0]+x, v[1]+y, v[2]+x, v[3]+y, "exact") + + win.set_binding(input_key("]"), lambda : trans_camera(win, treewidth, 0)) + win.set_binding(input_key("["), lambda : trans_camera(win, -treewidth, 0)) + + blocks = arglib.iter_recomb_blocks(arg) + + for tree, block in izip(arglib.iter_marginal_trees(arg), blocks): + pos = block[0] + print pos + + leaves = sorted((x for x in tree.leaves()), key=lambda x: x.name) + layout = layout_arg(tree, leaves) + win.add_group( + translate(x, 0, color(1,1,1), + draw_tree(tree, layout), + text_clip( + "%d-%d" % (block[0], block[1]), + treewidth*.05, 0, + treewidth*.95, -max(l[1] for l in layout.values()), + 4, 20, + "center", "top"))) + + # mark responsible recomb node + for node in tree: + if pos != 0.0 and node.pos == pos: + nx, ny = layout[node] + win.add_group(draw_mark(x + nx, ny)) + + # draw mut + if mut: + for node, parent, mpos, t in mut: + if (node.name in tree and node.name != tree.root.name and + block[0] < mpos < block[1]): + nx, ny = layout[tree[node.name]] + win.add_group(draw_mark(x + nx, t, col=(0,0,1))) + if node.name in tree and tree[node.name].parents: + nx, ny = layout[tree[node.name]] + py = layout[tree[node.name].parents[0]][1] + start = arg[node.name].data["ancestral"][0][0] + win.add_group(lines(color(0,1,0), + x+nx, ny, x+nx, py, + color(1,1,1))) + + + x += treewidth + + win.set_visible(* win.get_root().get_bounding() + ("exact",)) + + return win + + +def show_tree_track(tree_track, mut=None, show_labels=False, + use_blocks=False, branch_click=None): + """ + tree_track = [((start, end), tree), ...] 
+ """ + + def draw_labels(tree, layout): + return group(* + [text_clip(leaf.name, layout[leaf][0], layout[leaf][1], + 1, layout[leaf][1] + 1e4, 4, 20, "middle", "left") + for leaf in tree.leaves()]) + + def branch_hotspot(node, parent, x, y, y2): + def func(): + branch_click(node, parent) + return hotspot("click", x-.5, y, x+.5, y2, func) + + def print_branch(node, parent): + print "node", node.name + + + tree_track = iter(tree_track) + if mut: + mut = util.PushIter(mut) + block, tree = tree_track.next() + if branch_click is True: + branch_click = print_branch + + win = summon.Window() + treex = 0 + step = 2 + treewidth = len(list(tree.leaves())) + step + + def trans_camera(win, x, y): + v = win.get_visible() + win.set_visible(v[0]+x, v[1]+y, v[2]+x, v[3]+y, "exact") + + win.set_binding(input_key("]"), lambda : trans_camera(win, treewidth, 0)) + win.set_binding(input_key("["), lambda : trans_camera(win, -treewidth, 0)) + + for block, tree in chain([(block, tree)], tree_track): + pos = block[0] + print pos + + layout = treelib.layout_tree(tree, xscale=1, yscale=1) + treelib.layout_tree_vertical(layout, leaves=0) + g = win.add_group( + translate(treex, 0, color(1,1,1), + sumtree.draw_tree(tree, layout, + vertical=True), + (draw_labels(tree, layout) if show_labels else group()), + text_clip( + "%d-%d" % (block[0], block[1]), + treewidth*.05, 0, + treewidth*.95, -max(l[1] for l in layout.values()), + 4, 20, + "center", "top"))) + + + clicking = group() + g.append(clicking) + + # hotspots + if branch_click: + for node in tree: + if node.parent: + x, y = layout[node] + x2, y2 = layout[node.parent] + clicking.append(branch_hotspot(node, node.parent, x, y, y2)) + #win.add_group(clicking) + + + # draw mut + if mut: + for mpos, age, chroms in mut: + if block[0] < mpos < block[1]: + node = arglib.split_to_tree_branch(tree, chroms) + parent = node.parent + if node and parent: + t = random.uniform(layout[node][1], layout[parent][1]) + nx, ny = layout[node] + win.add_group(draw_mark(treex + nx, t, col=(0,0,1))) + elif mpos > block[1]: + mut.push((mpos, age, chroms)) + break + + + treex += treewidth + + #win.set_visible(* win.get_root().get_bounding() + ("exact",)) + win.home("exact") + + return win + + + +def show_coal_track(tree_track): + + win = summon.Window() + + bgcolor = (1, 1, 1, .1) + cmap = util.rainbow_color_map(low=0.0, high=1.0) + + maxage = 0 + for (start, end), tree in tree_track: + print start + l = [] + times = treelib.get_tree_timestamps(tree) + nleaves = len(tree.leaves()) + maxage2 = 0 + for node in tree: + if len(node.children) > 1: + age = times[node] + freq = len(node.leaves()) / float(nleaves) + l.extend([color(*cmap.get(freq)), start, age, end, age]) + if age > maxage2: + maxage2 = age + win.add_group(group(lines(*l), color(*bgcolor), + box(start, 0, end, maxage2, fill=True))) + if maxage2 > maxage: + maxage = maxage2 + + # hotspot + def func(): + x, y = win.get_mouse_pos() + print "pos=%s age=%f" % (util.int2pretty(int(x)), y) + win.add_group(hotspot("click", 0, 0, end, maxage, + func)) + + win.home("exact") + + + return win + + + +def show_smc(smc, mut=None, show_labels=False, branch_click=None, + use_names=False): + """ + """ + + def draw_labels(tree, layout): + return group(* + [text_clip(names[leaf.name], + layout[leaf][0] - .4, layout[leaf][1], + layout[leaf][0] + .4, layout[leaf][1] - 1e4, + 4, 20, "top", "center") + for leaf in tree.leaves()]) + + def branch_hotspot(node, parent, x, y, y2): + def func(): + branch_click(node, parent) + return hotspot("click", x-.5, y, 
x+.5, y2, func) + + def print_branch(node, parent): + print "node", node.name + + def trans_camera(win, x, y): + v = win.get_visible() + win.set_visible(v[0]+x, v[1]+y, v[2]+x, v[3]+y, "exact") + + def on_scroll_window(win): + region = win.get_visible() + print region + + def on_resize_window(win): + region = win.get_visible() + print region + + + + branch_color = (1, 1, 1) + spr_color = (1, 0, 0, .5) + recomb_color = (1, 0, 0) + + + # create window + win = summon.Window() + win.set_binding(input_key("]"), lambda : trans_camera(win, treewidth, 0)) + win.set_binding(input_key("["), lambda : trans_camera(win, -treewidth, 0)) + win.add_view_change_listener(lambda : on_scroll_window(win)) + #win.remove_resize_listener(lambda : on_resize_window(win)) + + treex = 0 + step = 2 + + names = [] + seq_range = [0, 0] + treewidth = 10 + tree = None + layout = None + + for item in smc: + if item["tag"] == "NAMES": + names = item["names"] + if not use_names: + names = map(str, range(len(names))) + + treewidth = len(names) + + elif item["tag"] == "RANGE": + seq_range = [item["start"], item["end"]] + + elif item["tag"] == "TREE": + tree = item["tree"] + + layout = treelib.layout_tree(tree, xscale=1, yscale=1) + treelib.layout_tree_vertical(layout, leaves=0) + #map_layout(layout, yfunc=minlog) + + region_text = text_clip("%d-%d" % (item["start"], item["end"]), + treewidth*.05, 0, + treewidth*.95, -max(l[1] for l in layout.values()), + 4, 20, + "center", "top") + + g = win.add_group( + translate(treex, 0, color(1,1,1), + sumtree.draw_tree(tree, layout, + vertical=True), + (draw_labels(tree, layout) + if show_labels else group()), + zoom_clamp(translate(0, -20, region_text), + axis=(treewidth, 0), + miny=1.0, maxy=1.0) + )) + + clicking = group() + g.append(clicking) + + elif item["tag"] == "SPR": + + rx, ry = layout[tree[item["recomb_node"]]] + ry = item["recomb_time"] + cx, cy = layout[tree[item["coal_node"]]] + cy = item["coal_time"] + + g.append( + group( + lines(color(*spr_color), rx, ry, cx, cy), + mark_tree(tree, layout, + item["recomb_node"], time=item["recomb_time"], + col=recomb_color))) + + treex += treewidth + step + + + ''' + tree_track = iter(tree_track) + if mut: + mut = util.PushIter(mut) + block, tree = tree_track.next() + if branch_click is True: + branch_click = print_branch + + win = summon.Window() + treex = 0 + step = 2 + treewidth = len(list(tree.leaves())) + step + + def trans_camera(win, x, y): + v = win.get_visible() + win.set_visible(v[0]+x, v[1]+y, v[2]+x, v[3]+y, "exact") + + win.set_binding(input_key("]"), lambda : trans_camera(win, treewidth, 0)) + win.set_binding(input_key("["), lambda : trans_camera(win, -treewidth, 0)) + + for block, tree in chain([(block, tree)], tree_track): + pos = block[0] + print pos + + layout = treelib.layout_tree(tree, xscale=1, yscale=1) + treelib.layout_tree_vertical(layout, leaves=0) + g = win.add_group( + translate(treex, 0, color(1,1,1), + sumtree.draw_tree(tree, layout, + vertical=True), + (draw_labels(tree, layout) if show_labels else group()), + text_clip( + "%d-%d" % (block[0], block[1]), + treewidth*.05, 0, + treewidth*.95, -max(l[1] for l in layout.values()), + 4, 20, + "center", "top"))) + + + clicking = group() + g.append(clicking) + + # hotspots + if branch_click: + for node in tree: + if node.parent: + x, y = layout[node] + x2, y2 = layout[node.parent] + clicking.append(branch_hotspot(node, node.parent, x, y, y2)) + #win.add_group(clicking) + + + # draw mut + if mut: + for mpos, age, chroms in mut: + if block[0] < mpos < block[1]: + 
node = arglib.split_to_tree_branch(tree, chroms) + parent = node.parent + if node and parent: + t = random.uniform(layout[node][1], layout[parent][1]) + nx, ny = layout[node] + win.add_group(draw_mark(treex + nx, t, col=(0,0,1))) + elif mpos > block[1]: + mut.push((mpos, age, chroms)) + break + + + treex += treewidth + ''' + + win.home("exact") + + + return win + + + +def show_coal_track3(tree_track): + + win = summon.Window() + + + bgcolor = (1, 1, 1, .1) + cmap = util.rainbow_color_map(low=0.5, high=1.0) + + maxage = 0 + for (start, end), tree in tree_track: + print start + l = [] + times = treelib.get_tree_timestamps(tree) + nleaves = len(tree.leaves()) + maxage2 = 0 + for node in tree: + if len(node.children) > 1: + age = times[node] + sizes = [len(x.leaves()) for x in node.children] + bias = max(sizes) / float(sum(sizes)) + l.extend([color(*cmap.get(bias)), start, age, end, age]) + if age > maxage2: + maxage2 = age + win.add_group(group(lines(*l), color(*bgcolor), + box(start, 0, end, maxage2, fill=True))) + if maxage2 > maxage: + maxage = maxage2 + + def func(): + x, y = win.get_mouse_pos() + print "pos=%s age=%f" % (util.int2pretty(int(x)), y) + win.add_group(hotspot("click", 0, 0, end, maxage, + func)) + + win.home("exact") + + + return win + + +def show_coal_track2(tree_track): + + win = summon.Window() + + + bgcolor = (1, 1, 1, .1) + cmap = util.rainbow_color_map(low=0.0, high=1.0) + tracks = {} + + maxage = 0 + for (start, end), tree in tree_track: + print start + l = [] + times = treelib.get_tree_timestamps(tree) + nleaves = len(tree.leaves()) + maxage2 = 0 + for node in tree: + if len(node.children) > 1: + age = times[node] + freq = len(node.leaves()) / float(nleaves) + #sizes = [len(x.leaves()) for x in node.children] + #m = max(sizes) + #n = sum(sizes) + #pval = 2 * (n - m) / float(n - 1) + l.extend([color(*cmap.get(freq)), start, age, end, age]) + if age > maxage2: + maxage2 = age + win.add_group(group(lines(*l), color(*bgcolor), + box(start, 0, end, maxage2, fill=True))) + if maxage2 > maxage: + maxage = maxage2 + + def func(): + x, y = win.get_mouse_pos() + print "pos=%s age=%f" % (util.int2pretty(int(x)), y) + win.add_group(hotspot("click", 0, 0, end, maxage, + func)) + + win.home("exact") + + + return win + + +def show_coal_track2(tree_track): + + win = summon.Window() + + + bgcolor = (1, 1, 1, .1) + cmap = util.rainbow_color_map(low=0.0, high=1.0) + + maxage = 0 + for (start, end), tree in tree_track: + print start + l = [] + times = treelib.get_tree_timestamps(tree) + nleaves = len(tree.leaves()) + maxage2 = 0 + for node in tree: + if len(node.children) > 1: + age = times[node] + sizes = [len(x.leaves()) for x in node.children] + m = max(sizes) + n = sum(sizes) + pval = 2 * (n - m) / float(n - 1) + freq = len(node.leaves()) / float(nleaves) + l.extend([color(*cmap.get(freq)), start, age, end, age]) + if age > maxage2: + maxage2 = age + win.add_group(group(lines(*l), color(*bgcolor), + box(start, 0, end, maxage2, fill=True))) + if maxage2 > maxage: + maxage = maxage2 + + def func(): + x, y = win.get_mouse_pos() + print "pos=%s age=%f" % (util.int2pretty(int(x)), y) + win.add_group(hotspot("click", 0, 0, end, maxage, + func)) + + win.home("exact") + + + return win + + + +def draw_tree(tree, layout, orient="vertical"): + + vis = group() + bends = {} + + for node in tree.postorder(): + # get node coordinates + nx, ny = layout[node] + px, py = layout[node.parents[0]] if node.parents else (nx, ny) + + # determine bend point + if orient == "vertical": + bends[node] = (nx, 
py) + else: + bends[node] = (px, ny) + + # draw branch + vis.append(lines(nx, ny, bends[node][0], bends[node][1])) + + # draw cross bar + if len(node.children) > 0: + a = bends[node.children[-1]] + b = bends[node.children[0]] + vis.append(lines(a[0], a[1], b[0], b[1])) + + return vis + + +def draw_mark(x, y, col=(1,0,0), size=.5, func=None): + """Draw a mark at (x, y)""" + + if func: + h = hotspot("click", x-size, y-size, x+size, y+size, func) + else: + h = group() + + return zoom_clamp( + color(*col), + box(x-size, y-size, x+size, y+size, fill=True), + h, + color(1,1,1), + origin=(x, y), + minx=10.0, miny=10.0, maxx=20.0, maxy=20.0, + link=True) + + + +def mark_tree(tree, layout, name, y=None, time=None, + col=(1, 0, 0), yfunc=lambda y: y, size=.5): + nx, ny = layout[tree[name]] + if y is not None: + y += ny + else: + y = time + return draw_mark(nx, yfunc(y), col=col, size=size) + + +def draw_branch_mark(arg, layout, node=None, parent=None, pos=None, + chroms=None, age=None, col=(0,0,1)): + """Draw a mark on a branch of an ARG""" + + if node is None: + node = arglib.split_to_arg_branch(arg, pos, chroms) + if parent is None: + assert pos is not None + parent = arg.get_local_parent(node, pos) + + if node and parent: + if age is None: + t = random.uniform(layout[node][1], layout[parent][1]) + else: + t = layout[node][1] + (age - node.age) + nx, ny = layout[node] + return draw_mark(nx, t, col=col) + else: + return group() + + + +def draw_branch(arg, layout, node=None, parent=None, chroms=None, + pos=None, col=None): + """Draw a mark on a branch of an ARG""" + + + if node is None: + node = arglib.split_to_arg_branch(arg, pos, chroms) + if parent is None: + assert pos is not None + parent = arg.get_local_parent(node, pos) + + if node and parent: + x1, y1, x2, y2 = get_branch_layout(layout, node, parent) + if col is None: + return lines(x1, y1, x2, y2) + else: + return lines(color(*col), x1, y1, x2, y2) + else: + return group() + + + + +#============================================================================= +# haplotype visualization + + +def inorder_tree(tree): + queue = [("queue", tree.root)] + + while queue: + cmd, node = queue.pop() + + if cmd == "visit": + yield node + elif cmd == "queue": + if node.is_leaf(): + yield node + else: + queue.extend( + [("queue", node.children[1]), + ("visit", node), + ("queue", node.children[0])]) + + +def layout_tree_leaves_even(tree): + layout = {} + y = 0 + + for node in inorder_tree(tree): + if node.is_leaf(): + layout[node.name] = y + else: + y += 1 + + return layout + + +def layout_tree_leaves(tree): + layout = {} + y = 0 + + for node in inorder_tree(tree): + if node.is_leaf(): + layout[node.name] = y + else: + #y += 1 + y += (node.age / 1e3) + 1 + #y += exp(node.age / 5e2) + 1 + #y += log(node.age + 1) ** 3 + + vals = layout.values() + mid = (max(vals) + min(vals)) / 2.0 + for k, v in layout.items(): + layout[k] = (v - mid) + + return layout + + +def layout_chroms(arg, start=None, end=None): + + if start is None: + start = arg.start + if end is None: + end = arg.end + + tree = arg.get_marginal_tree(start) + arglib.remove_single_lineages(tree) + last_pos = start + blocks = [] + leaf_layout = [] + + layout_func = layout_tree_leaves + #layout_func = layout_tree_leaves_even + + for spr in arglib.iter_arg_sprs(arg, start=start, end=end, use_leaves=True): + print "layout", spr[0] + blocks.append([last_pos, spr[0]]) + leaf_layout.append(layout_func(tree)) + inorder = dict((n, i) for i, n in enumerate(inorder_tree(tree))) + + # determine SPR nodes + 
rnode = arglib.arg_lca(tree, spr[1][0], spr[0]) + cnode = arglib.arg_lca(tree, spr[2][0], spr[0]) + + # determine best side for adding new sister + left = (inorder[rnode] < inorder[cnode]) + + # apply spr + arglib.apply_spr(tree, rnode, spr[1][1], cnode, spr[2][1], spr[0]) + + # adjust sister + rindex = rnode.parents[0].children.index(rnode) + if left and rindex != 0: + rnode.parents[0].children.reverse() + + last_pos = spr[0] + + blocks.append([last_pos, end]) + leaf_layout.append(layout_func(tree)) + + return blocks, leaf_layout + + +def layout_tree_block(tree, names): + layout = {} + + def walk(node): + if node.is_leaf(): + x = names.index(node.name) + layout[node.name] = (x, x-.25, x+.25, node.age) + return x-.25, x+.25 + else: + assert len(node.children) == 2 + low1, high1 = walk(node.children[0]) + low2, high2 = walk(node.children[1]) + x = (min(high1, high2) + max(low1, low2)) / 2.0 + low = min(low1, low2) + high = max(high1, high2) + layout[node.name] = (x, low, high, node.age) + return low, high + walk(tree.root) + return layout + + +def mouse_click(win): + print win.get_mouse_pos("world") + +def chrom_click(win, chrom, block): + def func(): + if win: + print chrom, block, win.get_mouse_pos("world")[0] + return func + + +def draw_arg_threads(arg, blocks, layout, sites=None, + chrom_colors=None, chrom_color=[.2,.2,.8,.8], + snp_colors={"compat": [1, 0, 0], + "noncompat": [0, 1, 0]}, + spr_alpha=1, + spr_trim=10, + compat=False, + draw_group=None, + win=None): + + leaf_names = list(arg.leaf_names()) + + # TEST: + rnodes = dict((r.pos, r) for r in arg if r.event == "recomb") + + if draw_group is None: + draw_group = group() + + # set chromosome color + if chrom_colors is None: + chrom_colors = {} + for name in leaf_names: + chrom_colors[name] = chrom_color + + spr_colors = {} + for name in leaf_names: + spr_colors[name] = list(chrom_colors[name]) + if len(spr_colors[name]) < 4: + spr_colors[name].append(1.0) + spr_colors[name][3] *= spr_alpha + + + trims = [] + + for k, (x1, x2) in enumerate(blocks): + # calc trims + length = x2 - x1 + minlen = 0 + spr_trim2 = min(spr_trim, (length - minlen) / 2.0) + trims.append((x1 + spr_trim2, x2 - spr_trim2)) + trim = trims[-1] + + # horizontal lines + l = [] + for name in leaf_names: + c = chrom_colors[name] + y = layout[k][name] + l.extend([color(*c), trim[0], y, trim[1], y]) + draw_group.append(lines(*l)) + + # SPRs + if k > 0: + l = [] + + # TEST: + #rnode = rnodes.get(x1, None) + #young = (rnode is not None and rnode.age < 500) + + for name in leaf_names: + #c = [1,0,0] if young else spr_colors[name] + c = spr_colors[name] + y1 = layout[k-1][name] + y2 = layout[k][name] + l.extend([color(*c), trims[k-1][1], y1, trims[k][0], y2]) + + draw_group.append(lines(*l)) + + # hotspots + g = group() + for name in leaf_names: + y = layout[k][name] + g.append(hotspot("click", x1+spr_trim, y+.4, x2-spr_trim, y-.4, + chrom_click(win, name, (x1, x2)))) + draw_group.append(g) + + # SNPs + tree = None + if sites: + l = [] + for pos, col in sites.iter_region(x1, x2): + split = sites.get_minor(pos) + if compat: + if tree is None: + tree = arg.get_marginal_tree((x1+x2)/2.0) + arglib.remove_single_lineages(tree) + node = arglib.split_to_arg_branch(tree, pos-.5, split) + if node is not None: + derived = list(tree.leaf_names(node)) + c = color(*snp_colors["compat"]) + else: + c = color(*snp_colors["noncompat"]) + derived = split + else: + c = color(*snp_colors["compat"]) + derived = split + + for d in derived: + if d in layout[k]: + y = layout[k][d] + l.extend([c, 
pos, y+.4, pos, y-.4]) + draw_group.append(lines(*l)) + + return draw_group + diff --git a/arghmm/deps/rasmus/__init__.py b/arghmm/deps/rasmus/__init__.py new file mode 100644 index 00000000..3f6ce54b --- /dev/null +++ b/arghmm/deps/rasmus/__init__.py @@ -0,0 +1,2 @@ +# a directory must contain a __init__.py in order to be a python package + diff --git a/arghmm/deps/rasmus/__init__.pyc b/arghmm/deps/rasmus/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b88e63306c7c6e8f77b83b8539fd9e10bed1eb01 GIT binary patch literal 150 zcmcckiI>YF;dx*(0~9a~wih%;@8M(PIDkZg`SidN-IJdM|KR!M)FS8^*Uaz3Cgac@XO>TZl OX-=vg$fjZ-W&i;2^dm3; literal 0 HcmV?d00001 diff --git a/arghmm/deps/rasmus/hmm.py b/arghmm/deps/rasmus/hmm.py new file mode 100644 index 00000000..22ce8ecd --- /dev/null +++ b/arghmm/deps/rasmus/hmm.py @@ -0,0 +1,351 @@ +""" + + Light weight generic HMM algorithms + + + Methods need the following user-defined functions: + + get_num_states(pos) + prob_prior(pos, state) + prob_emission(pos, state) + prob_transition(pos1, state1, pos2, state2) + + + probs = [[j for j in get_num_states(i)] + for i in xrange(npositions)] + + +""" + +import random +from math import log, exp + +from rasmus import util, stats +from stats import logadd + + +class HMM (object): + """ + Base class for defining an Hidden Markov Model (HMM) + """ + + def __init__(self): + pass + + def set_callbacks(self, get_num_states=None, + prob_prior=None, + prob_emission=None, + prob_transition=None, + emit=None): + if get_num_states: + self.get_num_states = get_num_states + if prob_prior: + self.prob_prior = prob_prior + if prob_emission: + self.prob_emission = prob_emission + if prob_transition: + self.prob_transition = prob_transition + if emit: + self.emit = emit + + + def get_num_states(self, pos): + """Returns the number of states at position 'pos'""" + return 0 + + def prob_prior(self, pos, state): + """Returns the prior probability of a state""" + return 0.0 + + def prob_emission(self, pos, state): + """ + Returns the emission probability at position 'pos' with state 'state' + """ + return 0.0 + + def prob_transition(self, pos1, state1, pos2, state2): + """ + Returns transition probability for transitioning between states + 'state1' and 'state2' between position 'pos1' and position 'pos2' + """ + return 0.0 + + def emit(self, pos, state): + """ + Returns emission data given state 'state' + """ + return None + + + +def sample_hmm_first_state(model): + state = 0 + nstates = model.get_num_states(0) + p = model.prob_prior(0, state) + pick = log(random.random()) + while pick > p and state < nstates: + state += 1 + p = logadd(p, model.prob_prior(0, state)) + return state + + +def sample_hmm_next_state(model, pos, state): + nstates = model.get_num_states(pos) + state2 = 0 + p = model.prob_transition(pos-1, state, pos, state2) + pick = log(random.random()) + while pick > p and state2 < nstates: + state2 += 1 + p = logadd(p, model.prob_transition(pos-1, state, pos, state2)) + return state2 + + + +def sample_hmm_states(model): + + # sample first state + pos = 0 + state = sample_hmm_first_state(model) + yield state + + # sample next states + pos = 1 + while True: + state = sample_hmm_next_state(model, pos, state) + yield state + pos += 1 + + +def sample_hmm_data(model, states=None): + + if states is None: + states = sample_hmm_states(model) + + for i, state in enumerate(states): + yield model.emit(i, state) + + + +def viterbi(model, n, verbose=False): + """ + Compute argmax_path P(path|data) + """ + + probs = 
[] + ptrs = [] + + # calc first position + nstates = model.get_num_states(0) + probs.append([model.prob_prior(0, j) + model.prob_emission(0, j) + for j in xrange(nstates)]) + ptrs.append([-1] * nstates) + + if n > 20: + step = (n // 20) + else: + step = 1 + + # loop through positions + for i in xrange(1, n): + if verbose and i % step == 0: + print " viterbi iter=%d/%d, lnl=%f" % (i+1, n, max(probs[-1])) + + nstates1 = model.get_num_states(i-1) + nstates2 = model.get_num_states(i) + col1 = probs[i-1] + + # find max transition and emission + col2 = [] + col2_ptr = [] + for k in xrange(nstates2): + top = -util.INF + ptr = -1 + emit = model.prob_emission(i, k) + for j in xrange(nstates1): + p = col1[j] + model.prob_transition(i-1, j, i, k) + emit + if p > top: + top = p + ptr = j + col2.append(top) + col2_ptr.append(ptr) + + probs.append(col2) + ptrs.append(col2_ptr) + + # find max traceback + j = util.argmax(probs[-1]) + traceback = [0] * n + traceback[n-1] = j + for i in xrange(n-1, 0, -1): + j = ptrs[i][j] + traceback[i-1] = j + + return traceback + + + +def forward_algorithm(model, n, verbose=False): + + probs = [] + + # calc first position + nstates = model.get_num_states(0) + probs.append([model.prob_prior(0, j) + model.prob_emission(0, j) + for j in xrange(nstates)]) + + if n > 20: + step = (n // 20) + else: + step = 1 + + # loop through positions + nstates1 = nstates + for i in xrange(1, n): + if verbose and i % step == 0: + print " forward iter=%d/%d, lnl=%f" % (i+1, n, max(probs[i-1])) + + nstates2 = model.get_num_states(i) + col1 = probs[i-1] + + # find total transition and emission + col2 = [] + for k in xrange(nstates2): + tot = -util.INF + emit = model.prob_emission(i, k) + for j in xrange(nstates1): + p = col1[j] + model.prob_transition(i-1, j, i, k) + emit + tot = logadd(tot, p) + col2.append(tot) + + probs.append(col2) + nstates1 = nstates2 + + return probs + + + +def iter_forward_algorithm(model, n, verbose=False): + + # calc first position + nstates = model.get_num_states(0) + col1 = [model.prob_prior(0, j) + model.prob_emission(0, j) + for j in xrange(nstates)] + + if n > 20: + step = (n // 20) + else: + step = 1 + + # loop through positions + nstates1 = nstates + for i in xrange(1, n): + if verbose and i % step == 0: + print " forward iter=%d/%d, lnl=%f" % (i+1, n, max(col1)) + + nstates2 = model.get_num_states(i) + + # find total transition and emission + col2 = [] + for k in xrange(nstates2): + tot = -util.INF + emit = model.prob_emission(i, k) + for j in xrange(nstates1): + p = col1[j] + model.prob_transition(i-1, j, i, k) + emit + tot = logadd(tot, p) + col2.append(tot) + + yield col2 + col1 = col2 + nstates1 = nstates2 + + + +def backward_algorithm(model, n, verbose=False): + + probs = [] + + # calc last position + nstates = model.get_num_states(n-1) + for i in xrange(n): + probs.append(None) + probs[n-1] = [model.prob_prior(n-1, j) + model.prob_emission(n-1, j) + for j in xrange(nstates)] + + if n > 20: + step = (n // 20) + else: + step = 1 + + # loop through positions + for i in xrange(n-2, -1, -1): + if verbose and i % step == 0: + print " backward iter=%d/%d, lnl=%f" % (i+1, n, max(probs[i+1])) + + nstates1 = model.get_num_states(i) + nstates2 = model.get_num_states(i+1) + col2 = probs[i+1] + + # find total transition and emission + col1 = [] + emit = [model.prob_emission(i+1, k) for k in xrange(nstates2)] + for j in xrange(nstates1): + tot = -util.INF + for k in xrange(nstates2): + p = col2[k] + emit[k] + model.prob_transition(i, j, i+1, k) + tot = logadd(tot, p) 
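            # at this point 'tot' holds the log-space backward value
            #   B[i][j] = logsum over k of ( B[i+1][k] + emit(i+1, k)
            #                                + trans(i, j, i+1, k) ),
            # accumulated one next-state k at a time with logadd()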
+ col1.append(tot) + + probs[i] = col1 + + return probs + + +def get_posterior_probs(model, n, verbose=False): + + probs_forward = forward_algorithm(model, n, verbose=verbose) + probs_backward = backward_algorithm(model, n, verbose=verbose) + + total_prob = -util.INF + for j in xrange(model.get_num_states(0)): + total_prob = logadd(total_prob, + model.prob_prior(0, j) + + model.prob_emission(0, j) + + probs_backward[0][j]) + + probs_post = [ + [probs_forward[i][j] + probs_backward[i][j] - total_prob + for j in xrange(model.get_num_states(i))] + for i in xrange(n)] + + return probs_post + + +def sample_posterior(model, n, forward_probs=None, verbose=False): + + path = range(n) + + # get forward probabilities + if forward_probs is None: + forward_probs = forward_algorithm(model, n, verbose=verbose) + + # base case i=n-1 + B = 0.0 + i = n-1 + A = [forward_probs[i][j] for j in range(model.get_num_states(i))] + path[i] = j = stats.sample(map(exp, A)) + + # recurse + for i in xrange(n-2, -1, -1): + C = [] + A = [] + for j in range(model.get_num_states(i)): + # !$A_{j,i} = F_{i,j} C_{i,j} B_{i+1,l}$! + C.append( + model.prob_transition(i, j, i+1, path[i+1]) + + model.prob_emission(i+1, path[i+1])) + A.append(forward_probs[i][j] + C[j] + B) + path[i] = j = stats.sample(map(exp, A)) + # !$B_{i,j} = C_{i,j} B_{i+1,l}$! + B += C[j] + + return path diff --git a/arghmm/deps/rasmus/hmm.pyc b/arghmm/deps/rasmus/hmm.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b620e2d45d4453b3f8e6f4cc2504f27b25a4a2e GIT binary patch literal 9368 zcmb_iNo*X)6|L^sIK!FYB8iKaq#aw<$d=8JUL#YEZ3$jti;i1%6w@Lm!oqY$UO*XCbx3fJw5etE#JO`Tza* ze^v9ZUlnq%{^VaDRZaYp!Tk+f;op(ej9EwOn3`+$9J7uL{t{--HI0N>Pnbs1tS61b z+N9Y_nMT^Ir%fYc)-$#?W%e?rF=5sxP)?g5YqrofW9}Mr$CyvobEY=I!uq7CWtGXB zT27e)N3BhoyBK)N1kczQneDA=!uA^N%h)2Lf%hINj^=gaZyxTCZZmsZYe;-1H$Lhq9I*=M_$ zb=TYV>doWxs-NHRG%wCsKWtfN%O1ws4oXdQtr5fX8|b+HU;M0K6;C-Zy*VP4B!PRpx_7=2_ojmpKI=> z%zBEzB$Cp^A~P9NODTi)X=So8^Z+~mS}Ds>UPq!ezf}nXuiB`Dq2wb~Qn8h$_invb z3!2{5N_Vey)4STL1r4u+Ia^ItxuU^!nSFBAnDaHQb*+SwvCHOzFlcPqD%OV=ku2>u zqovLI_EM!=-KpOUmY}TNpc;j!*xuRSAKYp|J6!5k!u^AA3DwWHKaVExdbwP0)}wOS zV}I0zajKLeFBUbY9b^t2(<>w?n*TOw&QZwRNuQ99x&LbqUqEwk~PwlB!GCx|FR; zsV=Fyv2|1DQ4z^e_(wTZjgTJ|2E_r|qp3m72;mV$hXzrJtMZj_?LL+0vqyiDNgm0` zNtaU#VY}5zqp?}3?uAdX;xrPI$~Xn*=vAe#c@YdFAUFU5Ys2S}_(62gZH6KbFv4cg z^;%oj?nAE<(R5{B-Z|vX)$vBW9QL4P%V6;E#8&B3jxXY}*m0`~l zk;_1)o?vjwHc8XFVh-W+5@r_;@MY^fU9)RU>%0mGCSO6%z#-bet&VqS`|PY5=Q>8X zCWWqv)6oj{FJw<3XJv|aZhUyV&l(*{LFtYLMM|{HY**Ixu%9+J)!aU;S9M6$~5F5NC`(GhLsVMlML=nD8 zu2xhK3k0};AcR3l(%ea!e(D}Aq)l(l9Hz}-N?LdWg10Jx7G6+6T1ZL@3PvE``*clx zq)g{+>LdA3eJGZ|)QwN5SxpG@i!9Q4NfZ7f%$;IFT>6h8IXM-aL2qL!Xa>Dl12_1< zYG4XFcx>*^cV6=#Qj|82YaHkprm#y1nNNP}DkW9H1X+p(b2&Cm{=A zGb>4=yyHQ1HAc1^VjRjW`;Q|L6hH1B1pc>BvK)3)XeTG28H_L{AZrmjtdwvPX(#Wb zkS0t9B1$<5&?uOpa6UwV%QI$o!nCF#0xaH|z72(Y&k~Jj0s zyJjdMzMT}=syIK}SuwZMe1+Cb+H}n}8mG)@)uF!|dB^EzsgTx7cmcTyGl#)w^|G+o&r2!=rA5>GqeL)iMetkGpQ2jp88g|($pa8G6AOSdo07Mg zHN8*uQqI2gzT}ptxE3j_iFn2fsEFU<^k>a{|CBkz-?Z2ephW?rpczK#>^csv>xTu( z6m2BTq6#xAbY8+!=?-o5RN_A@+NXao*Ew@cb0{L{6OyEI9CNyVnj;+|g@%%5VYPQL zHW8N44yy%uT-o@Jp+2CWEY^cH5f%1DE5g;zMfW=Txuz{$_zWPY;OTN}zkLt|9`?Tb zm0r1BL5Tgs5^sluuHAH7FUsSGI>Qhw6@VoNdpGM*(A~tL1#Xwl)|SrJE_jV*r! 
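A minimal usage sketch of the callback API above (all probabilities in log space). The two-state model, its parameters, and the observations are made up for illustration, and the imports assume the vendored rasmus package added by this patch is on sys.path:

    from math import log
    from rasmus import hmm

    obs = [0, 0, 1, 1, 1]                        # hypothetical observations
    emit_p = [[0.9, 0.1],                        # P(obs | state) for states 0, 1
              [0.2, 0.8]]

    model = hmm.HMM()
    model.set_callbacks(
        get_num_states=lambda pos: 2,
        prob_prior=lambda pos, state: log(0.5),
        prob_emission=lambda pos, state: log(emit_p[state][obs[pos]]),
        prob_transition=lambda pos1, s1, pos2, s2: log(0.9 if s1 == s2
                                                       else 0.1))

    n = len(obs)
    path = hmm.viterbi(model, n)              # most probable state path
    fwd = hmm.forward_algorithm(model, n)     # log forward table
    post = hmm.get_posterior_probs(model, n)  # log posterior scores per position
    draw = hmm.sample_posterior(model, n, forward_probs=fwd)

viterbi() returns a list of state indices, one per position; sample_posterior() draws a single path stochastically using the precomputed forward table.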
z5&^GRMHW|Sw}WO)S$n!MieR+I!r(r4T`Ih{_O>>d_D#*KL}T$~z#P3Pw|o<0x3FcT zYuAyBbOddy6@9d*^& zp5_^~+Pb4wEF-@p_CXL|4Y-LtIsTA|$_z=UB;tZGfYwGNWXlT=AR@93NC@5QHL%X47M?e~UyM^n-}DPZ zzA26|4GbLIGi7@3tNDZ#$0dmabe0}xngrr7NC`HJx5zY8V47*VE>HuB1U(f^=QXfL zUUU)@Mt(>qc`?Xgj`ZC5r6J2)Qve%B3dbxJTgzLOZtbC#>Ys(;{im7Gh1!#zbIj>L zgjlu*KlG+0qC^(ylTG|*nOtC^I8Em-ODuku$ubk#oxj551tu>tc^OG;Owq`QPR?vN z%XsbO;Q{lXQ5kY%1A~$I#Kx0$tof8&YdqF;28MG=CX=&9GL5%sBvo7?uoJs4a58}; zH=cwaXN-fOB{mHBLhIUf!V32r8-}$L6=4{jxedSr9^&_h*#)cv&N;zI3P>N=#aD6$ z@Wso=?E)bso%qNuP*7NKAIEI%fu=tx3&28R=LrkQ)34Xl@VEsnTvh|9THMkC(0pV8 zShqMqz83auEez|Y1>h}OKoJ%IM-ESnF0I!tn9c$KKPeAs{b%Rjfchi@We+>G9`Tg)eguee8d*S(joX5%Go?A+Ku zXPnhz(D&Rac#V63pMuw@Ch!)+eV(XY#uXNZVXO5Hgsfu}&|y3OgkTWI^N5=i3f6&r zMg30aX>%R;Qb-H9#QtM>ETjcIZHEYEVpBmM<5e)1HHNn2ZCk&}J5CLbx|2AXg=xZI zKGD_#%|OGj2mC3By5w+%up9aldgE25^II zX-$e#jo%!)7>NOHWn^TJ(*e*F&;jORUtrwmn%T}L&h3I+O<%7lWWKV|qT~=6A(~|* z-1~66zcmo!?nsgw2JGb2~p% zguzj2`5T$f@>>OV%itL|^uGToR#q+$2XX z=}A7lpI?$ol1b8FX&xFtnVc86f^n=() z8Kx~b^iHP~+`SSTD%)K^-Hcnb-icD=B_sV0GLamVH4#Akj_dFQ4sf%3a%5~JJeJG2 zZ4#!?`&frO0w6aUu^}s>L2ORk;2HKSz{D`d!}q^IaULd(!Xh3n&J#sA5BfoD@&Kxg zTb}VlS|u)FPe|m)(BbE~u#9u{=;YAFV={gTWUu(Z-s)-!;S%9R*I?W;&KWo7%mFxQ z_uI}~BI9nlRj!<{p+gdMzQ7BFj*V&{aFI99Zx+Xnu)u=Q3o&KCUqb0*QcJK1WTaPU zTo~j@j$#h!^-QdKbWE5~p|hPEQlNkU?gP|7YKRm7A-t2M-qU84ME(!O3mAvHVGtNt zm%HIF2^I_r3^2`_-cRp4cvFMtM1nvv5Cc>LiTJ9`eS^kfZV>?rp{^+Y_#@ZMV^%x` zA(xJp0rYV-gr*vF{)WvycK;7s5KYRy_6m*VPvBIZou;7g~UyRq^8 zkYH8$rqatN2W&=N@%h>i+n0%I4dLb!1!ocQOU`-K@e-s<{sQH0+Ia$p=4Tzxopnp| zT(=S)IcpvJSci{-kx(Qo`}N;a1SKe9(393C|7AemNOTaZ^H<+vo6}65LLzOvU1@}Y z&;L8HoO6je5ziIot|A%9Mme$nH~c4sRr?+3b^4f)N5g41@65S#&U9k#= region1[0]) and (region2[0] <= region1[1]) + else: + return (region2[1] > region1[0]) and (region2[0] < region1[1]) + + +def iter_groups(items, key): + """ + Iterates through groups of consequentive items x that have the same key(x) + + items -- iterable of values + key -- function of one argument + """ + + NULL = object() + last_key = NULL + group = [] + + for item in items: + k = key(item) + if k != last_key: + if group: + yield group + + # start new group + group = [] + last_key = k + group.append(item) + + if group: + yield group + + +def iter_union_ids(regions): + """ + Iterate over union groups + + Yields (groupnum, region) for each region in regions + + NOTE: regions must be sorted by start + """ + + # TODO: add inclusive option + + start = -util.INF + end = -util.INF + group = None + groupnum = -1 + + for reg in regions: + if reg[0] > end: + # start new group + start = reg[0] + end = reg[1] + groupnum += 1 + + else: + # append to current group + if reg[1] > end: + end = reg[1] + + yield (groupnum, reg) + + +def groupby_unions(regions): + """ + Iterate over union groups + + NOTE: regions must be sorted by start + """ + + # TODO: add inclusive option + + for group in iter_groups(iter_union_ids(regions), lambda x: x[0]): + # remove group index from each region + yield [x[1] for x in group] + + +def iter_unions(regions): + """ + Iterate over union groups + + Yields (start, end, [region1, region2, ...]) for each union group + + NOTE: regions must be sorted by start + """ + + # TODO: add inclusive option + + for group in groupby_unions(regions): + start = min(r[0] for r in group) + end = max(r[1] for r in group) + yield (start, end, group) + + +def iter_intersections(regions): + """ + Iterate over intersection groups + + Yields (start, end, [region1, region2]) for each intersection group + + NOTE: regions must be sorted by start + """ + + # TODO: think about whether this is inclusive 
or not + # useful for DNA coordinates + + # endpoints queue + group = [] + + start = None + end = None + + for reg in regions: + + while len(group) > 0 and group[0][0] <= reg[0]: + # process end points + + # yield group upto this endpoint + end = group[0][0] + if end != start: + yield (start, end, [x[1] for x in group]) + heapq.heappop(group) + start = end + else: + # process new start point + + # yield group before new region + end = reg[0] + if start != end and len(group) > 0: + yield (start, end, [x[1] for x in group]) + + # add region end to group + heapq.heappush(group, (reg[1], reg)) + start = end + + # yield remaining groups + while len(group) > 0: + end = group[0][0] + if end != start: + yield (start, end, [x[1] for x in group]) + heapq.heappop(group) + start = end + + +def iter_substract(regions1, regions2): + """ + Substract regions2 from regions1 + + Yields (start, end, original_region_from_regions1) + + NOTE: regions must be sorted by start and + regions1 and regions2 should each be non-overlaping. + """ + + # add a tag to regions + regions1 = [(reg[0], reg[1], 1, reg) for reg in regions1] + regions2 = [(reg[0], reg[1], 2, reg) for reg in regions2] + + # combine all regions into one list + regions = sorted(chain(regions1, regions2), key=lambda x: x[0]) + + for a, b, group in iter_intersections(regions): + if len(group) == 1 and group[0][2] == 1: + yield (a, b, group[0][3]) + +''' +def iter_combine_regions(*regionsets): + """ + Combine two or more region sets into one sorted region set + + NOTE: region sets must be sorted by start + """ + + regionsets = map(iter, regionsets) + next = set() + + for regions in regionsets: + pass +''' + + + +def query_point_regions(point, regions, inc=True): + + ind = util.sortindex(regions, key=lambda r: r[1]) + rind = util.mget(range(len(regions)), ind) + regions_by_end = util.mget(regions, ind) + + end = util.binsearch([r[0] for r in regions], x)[1] + start = util.binsearch([r[1] for r in regions_by_end], x)[0] + + if start is None: + start = 0 + if end is None: + end = len(regions) + + if inc: + for i in xrange(start, end): + if regions[i][0] <= x <= regions[i][1]: + yield regions[i] + else: + for i in xrange(start, end): + if regions[i][0] < x < regions[i][1]: + yield regions[i] + + +def query_regions_regions(query_regions, regions, inc=True): + + pass + + + + + + +if __name__ == "__main__": + + print "union" + print list(iter_union_ids([[1, 10], [2, 4], [2, 5], + [12, 20], [13, 22]])) + + print list(iter_unions([[1, 10], [2, 4], [2, 5], + [12, 20], [13, 22]])) + + + print list(groupby_unions([[1, 10], [2, 4], [2, 5], + [12, 20], [13, 22]])) + + print "intersect" + print list(iter_intersections( + [[1, 10], [2, 4], [2, 5], + [12, 20], [13, 22]])) + + print "union" + print list(query_point_regions(3, [[1, 10], [2, 4], [2, 5], + [12, 20], [13, 22]])) diff --git a/arghmm/deps/rasmus/intervals.pyc b/arghmm/deps/rasmus/intervals.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3d0a4666006c05f1a8c0be22dd03fb3847319b2 GIT binary patch literal 6631 zcmcIoO>Z056}@jrQ6eeImi!UZcG7W>HcTK(k(!`E>I5$0ei$`WgmP@yuE0{95j8S7 zBk>Jgs*r>N_M)pU3S?1qQJ~u_y6;cuF6d9_qUfSsbRV?m+&3f9PSPr_Y-=9xyEE_e zo^$Vg@3Vi^D)0X6Zy$9e`!|i>>v-&6jYxn$BM(LHJK2+mMtYvy_r&mBK^_*QSCsoj zG2w&+MR{W6eo2A}d4fih5|mV!l3-GWvIJ8qR3s>?P?eyf!nCl5s`gM*d0OQe2~SB- zlPyn@TgC)4^1~m6rzJ$6(4UsBC&8=)bGdLvg|iZzQ{lV>FHz`D$YaAl*BayhAFZ*B z=WHEZ$R>bwKW`WL4%8&5fBjxOw+XR<35~2aF2dN+zwvLS0gFpIfugh7j$&6WqB=~Y zUZ(&de?Ek4feG1zQ5s_Q3~^H510+1DZ%s%t=ai^XiFaEV>)G#$U-q5IDsZ*h;K25m 
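A small usage sketch of the interval utilities above, with made-up coordinates. As the docstrings note, the inputs must be sorted by start (and, for iter_substract, each input set must itself be non-overlapping); the import assumes the vendored rasmus package is available:

    from rasmus import intervals

    regions = [[100, 200], [150, 300], [500, 600]]

    # merge overlapping regions into covered spans
    print list(intervals.iter_unions(regions))
    # -> [(100, 300, [[100, 200], [150, 300]]), (500, 600, [[500, 600]])]

    # subtract one set of non-overlapping regions from another
    print list(intervals.iter_substract([[100, 300], [500, 600]],
                                        [[150, 200]]))
    # -> [(100, 150, [100, 300]), (200, 300, [100, 300]),
    #     (500, 600, [500, 600])]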
zC@MbwHN$i`h^>EnFbw@@!ymNcZs-rfZj{7J?>}hIudn&Zqj1n`Z`(}0{QiT{{O+1I z*CL8L{?)5&&0gmHbP%>v-?eSWfv=zYZR_L1-q1#mLf4Gj65)}R2FnI_A)Q2#4L>F? zr+kU_?n}sGucKJ($LZpF)Lm>3I-BTtaeI(#g`Lzw(cSF#^PeExwu^(d?GNo@6sO_f zQM+dsws&=iY<*|hX$ggtCQZZZc#t?7j@-eco0f_^7TNb`q5Xn%As1zITVRbFYqw;7 zLSVB!<-~qT_V8Dbt%4*z>;|)x_2-TuN?coff__-X`zUW~b}q}^XB#N2I4k=*6uO5u zv6OZiT0h+!B*X5e-^C9sm2CK(B(~wsFpN{|iyx(7-})nb2h(h}p-DGG-?saq|1jKb zjNYg?o9JgWCOcm5h3pA?H4Lr3$F~3t-fs-!P6~|heG-R0wtLveSo+mUgR@Mzm&rQK zp<;V`JB)({w^YfXQ&PL4*Oaha`S{kYbP7VRZPOODB#kaf-?K$Z)I2A+nvo!0XAonbuCwZ za{K%5;IHrcHf;|aKGZEL^RZQ=KXfNM!6FF&=%wQlziEdjf3hZ~GvGnv(tpFA|RAxQaW{R_B##5LO z-CT+Kksm`C-5@>yW;hN6PhjH-1P@N+O@Z~{8;Uf&fnt{6fxY2L2$s2>H|0R>TZerQ zJ$e(J{tx@o-#dOD+fq7t8is^k#bZfNh8~gnszx%kquXhtAn)@%3IgZ2ly zhp3Zh1P`ayJjLQA6dK=}bF3;Zxbv+COKZ&we2K>uClfsg2nF5xZc8zt!P^Cqa$QPC z#^*G2;vYj8t@P+@oDcXnrJTa-ST-E#ppb@ zV(k>S0gEECu&NSP#Waeak0%MNV_QTP-qKJoIANRGGWx#O4dZaMJ@^1-63fxb^4ybm zC`BLQpn4fnlM8>LGaq7rZ04DNy#$SV#y&D2TA<$^J;Rpf8K|3QQJnDH(!&7jXtIeR zYz2i>YY469@n83so6kq7@<;3(lO$lGKsK-}$mp^GTyCtLHH@ZJLm#Sq0bCeG_Ye}5 zOm!&g44y7aPC8G$pifmQF*{qrD|N8ycQuDozl-z@6`Z@&@BTs#5L(GOMp}R{LI*mu zL06dI4k9?SHja4cv)TU*eE!TdpB{2j8BSM^AyLVP{D&M&K0RcSUvdoQ4Mj$JM2&oW zM1;}LHL8s7$gvkbH9iYe@>y7OF!l5-o?ZCc#*D?HKfr=tj76GvTV#V@m2&)7Pc-g? zzSxtF_BCUhdX+4OFhy$4qEKS=kjPTA8Me1~H2I*koos8Cq4LmfHVD+CNOQ!U=6Swg zR-i#gbD7qI<+V)s_S#94i_uQ=lJSkPyU^OVP)M!hWtoXrkon>a64@m@)rpGn%~Ii_ zsT2xw#^icE`8>tMf1;B-B_a2&jHM(bnafN7Nk|(-ncJx{McL09DP;3TsAwvD*hqP( zjjqc95@k)C$rg=z05@`2ckl7o7@V(0E&6}(5+U-&+DGzJP!DIQN!g#G_4cMTpF&2B zBlIGef%^oy&(^C(Xmd^MTFb@+pS3R7>?8 z^G@LvuO_EV!*sYprl&-i#=76b21qvGj$H5pR+07*@dec*7lfcp0`Bn#7J(ZGfougn z9BL+1GvW9OoM%S}4JrsNB*NUzJvk_Ga~-|vqz<~t8kNZ|jelr_3K|vJ=+|;k&KvP) zT9G|m_layxNm9YB7l>GKhfom9sZS&D<&(xBdO`&S?}-_G$}<}}AYpm#`JU`obs1I1 zR?Mzg!0lFc<+VpOrU6atiD!};XpBWejcoCAF!yu08{a|4&0}?ps|k0PcG$6eS;u#b z*|~$*26E^*dga)xA*md|D7}b;Pg(O9bnaLK!5xWQVB)ALDNg!CaS)C){`9+Hs=-Zf z`jkrg)p~^MBHp4Yzg;ZkiFKskHS{dSn7ExF$hfFi$Nf(MhwAJ>%YMu-nqP=FavrsO@Vg#JgZ#F>@XW>D6#dubI=h%{h%U zbithS+^-UpOXCbb#ACTwMFBPA7nU{D-Ei5ek2dF|S9b2%q4Kr;9WON57f-!ltJTLv zbE~C>){I;8oQN5>`u>Q^0pC&3vJq4Mh%1TcG8GUfuWqF4H9GS@p{5 zrQPkSyTxJeybEq}grl>0lSh!I&qDq9RZ8!&cn?K3^B8xMm^r)XMK(R!LzPbE+&+cQ zul18)*b6@($nD!Grh$Xg)#>T-rSdn*Gv(Rxa`{`+7pBWKZ^B$LFMD_j{GGrx&n5kR G&HM+*C3D{Z literal 0 HcmV?d00001 diff --git a/arghmm/deps/rasmus/linked_list.py b/arghmm/deps/rasmus/linked_list.py new file mode 100644 index 00000000..41207af1 --- /dev/null +++ b/arghmm/deps/rasmus/linked_list.py @@ -0,0 +1,220 @@ + + + +class LinkedNode (object): + """A node in a doubly linked list""" + + def __init__(self, item): + self.next = None + self.prev = None + self.item = item + + + +class LinkedList (object): + """A doubly linked list""" + + def __init__(self, items=[]): + self._head = None + self._tail = None + self._size = 0 + + self.extend(items) + + + def __len__(self): + """Return size of list""" + return self._size + + + def __iter__(self): + """Iterate over the items in a linked list""" + + ptr = self._head + while ptr is not None: + yield ptr.item + ptr = ptr.next + + def __reversed__(self): + """Iterate backwards over list""" + + ptr = self._tail + while ptr is not None: + yield ptr.item + ptr = ptr.prev + + def get_head(self): + return self._head + + def get_tail(self): + return self._tail + + def get_first(self): + if self._head is None: + raise IndexError("No elements in list") + self._head.item + + def get_last(self): + if self._last is None: + raise IndexError("No elements in list") + self._tail.item + + + def iter_nodes(self): + """Iterate over the linked nodes in 
a list""" + + node = self._head + while node is not None: + next = node.next + yield node + node = next + + def iter_nodes_reversed(self): + """Iterate over the linked nodes in a list in reverse""" + + node = self._tail + while node is not None: + prev = ndoe.prev + yield node + node = prev + + def remove_node(self, node): + """Remove node from list""" + + if node.prev is not None: + node.prev.next = node.next + else: + # first in list + self._head = node.next + if self._head: + self._head.prev = None + + if node.next is not None: + node.next.prev = node.prev + else: + # last in list + self._tail = node.prev + if self._tail: + self._tail.next = None + + self._size -= 1 + + + def append(self, item): + """Append item to end of list""" + + node = LinkedNode(item) + + if self._tail is None: + # append first node + self._head = node + self._tail = self._head + else: + # append to end of list + self._tail.next = node + node.prev = self._tail + self._tail = node + + self._size += 1 + return node + + + def prepend(self, item): + """Prepend item to front of list""" + + node = LinkedNode(item) + + if self._head is None: + # append first node + self._head = node + self._tail = self._head + else: + # append to front of list + self._head.prev = node + node.next = self._head + self._head = node + + self._size += 1 + return node + + def extend(self, items): + """Append many items to end of list""" + + for item in items: + self.append(item) + + + def extend_front(self, items): + """Prepend many items to front of list""" + + for item in items: + self.prepend(item) + + + def pop(self): + """Pop item from end of list""" + + if self._tail is None: + raise IndexError("pop from empty list") + + item = self._tail.item + self._tail = self._tail.prev + + if self._tail is None: + # list is empty + self._head = None + else: + self._tail.next = None + + self._size -= 1 + + return item + + def pop_front(self): + """Pop item from front of list""" + + if self._head is None: + raise IndexError("pop from empty list") + + item = self._head.item + self._head = self._head.next + + if self._head is None: + # list is empty + self._tail = None + else: + self._head.prev = None + + self._size -= 1 + + return item + + + def insert_after(self, node, item): + """Insert a new item after a node in the list""" + + if node is None: + self.prepend(item) + + # create new node + node2 = LinkedNode(item) + node2.prev = node + node2.next = node.next + + # link surrounding nodes + node.next = node2 + if node2.next: + node2.next.prev = node2 + else: + self._tail = node2 + + self._size += 1 + + + def clear(self): + """Clear the list of all items""" + + self._head = None + self._tail = None + self._size = 0 + diff --git a/arghmm/deps/rasmus/linked_list.pyc b/arghmm/deps/rasmus/linked_list.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ab268df176bc09cdec24aa58ec41e74437363d7 GIT binary patch literal 6605 zcmd5>Pj4GV6rbI-6K`raPTUkq1uU&-BZL|#2#SP&HXu={irS3?q7qsh&m`H_Uc0-S zHmzC?v`22-xO3ziaN#3x;VW?G6Y%@JS$j8aS^~HcN%r~8?9BMh`}2EmCcpjJn!E7L zAJ=-S`0?=h37+gN5UrF^YSU5LQIC{*sMJ^2(QwDT-lW%6x~A5b>zI_!fn14`n?|n< zwVAB!p!*V@>;(`k%auSeQx?DOsc?PS<$zZ}E(QrE2XPW~13lc?=-&&uj2ZMg=TldKIBBTQm;b74lQxh+hbn}Nt8t<3Z{ zL(W*nV@Y-i+{z%yS2p6Um2TSGj&GZlQ98U~dU=M9t?j{J{GrV#TS>dwU?*F#n~1oH zbECU?1It8FoWyw)&4SNnK$PR-*9vL}B;A3DBGGIV4TgHB&n72CkskJLvRHq-x}dbVryay5TYIg5=_XobOGH)K54cwoEVdNaGrP@u8y}jI(aO7Gsu@kO}FT zIfEzKHeGGGyc_o|m&Nx?P6NYFO`^pyLkTq!`f661ep*)N_buSb=z-QFeYZzJ$B)Dv zeUSSb$5+-tM`-e$GzlnoFx-@gmEa3ocxgFGK6Rmsi0GRnica7QGJ2%4Qj_IFZ_VBT 
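A brief, illustrative exercise of the LinkedList API above (arbitrary values; it assumes the vendored rasmus package can be imported):

    from rasmus.linked_list import LinkedList

    lst = LinkedList([2, 3, 4])      # build from any iterable
    lst.prepend(1)                   # -> 1, 2, 3, 4
    lst.append(5)                    # -> 1, 2, 3, 4, 5
    print list(lst)                  # [1, 2, 3, 4, 5]
    print list(reversed(lst))        # [5, 4, 3, 2, 1]

    # append()/prepend() return the new node, so it can later be
    # unlinked in O(1) with remove_node()
    node = lst.append(6)
    lst.remove_node(node)
    print lst.pop(), lst.pop_front(), len(lst)   # 5 1 3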
zu~I*9)b4582G+XatKpJz(1>02g?xGn18y~y^CjLF@N^cFFXtxh=GfhBlLq-VfQnYI za8MW${$?b0~^c=H|;LnS3Vb@LOnkNVv#w*hpVyOnsVfjbPN^SeLTXYY(+?!Cu% z?}3!qdZvP{FOISGfmCKdSofgtT3{+`!U@U}Q~1rLQ&?n`K2aF;!YQ^Njp-I|gi}FR z5$j{LGMZzIu)b5bu5obnAV3fe&s5KOe8CuIC)sCz^)MY3sP{Pf+PYT{O-SLa8mY4I zB@*caGhKDDGAPwG!Q;=-{iNRLPJAfJDo6YT!pw|H%au&s8GDoMMYW%topep=E>D05 zQ|OYOir^3g9!c?g7qZK{GNf2Y963VihPk!sLNy*P3IwagC}ACS!Hw|gqDY{~*%C-- z2C$Gl$ZV#=ffPb9L27N8<(25!#xl2R{a6B8>Llr#7-jm9##EMyh!Gw58k!j~MR{$f z<29W*r|p)U9J#o)iA7ky=AtzqI(-&elG7p4{sfdPT}AyXRWOcb$*J)Rj4f1)IvHxO zlVS}F0;<5H=1F8p;db4#nA{wuoPTDjywUrs4)xGvLuy5iv>qX3Q~cILMgtj zLxmIl5+}s-r>|t%xhoRm5Tvq25al)9w(C1>=(y?l?!5D_bt|gEwy8GnKkD{nNOm(cQ#*ye2ll}FJ%z!$CkZH@e%%>6FMQdRGuo|L zRjKy_j4hmQ&`s_ZrT$*mE2=}@LR7$X9?j=-aS$HG(%<8;Wr#P6gZYzQ^iOtq8jFq1 zzjvE^oS@iffYFDt(VVVj&%Kcd@&nq{O*6ddbab~>zPkfLC_CTSj0Ubf~ z*whw^GmBhxl}s8~ny5sO!d-<+=%1x4txJuX@#J}RII;*OxL?62sqjX4YhMgTIWM-d zyuhvo=2l=!N<2X)iKY_89!n5X0AlKN6E?pSqW*}FBCt^Hx=m*u@YQmg?tFQ&{{Lv7 z$I!jn-{Zdr(SM0+rTzuiN|-wo{rgt`*6)9Qh83uNy1>K=-=YDBWPyWRynvJ|xw(Kh z7A~kXZR-L28$p25NEpTec%kUGOi}#c1^&=ru9?H4$DD!n$1-$_|E+?5u^Pn3f z=8lbt?k27z*iqc@^8Ta}1CqZbYX_KaGdXuARlE%jQNP5+q&^aymHRR-dw%+voW_r3 zFOh8ESG47hPDKJ=WswGUsY=U3(Te*-9 z+vHgDgmNv&8`V%MWBv~yBv6LjZphjr 0: + i -= 1 + + + if value <= self.table[i][0]: + # return lower bound color + return self.table[i][1] + elif value >= self.table[i+1][0]: + # return upper bound color + return self.table[i+1][1] + else: + # blend two nearest colors + part = value - self.table[i][0] + tot = float(self.table[i+1][0] - self.table[i][0]) + weight1 = (tot-part)/tot + weight2 = part/tot + + newcolor = [] + color1 = self.table[i][1] + color2 = self.table[i+1][1] + for j in range(len(color1)): + newcolor.append(weight1 * color1[j] + + weight2 * color2[j]) + + return newcolor + + + def get_int(self, value): + return [int(x*255) for x in self.get(value)] + getInt = get_int + + +def get_webcolor(color, maxval=1): + + colstr = "#" + for i in color: + h = hex(int(i * 255.0 / maxval))[2:] + if len(h) == 1: + h = "0" + h + colstr += h + return colstr + + +def rainbow_color_map(data=None, low=None, high=None): + if data != None: + low = min(data) + high = max(data) + assert low != None and high != None + + return ColorMap([[low, blue], + [.5*low+.5*high, green], + [.25*low + .75*high, yellow], + [high, red]]) +rainbowColorMap = rainbow_color_map + + +#============================================================================= +# svg plotting + +def plothist2(x, y, ndivs1=20, ndivs2=20, width=500, height=500): + from rasmus import util + l, h = util.hist2(x, y, ndivs1, ndivs2) + bwidth = util.bucket_size(x) + bheight = util.bucket_size(y) + + #width *= bwidth/bheight + + heatmap(h, width/ndivs1, height/ndivs2) + + + +def make_color_legend(filename, colormap, start, end, step, + width=100, height=10): + from rasmus import util + s = svg.Svg(util.open_stream(filename, "w")) + s.beginSvg(width, height) + + xscale = float(width) / (end + step - start) + + for i in util.frange(start, end + step, step): + color = colormap.get(i) + s.rect((i-start) * xscale, + 0, + step*xscale, height, + color, color) + + s.endSvg() +makeColorLegend = make_color_legend + + + + +def heatmap(matrix, width=20, height=20, colormap=None, filename=None, + rlabels=None, clabels=None, display=True, + xdir=1, ydir=1, + xmargin=0, ymargin=0, + labelPadding=2, + labelSpacing=4, + mincutoff=None, + maxcutoff=None, + showVals=False, + formatVals=str, + valColor=black, + clabelsAngle=270, + clabelsPadding=None, + rlabelsAngle=0, + rlabelsPadding=None): + + from 
rasmus import util + + # determine filename + if filename == None: + filename = util.tempfile(".", "heatmap", ".svg") + temp = True + else: + temp = False + + # determine colormap + if colormap == None: + colormap = rainbowColorMap(util.flatten(matrix)) + + # determine matrix size and orientation + nrows = len(matrix) + ncols = len(matrix[0]) + + if xdir == 1: + xstart = xmargin + ranchor = "end" + coffset = width + elif xdir == -1: + xstart = xmargin + ncols * width + ranchor = "start" + coffset = 0 + else: + raise Exception("xdir must be 1 or -1") + + if ydir == 1: + ystart = ymargin + roffset = height + canchor = "start" + elif ydir == -1: + ystart = ymargin + nrows * width + roffset = 0 + canchor = "end" + else: + raise Exception("ydir must be 1 or -1") + + + # begin svg + infile = util.open_stream(filename, "w") + s = svg.Svg(infile) + s.beginSvg(ncols*width + 2*xmargin, nrows*height + 2*ymargin) + + # draw matrix + for i in xrange(nrows): + for j in xrange(ncols): + + if mincutoff and matrix[i][j] < mincutoff: continue + if maxcutoff and matrix[i][j] > maxcutoff: continue + + color = colormap.get(matrix[i][j]) + s.rect(xstart + xdir*j*width, + ystart + ydir*i*height, + xdir*width, ydir*height, color, color) + + # draw values + if showVals: + # find text size + + fontwidth = 7/11.0 + + textsize = [] + for i in xrange(nrows): + for j in xrange(ncols): + + if mincutoff and matrix[i][j] < mincutoff: continue + if maxcutoff and matrix[i][j] > maxcutoff: continue + + strval = formatVals(matrix[i][j]) + if len(strval) > 0: + textsize.append(min(height, + width/(float(len(strval)) * fontwidth))) + textsize = min(textsize) + + + yoffset = int(ydir == -1) + for i in xrange(nrows): + for j in xrange(ncols): + + if mincutoff and matrix[i][j] < mincutoff: continue + if maxcutoff and matrix[i][j] > maxcutoff: continue + + strval = formatVals(matrix[i][j]) + s.text(strval, + xstart + xdir*j*width, + ystart + ydir*(i+yoffset)*height + + height/2.0 + textsize/2.0, + textsize, + fillColor=valColor) + + # draw labels + if rlabels != None: + assert len(rlabels) == nrows, \ + "number of row labels does not equal number of rows" + + if rlabelsPadding is None: + rlabelsPadding = labelPadding + + for i in xrange(nrows): + x = xstart - xdir*rlabelsPadding + y = ystart + roffset + ydir*i*height - labelSpacing/2. + s.text(rlabels[i], x, y, height-labelSpacing, anchor=ranchor, + angle=rlabelsAngle) + + if clabels != None: + assert len(clabels) == ncols, \ + "number of col labels does not equal number of cols" + + if clabelsPadding is None: + clabelsPadding = labelPadding + + for j in xrange(ncols): + x = xstart + coffset + xdir*j*width - labelSpacing/2. 
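            # each column label sits at the right-hand edge of column j (the x
            # just computed); the next line offsets it clabelsPadding units
            # outside the grid, opposite the drawing direction ydir, and the
            # text is rotated by clabelsAngle (270 degrees, vertical, by default)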
+ y = ystart - ydir*clabelsPadding + s.text(clabels[j], x, y, width-labelSpacing, anchor=canchor, + angle=clabelsAngle) + + # end svg + s.endSvg() + s.close() + + + # display matrix + if display: + #if temp: + os.system("display %s" % filename) + #else: + # os.spawnl(os.P_NOWAIT, "display", "display", filename) + + # clean up temp files + if temp: + os.remove(filename) + + + + + + + + diff --git a/arghmm/deps/rasmus/plotting.pyc b/arghmm/deps/rasmus/plotting.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee410f4891d5febbb59a489c321f1fdfe61758ca GIT binary patch literal 7823 zcmcIp&vP8taqigx7Qhm}NbYj6Bt?z{NnF_wAV6_hVPuQ4Bs)%Im>JlDvl44F*co7l zoSj+B42VT)H?A^yNL8v*`3F)d9a5D`DmPaSsmdjlgL8@h1K*N!DtG7m`VDrMKU};Z zAm-uy=-01b_t*XUJ^uQCmMS0o*?;{>M~a`D_oL) zEsK9sS}WqeCaqQRG4Yo8^YT>WiO8R}E{l&f7UZcVtt*tE_^rrOh*FiHDy^FM;EP}I z#qW|lg=E*n2W#+Mm8aN2T>=QPc1}6wV@uG`C9n!MfM2Y5zEE&of*TTm&sAL)qnKEg zr`Xj^*7usXskOgCQE?8#D7fqNqa@G6_-M0#=2RT-B=06^cGvllm*9CM+qrP4ONd?S^cH_~eL%$ZtN%=ke#0BudgBdHpdG zO6dF^zHA3Y03^WdC`+6N2+#~rL5cRE@olZlN&x$p#ocSpaPAh0bIuM$avpop3HFr4 zPTmciH1HxP3gf`Z6Q@HFvJMyKM~Oedm#w21S~3)AqoUGd7lfg%{zNQGEubsL*k8pd ze(vPnK@{9^vTkw``OX2B!J>yr6eXvy3~Fi5%kPfq$CWR~HOKi9B5!XP>9!wiJU6ma zjO@HH@_sS${xc(+o0~!2+1wnnc`&YxD_d-yWq3ibkQ8heUD#a$DhmqGi@V)PTn@q`hw@IP$ zSr&vuu2@OZ-1KWJhz?y6{6EOkZijK0x7%-lea6LPb)lp{zl1u~q(DD|J_dIshmZ>Q zuq00`{(kykT}GwC3D7$%Ndx$AJhUX473==-HL>vRqaWQ5XCzq`i;a?efO+r<%Q(<; zb63$1e<~P)>E1JRm|i#Q%t}(7qBR3A&@1iQ$Y!(|^Bpr8_ZTv4YcwN6{MxdHKef>; zt6D2F$c*MDOd8aZ-s1yikK zee3x9);@^#CgEcZws3>|B#pXQJ%!w!ndzL)08F30gfs?#7m>gL;1^&JIy9Gt2% zs~E=NC)FM0`0#?UY(pGu_AdTp8z^Mvnv|_IYr$HwmhH9Ds&%tewQJTltYxcGDqEMW znspTvm#wOSJF!nqDBO9lz|J#7wnlYeRKZ>sUi*ryFo@pr+;n%O7GRXkM_uE;05xzKjBut+KsXTX>_0Lj)>6 zHH?o6v<|xr_Gy|=AN-gWDdLiz`c>Gf2CLu~&T|`{oWqJU+MbbzC5=ExqR>^xphh4i z_%^s6FKMXSQ!m<8q^<)9`1wt8577uYbyj~4vuybVLKq+pQ$P_%8v9YN!gO$+Y8{GT zC)CbP^(b`Cv6Id<&&rXS%w%i3#s<^rTvciK&26YhEdJ~sxf=XAD@ zk*uc7Q9n!5BvrKOgagJmTI(o9Hlx-#3z&iF>@@I=h3Trug+1-%C_B0ceHU!G%V3*& zVSJFBw$*fPuLIM5FCXu z23;aV?RTh|iaAdSuM}Cg4j^d(VUzmQsl4CvY}1DN5L`%clI7>w3+AW?XL-)?adUCJlmo<%YFdS}R>gz+OR? zVp|n!xnxctY9L&uj5fdxFlyf_2_2Of|! 
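A minimal sketch of driving the color-map and heatmap helpers above with made-up data. It assumes the vendored rasmus package (and its svg module) is importable, and it writes the SVG to a named file rather than launching a viewer:

    from rasmus import plotting

    mat = [[0.0, 0.3, 0.9],
           [0.2, 0.5, 0.7],
           [0.1, 0.8, 1.0]]

    # map values onto the built-in blue -> green -> yellow -> red ramp
    cm = plotting.rainbow_color_map(low=0.0, high=1.0)
    print cm.get(0.5)                          # interpolated color components
    print plotting.get_webcolor(cm.get(0.5))   # same color as a web hex string

    # labeled heatmap written to an SVG file, no viewer popped up
    plotting.heatmap(mat, width=40, height=40, colormap=cm,
                     rlabels=["r1", "r2", "r3"],
                     clabels=["c1", "c2", "c3"],
                     filename="heatmap_example.svg", display=False)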
zGFp?ExSM5-sg0S@F!;hGO?d$oqQ%xz6FLSo*=|tzglSnCY+omu@Jzxrk^dvX9o^uA z$>=5n{^&J{KR_=?a3cW8EhQOZ7EXgYNf$^8ZY0I&lH+*>c(Zt{0FYChFUjZ@0Y18| zlN`etBood+ZlmCgR~zsIDMSHj!5O$VoUuz}=FrO`9c%Qu0G;8Y3}4qxz9GXmBwUh0 zPCVgYU)4I9<49xk5}NeL!M_QP9Snf)rw@K9qjeelqjp_4U4JK|H)Xi2M=aDfT$jf1 zO?=DpP$YpFj=#1vFIjnkWJT{#QzW*8E7bit6YBph6?IjHH#BU&2^Xejhfozu_5 z!Y}~4*@fm1lio5O{_yMpBM^IK~w)SA>IY{~oNCeU+7!po9h(NKq!&8$yWP#z8X;GrXZfy0y_a6_wZ%@&_Uw z)ab(Dv?ObIyd1*gVTBr`hK;&2M&j~w664H+MXygwTr(0^pOd&cEpg#^kQj~!uXqhI zV*g{$IG54v0Mb{vpRvR^0O_AbVys_C?5M<7jl|byjl{59UC%~ZaLv@_B&k8Y4GFLF zxfwC$8n$74d`*(eW(;y^&a3@U|6Y^v{iP@CruzYg^ucBmZ>M_B>dnOc9R2PebCCJr zzz3z`;vwq~AId5gnn~=zzWEwoPH515#UZ1Ifp8G#Zi>&|qUX%qnFJeX3t=qy&v!wd<9sxkNFAF#H>;vE)DV)SyTi|4^0cfZN5 zKSZGl=tK#`;N3DY%E``W*EZgS({M1AwfqrT{gA^nS!xWRb-dBj z7+Nn>q!JJkS|J2lb!w zVA4JrIN|{R^XO%RQ&T6iNdCJACZm%XtWDVKTqA%IPm1x3u~08ore-VKXA1+1*TB-f z#X^G{7tqHDL<;u-8-#u*>7ThjWsAXwj*z5YdZu9I|8^<9(S=80cN1;*T^8fe(mzT2 za6n$>4kEAfNO|I6u~Gm2T(33m7aZN^ zsEvZJUp&1gh}YUhz+Qv>hvq%=yK`KNp}K6Z+HYEm^91}e|Lm*x7B5|?l*_fHm6h`B NB3_F*ZolN;{{hi43&8*Y literal 0 HcmV?d00001 diff --git a/arghmm/deps/rasmus/sets.py b/arghmm/deps/rasmus/sets.py new file mode 100644 index 00000000..56124775 --- /dev/null +++ b/arghmm/deps/rasmus/sets.py @@ -0,0 +1,128 @@ +""" + + algorithms for sets + +""" + + + +class UnionFind: + """An implementation of the UNINON/FIND algorithm""" + + def __init__(self, items=[]): + self._parent = None + self._items = set(items) + + def __contains__(self, item): + """Returns True if item is in the set""" + return item in self.root()._items + + def __len__(self): + """Returns the size of the set""" + return len(self.root()._items) + + def __iter__(self): + """Returns an iterator through the items of the set""" + return iter(self.root()._items) + + + def add(self, item): + """Adds an item to the set""" + self.root()._items.add(item) + + def root(self): + """Returns the root node of a set""" + node = self + while node._parent: + node = node._parent + if node != self: + self._parent = node + return node + + def same(self, other): + """Returns True if two sets are the same""" + return self.root() == other.root() + + def union(self, other): + """Produces a union between this set and another set""" + root1 = self.root() + root2 = other.root() + if root1 == root2: + return + + root1._items.update(root2._items) + root2._items = set() + root2._parent = root1 + + def members(self): + """Returns members of the set""" + return self.root()._items + + def has(self, item): + """Returns True if item is in set""" + return item in self.members() + + def size(self): + """Returns size of the set""" + return len(self.root()._items) + + + +def connected_components(components): + + sets = {} + + for comp in components: + comp_sets = [] + for item in comp: + if item not in sets: + s = UnionFind([item]) + sets[item] = s + comp_sets.append(s) + else: + comp_sets.append(sets[item]) + + if len(comp_sets) > 1: + for s in comp_sets[1:]: + comp_sets[0].union(s) + + + # yield unique sets + done = set() + for s in sets.itervalues(): + if s.root() not in done: + done.add(s.root()) + yield s.members() + + + + + + + +if __name__ == "__main__": + + if True: + set1 = UnionFind() + set2 = UnionFind() + set3 = UnionFind() + + set1.add(1) + set1.add(2) + print len(set1), 1 in set1, set1.has(-1) + set2.add(3) + set2.add(4) + set2.add(5) + print len(set2) + set3.add(5) + set3.add(6) + set3.add(7) + print len(set3) + print set1.same(set2) + set1.union(set2) + print set1.same(set2) + + set1.union(set3) + print set1.members() + print len(set1), 
len(set2) + diff --git a/arghmm/deps/rasmus/stats.py b/arghmm/deps/rasmus/stats.py new file mode 100644 index 00000000..9a73bfee --- /dev/null +++ b/arghmm/deps/rasmus/stats.py @@ -0,0 +1,1507 @@ +""" + + Common statistics library + +""" + + +# python libs +from math import * +from itertools import izip +import cmath +import random +import os + +# rasmus libs +from rasmus import util +from rasmus import tablelib + + + + +def logprod(lst): + """Computes the product of a list of numbers""" + return sum(log(i) for i in lst) + + +def prod(lst): + """Computes the product of a list of numbers""" + p = 1.0 + for i in lst: + p *= i + return p + + +def mean(vals): + """Computes the mean of a list of numbers""" + n = 0 + s = 0.0 + for i in vals: + s += i + n += 1 + return s / float(n) + +def median(vals): + """Computes the median of a list of numbers""" + lenvals = len(vals) + sortvals = sorted(vals) + + if lenvals % 2 == 0: + return (sortvals[lenvals / 2] + sortvals[lenvals / 2 - 1]) / 2.0 + else: + return sortvals[lenvals / 2] + +def mode(vals): + """Computes the mode of a list of numbers""" + top = 0 + topkey = None + for key, val in util.hist_dict(vals).iteritems(): + if val > top: + top = val + topkey = key + return topkey + + +def msqerr(vals1, vals2): + """Mean squared error""" + + assert len(vals1) == len(vals2), "lists are not the same length" + + + return mean([(vals1[i] - vals2[i]) ** 2 + for i in xrange(len(vals1))]) + + + +def variance(vals): + """Variance""" + u = mean(vals) + return sum((x - u)**2 for x in vals) / float(len(vals)-1) + +def sdev(vals): + """Standard deviation""" + return sqrt(variance(vals)) + +def serror(vals): + """Stanadrd error""" + return sdev(vals) / sqrt(len(vals)) + +def covariance(lst1, lst2): + """Covariance""" + m1 = mean(lst1) + m2 = mean(lst2) + tot = 0.0 + for i in xrange(len(lst1)): + tot += (lst1[i] - m1) * (lst2[i] - m2) + return tot / (len(lst1)-1) + + +def covmatrix(mat): + """Covariance Matrix""" + size = len(mat) + return [[cov(mat[i], mat[j]) for j in range(size)] + for i in range(size)] + +def corrmatrix(mat): + """Correlation Matrix""" + size = len(mat) + return [[corr(mat[i], mat[j]) for j in range(size)] + for i in range(size)] + + +def corr(lst1, lst2): + """Pearson's Correlation""" + num = covariance(lst1, lst2) + denom = float(sdev(lst1) * sdev(lst2)) + if denom != 0: + return num / denom + else: + return 1e1000 + + +def corr_pvalue(r, n): + """Returns the signficance of correlation > r with n samples""" + + t = r / sqrt((1 - r*r) / float(n - 2)) + return rpy.r.pt(-t, n-2) + + +def qqnorm(data, plot=None): + """Quantile-quantile plot""" + + from rasmus import gnuplot + + data2 = sorted(data) + norm = [random.normalvariate(0, 1) for x in range(len(data2))] + norm.sort() + + if plot == None: + return gnuplot.plot(data2, norm) + else: + plot.plot(data2, norm) + return plot + + +def entropy(probs, base=2): + """Shannon's entropy""" + + return - sum(p * log(p, base) for p in probs if p > 0.0) + +def cross_entropy(p, q, base=2): + try: + return - sum(i * log(j, base) for i,j in izip(p, q) if i > 0.0) + except OverflowError: + return util.INF + +def kl_div(p, q): + """Compute the KL divergence for two discrete distributions""" + return cross_entropy(p, q) - entropy(p) + +def akaike_ic(lnl, k): + """Akaike information criterion""" + return 2 * k - 2 * lnl + +def akaike_icc(lnl, n, k): + """Akaike information criterion with second order correction + Good for small sample sizes + """ + return akaike_ic(lnl, k) + 2*k*(k+1) / (n - k - 1) + + +def 
bayesian_ic(lnl, n, k): + """Bayesian information criterion + + lnl -- ln(L) + n -- number of data points + k -- number of parameters + """ + return -2 * lnl + k * log(n) + + + +def fitLine(xlist, ylist): + """2D regression""" + + xysum = 0 + xxsum = 0 + n = len(xlist) + for i in range(n): + xysum += xlist[i] * ylist[i] + xxsum += xlist[i] * xlist[i] + avgx = mean(xlist) + avgy = mean(ylist) + + if (xxsum - n*avgx*avgx) == 0: + slope = 1e10 + else: + slope = (xysum - n*avgx*avgy) / float(xxsum - n*avgx*avgx) + + inter = (avgy*xxsum - avgx*xysum) / float(xxsum - n*avgx*avgx) + + return (slope, inter) + + +def fitLineError(xlist, ylist, slope, inter): + """Returns the Mean Square Error of the data fit""" + error = 0 + n = len(xlist) + + for i in range(n): + error += ((xlist[i]*slope + inter) - ylist[i]) ** 2 + return error / n + + +def pearsonsRegression(observed, expected): + """Pearson's coefficient of regression""" + + # error sum of squares + ess = sum((a - b)**2 for a, b in izip(observed, expected)) + + # total sum of squares + u = mean(observed) + tss = sum((a - u)**2 for a in observed) + + r2 = 1 - ess / tss + return r2 + + +def pearsonsRegressionLine(x, y, m, b): + observed = y + expected = [m*i + b for i in x] + return pearsonsRegression(observed, expected) + + +def rank(vals, x, norm=False, sort=True): + """ + Returns the rank of x in list vals + rank(x) = i if vals[i-1] <= x < vals[i] + + x -- value to rank within values + vals -- list of values to compute the rank of + sort -- if True, vals will be sorted first + norm -- if True, return normalized ranks (i.e. percentiles) + """ + + if sort: + vals = sorted(vals) + n = len(vals) + + for r, v in enumerate(vals): + if v > x: + break + else: + r = n + + if norm: + r /= float(n + 1) + + return r + + +def percentile(vals, perc, rounding=-1, sort=True): + """Give the value at a percentile 'perc' + + vals -- list of values + perc -- perctile + rounding -- round down if -1 or round up for 1 + sort -- if True, sort vals first + """ + + if sort: + vals2 = sorted(vals) + else: + vals2 = vals + n = len(vals2) + if rounding == -1: + return vals2[util.clamp(int(perc * n), 0, n-1)] + elif rounding == 1: + return vals2[util.clamp(int(ceil(perc * n)), 0, n-1)] + else: + raise Exception("rounding must be 1 or -1") + + +def dither(vals, radius): + return [x + random.uniform(-radius, radius) for x in vals] + + +def logadd(lna, lnb): + """Adding numbers in log-space""" + + diff = lna - lnb + if diff < 500: + return log(exp(diff) + 1.0) + lnb + else: + return lna + + +def logsum(vals): + SUM_LOG_THRESHOLD = -15 + maxval = vals[0] + maxi = 0 + + # find maxval + for i in range(1, len(vals)): + if vals[i] > maxval: + maxval = vals[i] + maxi = i + + expsum = 1.0 + for i in xrange(len(vals)): + if i != maxi and vals[i] - maxval > SUM_LOG_THRESHOLD: + expsum += exp(vals[i] - maxval) + + return maxval + log(expsum) + + +def logsub(lna, lnb): + """ + subtracting numbers in log-space + + must have lna > lnb + """ + + diff = lna - lnb + if diff < 500: + diff2 = exp(diff) - 1.0 + if diff2 == 0.0: + return -util.INF + else: + return log(diff2) + lnb + else: + return lna + + +def logadd_sign(sa, lna, sb, lnb): + """Adding numbers in log-space""" + + if sa > 0 and sb > 0: + return 1, logadd(lna, lnb) + + elif sa == 0: + return sb, lnb + + elif sb == 0: + return sa, lna + + elif sa < 0 and sb < 0: + return -1, logadd(lna, lnb) + + elif sa > 0 and sb < 0: + if lna > lnb: + return 1, logsub(lna, lnb) + elif lna == lnb: + return 0, -util.INF + else: + return -1, 
logsub(lnb, lna) + + elif sa < 0 and sb > 0: + if lna > lnb: + return -1, logsub(lna, lnb) + elif lna == lnb: + return 0, -util.INF + else: + return 1, logsub(lnb, lna) + + else: + raise Exception("unhandled case") + + +def smooth(vals, radius): + """ + return an averaging of vals using a radius + + Note: not implemented as fast as possible + runtime: O(len(vals) * radius) + """ + + vals2 = [] + vlen = len(vals) + + for i in xrange(vlen): + radius2 = min(i, vlen - i - 1, radius) + vals2.append(mean(vals[i-radius2:i+radius2+1])) + + return vals2 + + + + +def iter_window_index(x, xdist, esp=None): + """ + iterates a sliding window over x with width 'xdist' + + returns an iterator over list of indices in x that represent windows + + x must be sorted least to greatest + """ + + vlen = len(x) + #if esp is None: + # esp = min(x[i+1] - x[i] for i in range(vlen-1) + # if x[i+1] - x[i] > 0) / 2.0 + + # simple case + if vlen == 0: + return + + start = x[0] + end = x[-1] + window = [0] + + low = start + high = start + xdist + lowi = 0 # inclusive + highi = 0 # inclusive + + # move up high boundary + while highi+1 < vlen and x[highi+1] < high: + highi += 1 + + yield (lowi, highi, low, high) + + while highi+1 < vlen: + low_step = x[lowi] - low # dist until expell + high_step = x[highi+1] - high # dist until include + + # advance though duplicates + if low_step == 0: + lowi += 1 + continue + + if high_step == 0: + highi += 1 + continue + + # determine new low high boundary + if low_step <= high_step: + low = x[lowi] #+ min(esp, (high_step - low_step) / 2.0) + high = low + xdist + lowi += 1 + + if high_step <= low_step: + highi += 1 + if highi >= vlen: break + high = x[highi] #+ min(esp, (low_step - high_step) / 2.0) + low = high - xdist + + assert abs((high - low) - xdist) < .001, (low, high) + + yield (lowi, highi, low, high) + + +def iter_window_index_step(x, size, step, minsize=0): + + vlen = len(x) + start = x[0] + end = x[-1] + + low = start + high = start + size + i = 1 + + lowi = 0 + highi = 0 + + # move up high boundary + while highi+1 < vlen and x[highi+1] < high: + highi += 1 + + while highi < vlen and high < end: + if highi - lowi >= minsize: + yield lowi, highi, low, high + low = start + i * step + high = low + size + i += 1 + + # move up low boundary + while lowi < vlen and x[lowi] < low: + lowi += 1 + + # move up high boundary + while highi+1 < vlen and x[highi+1] < high: + highi += 1 + + + +def iter_window(x, xdist, func=lambda win: win, minsize=0, key=lambda x: x): + """ + iterates a sliding window over x with radius xradius + + x must be sorted least to greatest + """ + + for lowi, highi, low, high in iter_window_index(map(key, x), xdist): + if highi - lowi >= minsize: + yield (high + low)/2.0, func(x[lowi:highi]) + + +def iter_window_step(x, width, step, func=lambda win: win, minsize=0): + """ + iterates a sliding window over x with width 'width' + + x must be sorted least to greatest + + return an iterator with (midx, func(x[lowi:highi])) + """ + + for lowi, highi, low, high in iter_window_index_step(x, width, step, minsize): + yield (high + low) / 2.0, func(x[lowi:highi]) + + + + + + +def smooth2(x, y, xradius, minsize=0, sort=False): + """ + return an averaging of x and y using xradius + + x must be sorted least to greatest + """ + + vlen = len(x) + assert vlen == len(y) + + # simple case + if vlen == 0: + return [], [] + + if sort: + x, y = util.sort_many(x, y) + + x2 = [] + y2 = [] + + start = min(x) + end = max(x) + xtot = x[0] + ytot = y[0] + + low = 0 + high = 0 + + for i in 
xrange(vlen): + xi = x[i] + + xradius2 = min(xi - start, end - xi, xradius) + + # move window + while x[low] < xi - xradius2: + xtot -= x[low] + ytot -= y[low] + low += 1 + while x[high] < xi + xradius2: + high += 1 + xtot += x[high] + ytot += y[high] + + denom = float(high - low + 1) + if denom >= minsize: + x2.append(xtot / denom) + y2.append(ytot / denom) + + return x2, y2 + + +def factorial(x, k=1): + """Simple implementation of factorial""" + + n = 1 + for i in xrange(int(k)+1, int(x)+1): + n *= i + return n + + +def logfactorial(x, k=1): + """returns the log(factorial(x) / factorial(k)""" + + n = 0 + for i in xrange(int(k)+1, int(x)+1): + n += log(i) + return n + + +def choose(n, k): + if n == 0 and k == 0: + return 1 + + if n < 0 or k < 0 or k > n: + return 0 + + # optimization for speed + if k > n/2: + k = n - k + + t = 1.0 + n2 = n + 1.0 + for i in xrange(1, k+1): + t *= (n2 - i) / i + return int(t + 0.5) + #return factorial(n, n - k) / factorial(k) + +def fchoose(n, k): + if n == 0 and k == 0: + return 1 + + if n < 0 or k < 0 or k > n: + return 0 + + # optimization for speed + if k > n/2: + k = n - k + + t = 1.0 + n2 = n + 1.0 + for i in xrange(1, k+1): + t *= (n2 - i) / i + return t + +def logchoose(n, k): + if n == 0 and k == 0: + return 0.0 + + if n < 0 or k < 0 or k > n: + return -util.INF + + # optimization for speed + if k > n/2: + k = n - k + + t = 0.0 + n2 = n + 1.0 + for i in xrange(1, k+1): + t += log((n2 - i) / i) + return t + + +def multinomial(vals): + n = sum(vals) + + res = logfactorial(n) + for v in vals: + res -= logfactorial(v) + return int(exp(res) + .05) + + +def logmultinomial(vals): + n = sum(vals) + + res = logfactorial(n) + for v in vals: + res -= logfactorial(v) + return res + + +def sample(weights): + """ + Randomly choose an int between 0 and len(probs)-1 using + the weights stored in list probs. + + item i will be chosen with probability weights[i]/sum(weights) + """ + + total = sum(weights) + pick = random.random() * total + x = 0 + for i in xrange(len(weights)): + x += weights[i] + if x >= pick: + return i + return len(weights) - 1 + + +def rhyper(m, n, M, N, report=0): + ''' + calculates cumulative probability based on + hypergeometric distribution + over/under/both (report = 0/1/2) + (uses R through RPy) + + N = total balls in urn + M = total white balls in urn + n = drawn balls from urn + m = drawn white balls from urn + + ''' + + from rpy import r + + + assert( (type(m) == type(n) == type(M) == type(N) == int) + and m <= n and m <= M and n <= N) + + + + if report == 0: + #p-val for over-repr. + return r.phyper(m-1, M, N-M, n, lower_tail=False) + elif report == 1: + #p-val for under-repr. + return r.phyper(m, M, N-M, n) + elif report == 2: + #tuple (over, under) + return r.phyper(m-1, M, N-M, n, lower_tail=False), r.phyper(m, M, N-M, n) + else: + raise "unknown option" + + +def cdf(vals): + """Computes the CDF of a list of values""" + + vals = sorted(vals) + tot = float(len(vals)) + x = [] + y = [] + + for i, x2 in enumerate(vals): + x.append(x2) + y.append(i / tot) + + return x, y + + +def enrichItems(in_items, out_items, M=None, N=None, useq=True, extra=False): + """Calculates enrichment for items within an in-set vs and out-set. + Returns a sorted table. 
+ """ + # DEPRECATED + # TODO: remove this function + + + # count items + counts = util.Dict(default=[0, 0]) + for item in in_items: + counts[item][0] += 1 + for item in out_items: + counts[item][1] += 1 + + if N is None: + N = len(in_items) + len(out_items) + if M is None: + M = len(in_items) + + tab = tablelib.Table(headers=["item", "in_count", "out_count", + "pval", "pval_under"]) + + # do hypergeometric + for item, (a, b) in counts.iteritems(): + tab.add(item=item, + in_count=a, + out_count=b, + pval=rhyper(a, a+b, M, N), + pval_under=rhyper(a, a+b, M, N, 1)) + + # add qvalues + if useq: + qval = qvalues(tab.cget("pval")) + qval_under = qvalues(tab.cget("pval_under")) + + tab.add_col("qval", data=qval) + tab.add_col("qval_under", data=qval_under) + + if extra: + tab.add_col("in_size", data=[M]*len(tab)) + tab.add_col("out_size", data=[N-M]*len(tab)) + tab.add_col("item_ratio", data=[ + row["in_count"] / float(row["in_count"] + row["out_count"]) + for row in tab]) + tab.add_col("size_ratio", data=[ + M / float(N) for row in tab]) + tab.add_col("fold", data=[row["item_ratio"] / row["size_ratio"] + for row in tab]) + + tab.sort(col='pval') + return tab + + +def qvalues(pvals): + import rpy + ret = rpy.r.p_adjust(pvals, "fdr") + return ret + + + +#============================================================================= +# Distributions +# + +def uniformPdf(x, params): + a, b = params + if x < a or x > b: + return 0.0 + else: + return 1.0 / (b - a) + + +def binomialPdf(k, params): + p, n = params + return choose(n, k) * (p ** k) * ((1.0-p) ** (n - k)) + +def gaussianPdf(x, params): + return 1/sqrt(2*pi) * exp(- x**2 / 2.0) + +def normalPdf(x, params): + mu, sigma = params + # sqrt(2*pi) = 2.5066282746310002 + return exp(- (x - mu)**2 / (2.0 * sigma**2)) / (sigma * 2.5066282746310002) + +def normalCdf(x, params): + mu, sigma = params + return (1 + erf((x - mu)/(sigma * sqrt(2)))) / 2.0 + +def logNormalPdf(x, params): + """mu and sigma are the mean and standard deviation of the + variable's logarithm""" + + mu, sigma = params + return 1/(x * sigma * sqrt(2*pi)) * \ + exp(- (log(x) - mu)**2 / (2.0 * sigma**2)) + +def logNormalCdf(x, params): + """mu and sigma are the mean and standard deviation of the + variable's logarithm""" + + mu, sigma = params + return (1 + erf((log(x) - mu)/(sigma * sqrt(2)))) / 2.0 + + +def poissonPdf(x, params): + lambd = params[0] + + if x < 0 or lambd <= 0: + return 0.0 + + a = 0 + for i in xrange(1, int(x)+1): + a += log(lambd / float(i)) + return exp(-lambd + a) + + +def poissonCdf(x, params): + """Cumulative distribution function of the Poisson distribution""" + # NOTE: not implemented accurately for large x or lambd + lambd = params[0] + + if x < 0: + return 0 + else: + return (gamma(floor(x+1)) - gammainc(floor(x + 1), lambd)) / \ + factorial(floor(x)) + + +def poissonvariate(lambd): + """Sample from a Poisson distribution""" + l = -lambd + k = 0 + p = 0.0 + + while 1: + k += 1 + p += log(random.random()) + if p < l: + return k - 1 + + +def exponentialPdf(x, params): + lambd = params[0] + + if x < 0 or lambd < 0: + return 0.0 + else: + return lambd * exp(-lambd * x) + + +def exponentialCdf(x, params): + lambd = params[0] + + if x < 0 or lambd < 0: + return 0.0 + else: + return 1.0 - exp(-lambd * x) + + +def exponentialvariate(lambd): + return -log(random.random()) / lambd + +def gammaPdf(x, params): + alpha, beta = params + if x <= 0 or alpha <= 0 or beta <= 0: + return 0.0 + else: + return (exp(-x * beta) * (x ** (alpha - 1)) * (beta ** alpha)) / \ + 
gamma(alpha) + + +def loggammaPdf(x, params): + alpha, beta = params + if x <= 0.0 or alpha <= 0.0 or beta <= 0.0: + return -util.INF + else: + return -x*beta + (alpha - 1)*log(x) + alpha*log(beta) - gammaln(alpha) + +def gammaPdf2(x, params): + alpha, beta = params + if x <= 0 or alpha <= 0 or beta <= 0: + return 0.0 + else: + return exp(loggammaPdf(x, params)) + + +def gammaCdf(x, params): + alpha, beta = params + if x <= 0: + return 0 + else: + return gammainc(alpha, x * beta) / gamma(alpha) + +def invgammaPdf(x, params): + a, b = params + + if x <=0 or a <= 0 or b <= 0: + return 0.0 + else: + return (b**a) / gamma(a) * (1.0/x)**(a + 1) * exp(-b/x) + +def loginvgammaPdf(x, params): + a, b = params + if x < 0 or a < 0 or b < 0: + return -util.INF + else: + return a*log(b) - gammaln(a) + (a+1)*log(1.0/x) -b/x + + + + +def betaPdf2(x, params): + """A simpler implementation of beta distribution but will overflow + for values of alpha and beta near 100 + """ + + alpha, beta = params + if 0 < x < 1 and alpha > 0 and beta > 0: + return gamma(alpha + beta) / (gamma(alpha)*gamma(beta)) * \ + x ** (alpha-1) * (1-x)**(beta-1) + else: + return 0.0 + +def betaPdf(x, params): + alpha, beta = params + + if 0 < x < 1 and alpha > 0 and beta > 0: + return exp(gammaln(alpha + beta) - (gammaln(alpha) + gammaln(beta)) + \ + (alpha-1) * log(x) + (beta-1) * log(1-x)) + else: + return 0.0 + + + +def betaPdf3(x, params): + alpha, beta = map(int, params) + if 0 < x < 1 and alpha > 0 and beta > 0: + n = min(alpha-1, beta-1) + m = max(alpha-1, beta-1) + + prod1 = 1 + for i in range(1,n+1): + prod1 *= ((n+i)*x*(1-x))/i + + prod2 = 1 + if alpha > beta: + for i in range(n+1, m+1): + prod2 *= ((n+i)*x)/i + else: + for i in range(n+1, m+1): + prod2 *= ((n+i)*(1-x))/i + + return prod1 * prod2 * (alpha + beta - 1) + else: + return 0.0 + + +def negbinomPdf(k, r, p): + return exp(gammaln(r+k) - gammaln(k+1) - gammaln(r) + + r*log(p) + k * log(1-p)) + + + +def gamma(x): + """ + Lanczos approximation to the gamma function. + + found on http://www.rskey.org/gamma.htm + """ + + ret = 1.000000000190015 + \ + 76.18009172947146 / (x + 1) + \ + -86.50532032941677 / (x + 2) + \ + 24.01409824083091 / (x + 3) + \ + -1.231739572450155 / (x + 4) + \ + 1.208650973866179e-3 / (x + 5) + \ + -5.395239384953e-6 / (x + 6) + + return ret * sqrt(2*pi)/x * (x + 5.5)**(x+.5) * exp(-x-5.5) + + + +def gammaln(xx): + """ + From numerical alogrithms in C + + float gammln(float xx) + Returns the value ln[(xx)] for xx > 0. + { + Internal arithmetic will be done in double precision, a nicety that you can omit if five-figure + accuracy is good enough. 
+ double x,y,tmp,ser; + static double cof[6]={76.18009172947146,-86.50532032941677, + 24.01409824083091,-1.231739572450155, + 0.1208650973866179e-2,-0.5395239384953e-5}; + int j; + y=x=xx; + tmp=x+5.5; + tmp -= (x+0.5)*log(tmp); + ser=1.000000000190015; + for (j=0;j<=5;j++) ser += cof[j]/++y; + return -tmp+log(2.5066282746310005*ser/x); + } + """ + + cof = [76.18009172947146,-86.50532032941677, + 24.01409824083091,-1.231739572450155, + 0.1208650973866179e-2,-0.5395239384953e-5] + + y = x = xx + tmp = x + 5.5 + tmp -= (x + 0.5) * log(tmp) + ser = 1.000000000190015 + + for j in range(6): + y += 1 + ser += cof[j] / y + + return - tmp + log(2.5066282746310005 * ser / x) + + + + +GAMMA_INCOMP_ACCURACY = 1000 +def gammainc(a, x): + """Lower incomplete gamma function""" + # found on http://www.rskey.org/gamma.htm + + ret = 0 + term = 1.0/x + for n in xrange(GAMMA_INCOMP_ACCURACY): + term *= x/(a+n) + ret += term + if term < .0001: + break + return x**a * exp(-x) * ret + + +def erf(x): + # http://www.theorie.physik.uni-muenchen.de/~serge/erf-approx.pdf + + a = 8/(3*pi) * (pi - 3)/(4 - pi) + axx = a * x * x + + if x >= 0: + return sqrt(1 - exp(-x*x * (4.0/pi + axx)/(1 + axx))) + else: + return - sqrt(1 - exp(-x*x * (4.0/pi + axx)/(1 + axx))) + + + +def chiSquare(rows, expected=None, nparams=0): + # ex: rows = [[1,2,3],[1,4,5]] + assert util.equal(map(len, rows)) + + if 0 in map(sum,rows): return 0,1.0 + cols = zip(* rows) + if 0 in map(sum,cols): return 0,1.0 + + if not expected: + expected = make_expected(rows) + + chisq = 0 + for obss,exps in zip(rows,expected): + for obs, exp in zip(obss, exps): + chisq += ((obs-exp)**2)/exp + + df = max(len(rows)-1, 1)*max(len(rows[0])-1, 1) - nparams + + p = chi_square_lookup(chisq,df) + + return chisq,p + + +def make_expected(rows): + rowtotals = map(sum, rows) + coltotals = map(sum, zip(* rows)) + grandtotal = float(sum(rowtotals)) + + expected = [] + for row,rowtotal in zip(rows,rowtotals): + expected_row = [] + for obs, coltotal in zip(row, coltotals): + exp = rowtotal * coltotal / grandtotal + expected_row.append(exp) + expected.append(expected_row) + return expected + + +def chiSquareFit(xbins, ybins, func, nsamples, nparams, minsamples=5): + sizes = [xbins[i+1] - xbins[i] for i in xrange(len(xbins)-1)] + sizes.append(sizes[-1]) # NOTE: assumes bins are of equal size + + # only focus on bins that are large enough + counts = [ybins[i] * sizes[i] * nsamples for i in xrange(len(xbins)-1)] + + expected = [] + for i in xrange(len(xbins)-1): + expected.append((func(xbins[i]) + func(xbins[i+1]))/2.0 * + sizes[i] * nsamples) + + # ensure we have enough expected samples in each bin + ind = util.find(util.gefunc(minsamples), expected) + counts = util.mget(counts, ind) + expected = util.mget(expected, ind) + + if len(counts) == 0: + return [0, 1], counts, expected + else: + return chiSquare([counts], [expected], nparams), counts, expected + + +chi_square_table = { + 1: [1.64, 2.71, 3.84, 5.02, 6.64, 10.83], + 2: [3.22, 4.61, 5.99, 7.38, 9.21, 13.82], + 3: [4.64, 6.25, 7.82, 9.35, 11.34, 16.27], + 4: [5.99, 7.78, 9.49, 11.14, 13.28, 18.47], + 5: [7.29, 9.24, 11.07, 12.83, 15.09, 20.52], + 6: [8.56, 10.64, 12.59, 14.45, 16.81, 22.46], + 7: [9.80, 12.02, 14.07, 16.01, 18.48, 24.32], + 8: [11.03, 13.36, 15.51, 17.53, 20.09, 26.12], + 9: [12.24, 14.68, 16.92, 19.02, 21.67, 27.88], + 10: [13.44, 15.99, 18.31, 20.48, 23.21, 29.59], + 11: [14.63, 17.28, 19.68, 21.92, 24.72, 31.26], + 12: [15.81, 18.55, 21.03, 23.34, 26.22, 32.91], + 13: [16.98, 19.81, 22.36, 24.74, 27.69, 
34.53], + 14: [18.15, 21.06, 23.68, 26.12, 29.14, 36.12], + 15: [19.31, 22.31, 25.00, 27.49, 30.58, 37.70], + 16: [20.47, 23.54, 26.30, 28.85, 32.00, 39.25], + 17: [21.61, 24.77, 27.59, 30.19, 33.41, 40.79], + 18: [22.76, 25.99, 28.87, 31.53, 34.81, 42.31], + 19: [23.90, 27.20, 30.14, 32.85, 36.19, 43.82], + 20: [25.04, 28.41, 31.41, 34.17, 37.57, 45.31], + 21: [26.17, 29.62, 32.67, 35.48, 38.93, 46.80], + 22: [27.30, 30.81, 33.92, 36.78, 40.29, 48.27], + 23: [28.43, 32.01, 35.17, 38.08, 41.64, 49.73], + 24: [29.55, 33.20, 36.42, 39.36, 42.98, 51.18], + 25: [30.68, 34.38, 37.65, 40.65, 44.31, 52.62], + 26: [31.79, 35.56, 38.89, 41.92, 45.64, 54.05], + 27: [32.91, 36.74, 40.11, 43.19, 46.96, 55.48], + 28: [34.03, 37.92, 41.34, 44.46, 48.28, 56.89], + 29: [35.14, 39.09, 42.56, 45.72, 49.59, 58.30], + 30: [36.25, 40.26, 43.77, 46.98, 50.89, 59.70] +} + + +def chi_square_lookup(value, df): + + ps = [0.20, 0.10, 0.05, 0.025, 0.01, 0.001] + + if df <= 0: + return 1.0 + + row = chi_square_table[min(df, 30)] + + for i in range(0,len(row)): + if row[i] >= value: + i = i-1 + break + + if i == -1: return 1 + else: return ps[i] + + + +def spearman(vec1, vec2): + """Spearman's rank test""" + + assert len(vec1) == len(vec2), "vec1 and vec2 are not the same length" + + n = len(vec1) + rank1 = util.sortranks(vec1) + rank2 = util.sortranks(vec2) + + R = sum((vec1[i] - vec2[i])**2 for i in xrange(n)) + + Z = (6*R - n*(n*n - 1)) / (n*(n + 1) * sqrt(n - 1)) + + return Z + + + +# input: +# xdata, ydata - data to fit +# func - a function of the form f(x, params) +# +def fitCurve(xdata, ydata, func, paramsInit): + import scipy + import scipy.optimize + + y = scipy.array(ydata) + p0 = scipy.array(paramsInit) + + def error(params): + y2 = scipy.array(map(lambda x: func(x, params), xdata)) + return y - y2 + + params, msg = scipy.optimize.leastsq(error, p0) + + resid = error(params) + + return list(params), sum(resid*resid) + + +def fitDistrib(func, paramsInit, data, start, end, step, perc=1.0): + xdata, ydata = util.distrib(data, low=start, width=step) + ydata = [i / perc for i in ydata] + xdata = util.histbins(xdata) + params, resid = fitCurve(xdata, ydata, func, paramsInit) + return params, resid + + +def plotfuncFit(func, paramsInit, xdata, ydata, start, end, step, plot = None, + **options): + from rasmus import gnuplot + + if not plot: + plot = gnuplot.Gnuplot() + + options.setdefault('style', 'boxes') + + params, resid = fitCurve(xdata, ydata, func, paramsInit) + plot.plot(util.histbins(xdata), ydata, **options) + plot.plotfunc(lambda x: func(x, params), start, end, step) + + return plot, params, resid + + +def plotdistribFit(func, paramsInit, data, start, end, step, plot = None, + **options): + xdata, ydata = util.distrib(data, low=start, width=step) + return plotfuncFit(func, paramsInit, xdata, ydata, start, end, step/10, plot, + **options) + + + + +def chi_square_fit(cdf, params, data, ndivs=20, minsamples=5, plot=False, + start=-util.INF, end=util.INF): + + from rasmus import gnuplot + import scipy + import scipy.stats + + # determine ndiv and binsize + binsize = len(data) / ndivs + if binsize < minsamples: + ndivs = len(data) / minsamples + binsize = len(data) / ndivs + + data = sorted(data) + bins = [data[i:i+binsize] for i in xrange(0, len(data), binsize)] + obs = scipy.array(map(len, bins)) + ind = util.find(lambda x: x[-1] >= start and x[0] <= end, bins) + obs = util.mget(obs, ind) + + x = [bin[0] for bin in bins] + expected = [len(data) * cdf(x[1], params)] + expected.extend([len(data) * + 
(cdf(x[i+1], params) - cdf(x[i], params)) + for i in range(1, len(x)-1)]) + expected.append(len(data) * (1.0 - cdf(x[-1], params))) + expected = scipy.array(util.mget(expected, ind)) + + chi2, pval = scipy.stats.chisquare(obs, expected) + + if plot: + p = gnuplot.plot(util.mget(x, ind), obs) + p.plot(util.mget(x, ind), expected) + + return chi2, pval + + +def fit_distrib(cdf, params_init, data, ndivs=20, minsamples=5, + start=-util.INF, end=util.INF): + + import scipy + import scipy.optimize + import scipy.stats + + # determine ndiv and binsize + binsize = len(data) / ndivs + if binsize < minsamples: + ndivs = len(data) / minsamples + binsize = len(data) / ndivs + + data = sorted(data) + bins = [data[i:i+binsize] for i in xrange(0, len(data), binsize)] + obs = scipy.array(map(len, bins)) + ind = util.find(lambda x: x[-1] >= start and x[0] <= end, bins) + obs = util.mget(obs, ind) + + def optfunc(params): + x = [bin[0] for bin in bins] + expected = [len(data) * cdf(x[1], params)] + expected.extend([len(data) * + (cdf(x[i+1], params) - cdf(x[i], params)) + for i in range(1, len(x)-1)]) + expected.append(len(data) * (1.0 - cdf(x[-1], params))) + expected = scipy.array(util.mget(expected, ind)) + + chi2, pval = scipy.stats.chisquare(obs, expected) + return chi2 + + params = scipy.optimize.fmin(optfunc, params_init, disp=False) + chi2, pval = chi_square_fit(cdf, params, data, ndivs, minsamples) + + return list(params), pval + + + + + +def solveCubic(a, b, c, real=True): + """solves x^3 + ax^2 + bx + c = 0 for x""" + + p = b - a*a / 3.0 + q = c + (2*a*a*a - 9*a*b) / 27.0 + + # special case: avoids division by zero later on + if p == q == 0: + return [- a / 3.0] + + # + # u = (q/2 +- sqrt(q^2/4 + p^3/27))^(1/3) + # + + # complex math is used to find complex roots + sqrteqn = cmath.sqrt(q*q/4.0 + p*p*p/27.0) + + # find fist cube root + u1 = (q/2.0 + sqrteqn)**(1/3.0) + + # special case: avoids division by zero later on + if u1 == 0: + u1 = (q/2.0 - sqrteqn)**(1/3.0) + + # find other two cube roots + u2 = u1 * complex(-.5, -sqrt(3)/2) + u3 = u1 * complex(-.5, sqrt(3)/2) + + # finds roots of cubic polynomial + root1 = p / (3*u1) - u1 - a / 3.0 + root2 = p / (3*u2) - u2 - a / 3.0 + root3 = p / (3*u3) - u3 - a / 3.0 + + if real: + return [x.real + for x in [root1, root2, root3] + if abs(x.imag) < 1e-10] + else: + return [root1, root2, root3] + + +def _solveCubic_test(n=100): + + def test(a, b, c): + xs = solveCubic(a, b, c) + + for x in xs: + y = x**3 + a*x*x + b*x + c + assert abs(y) < 1e-4, y + + test(0, 0, 0) + test(0, 1, 1) + test(0, 0, 1) + + for i in xrange(n): + + a = random.normalvariate(10, 5) + b = random.normalvariate(10, 5) + c = random.normalvariate(10, 5) + + test(a, b, c) + + +def bisect_root(f, x0, x1, err=1e-7): + """Find a root of a function func(x) using the bisection method""" + f0 = f(x0) + f1 = f(x1) + + while (x1 - x0) / 2.0 > err: + x2 = (x0 + x1) / 2.0 + f2 = f(x2) + + if f0 * f2 > 0: + x0 = x2 + f0 = f2 + else: + x1 = x2 + f1 = f2 + + return (x0 + x1) / 2.0 + + + + + +#============================================================================= +# testing + +if __name__ == "__main__": + + + # iter_window + from rasmus import util + from rasmus import gnuplot + + vals = sorted([random.random() * 20 for x in range(600)]) + + vals += sorted([40 + random.random() * 20 for x in range(600)]) + + ''' + win = filter(lambda x: len(x) > 0, + list(iter_window_index(vals, 5))) + + p = util.plot(util.cget(win, 2))#, style="lines") + p.enableOutput(False) + p.plot(util.cget(win, 3)) #, 
style="lines") + + for i, y in enumerate(vals): + p.plot([i, len(vals)], [y, y], style="lines") + p.enableOutput(True) + p.replot() + ''' + + def mean2(v): + if len(v) == 0: + return 0.0 + else: + return mean(v) + + x, y = zip(* iter_window_step(vals, 5, 1, len)) + gnuplot.plot(x, y) + + + + +#============================================================================= +# OLD CODE + +''' +def smooth_old(x, radius): + """ + return an averaging of vals using a radius + + Note: not implemented as fast as possible + runtime: O(len(vals) * radius) + """ + + vlen = len(x) + + # simple case + if vlen == 0: + return [] + + x2 = [] + + tot = x[0] + + low = 0 + high = 0 + + for i in range(vlen): + xi = x[i] + + xradius2 = min(i, vlen - i - 1, xradius) + + # move window + while x[low] < xi - xradius2: + xtot -= x[low] + ytot -= y[low] + low += 1 + while x[high] < xi + xradius2: + high += 1 + xtot += x[high] + ytot += y[high] + + denom = float(high - low + 1) + x2.append(xtot / denom) + y2.append(ytot / denom) + + return x2, y2 +''' diff --git a/arghmm/deps/rasmus/stats.pyc b/arghmm/deps/rasmus/stats.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b61489a36f5d25fa8987c2f8e96428ac082855de GIT binary patch literal 46991 zcmdUY3xHhLS>Cy`@73zH+SRTkKSq*et*qDX>TOHb%Da~BSeEU(j+MyXETcKItC4nR zcIVDYBgZlnJFyK3DUk4fBm@d1ZGa>|Xd7q(fdZi^g+fzW9x0`P1e%0bp*;G1-+%9& zSxF8F8mFza_sqHHeg5i8s}HZv)1|5^7J~tMxJ%f zua&3I`CfVYonI%9GH_3CG z^S8)zyYsiolXdOJOO0Qwwr{$9o2>-=$fjypdl&zSQkH|ee&Gr{J1>h z&QHiQ;r#vb-0%EJc}_b2fIJU4|DZe%I{%P74>|uDd0ylE!}2`r{3&@(Isb?}k2rr? zp3}}hD$k?NKPJy(&Oa{Ceu;s+7&L(c!O^Y3x~z0Uu0=l_Kw-iL@EasEf0|1sy^ z@B9ax|DYm%91%a^{7*XnA-2c=l=J^m5&sGi|JwNvJO9(p|BUnh#`%vZ;-iT8S?7Pw z`HwmO^UnW*^Z!;6zlexma{iZ{|G4wN;{3mJ{u7G$_lWpa=l_HAzvld}JO3Y@{|!a_ zCL;cm^PhD7KRf?h&i@zZ|EnT?8xgydd(MB#`Tyqp?=G5cm6M&|4Zlpm-Bz6h`&a}|91Xw zoc~+r|IYcp2Zs-cK0$EMX9C|6_}0L;1->0@SrMIx=n8yy;8z5`C-5r+ze*9S5%Dj; zYXiSF@V$Xw7x+F!^dn+@;0FRf82Al=-x&B!ins+4w+8;Uz~3JDY~VKsev2ZuBI1s~ zZwvgLf!`kZp}_A@#9hI`vu^-T3&?qCf58<% zTt|Wn8CU*wo7=9P>+TLX_szMpb1o>%yIJR2&z4*Pba+qjeBdUg2He>NS3ppkE4I1k z9h=nGyXZTfDmf%LyXXq|i*r^Qb8b>jQ1H=US>}P>#qa5Os>0ldklh~>xFT1my21!c zBTE@|g(dfVt9y%cPh&n>aAz;M!kjB!b_F!&l6yYmf>MA!wYtfndjm)WS%?MQ-Pvq* zygWZ&E@h)?u3C($#e9^VE1s+5Dp$I@i+_)QLrCqabnY^5E4;;LiVLUfuS?b9oV`KL zaMj$oxdIBwGZUW{JmYwxJ~&mtByd22XPuisr|>s9lws@+CM(Y`EL96pwtBvhU8t1( zQofok&t!9GRU}_&Y5rWH5~*-zW8o2>+u_s(S?ttWcQs%|&#L$*gT;1t#yix**2L{* zzPa+Os;;u2J;Ul{aR`1VCprq>-uY5>@44da-drVrzIdsy7p=Nb$X6o-%$}d0Pkwx1 zA=+EXMe|G1UIH`Pvv4KqLIFo-3#CGBp>ppaULw}iOvO@ z#`Q_vE+fdXYChu(-v$R`0+<>6CZ^W7s~JM?*^E1a;Kh~h4Bq}R;{+1nzt0`FjHNCn9Ru4AW- zs@e9rLaC~9%av-u4>2fQ3CD<|P!f=dkP#B_rJ2~sisuV#{w~&F_}CNlWYz|ojSe~N zY?Ht{3C}Wo93zB_fTMt1Y~h45F|~s{9vFJH)jf;9z+Gr{3q+86Uxy%np43ZD6Bh&?gLvOm@Gg20a7 zi-jvy1|`1$ax-B}K2O zKZq8Wa+QM57Alo;CAt;CoYSH#0<)!ZRa0J+n=fR6v$NIn7)2c`084=wsae} zCG$!!Jh~e%DHLCZKX5OC=Ly_zZWIRqcL3Z*2M`?u#KCBVRV36Wa}^L&z94>17%^ra zo@g_i+D7J_S|?Hn+)K_4Vz}4EGC(qi3AU#V4Ov9UR-!@3Z|s=fCB-*c2zV3)$3lQ{ ze+@7PG9-S6J#;wUYKI8Qn#mT0cfz^p230#z%B2{Jr&uD};4(e+K_f`V86yl6Xly(% z7A8drttYFwlFwCq))y`nAwQN&#&}s?v{qk-@ybb{mpf(9wSmcF-k7+38`_i8ZRek0!C>63~7lU;yxjzZi5h;EJg#VWhC*8qD|n>Bla_@iJ)4pHk}W# zwfVHQug6;~&|tR3EU*p|;1)w~HdB3Lnmn93WP_C715R2xOiUe*IXwW-3Wx#9Nz83A zd=2WF1`N_zZ1pZ4a?iEm=T^=IF{Y8K>k!R8lB-sVHA6Nrc#gODxS)jINFdV7e6A{p zM#X0e;TEJ50Asd3Iwj6^!T1Csn}C^|>Cpnzi#8HU#@fLnI{t5ik@M+zxl$?2iQ`-c z$8Z!01e!cj35MZ*1ce9aq`(U)FocKTG!t%yhn0!}u87hb0Y_ROb4nT1LwGU#BUWh< z(n!ZdE#YAw_%7EZqW23et_)o}h3Xc{(V*H%$kp*$q#s2-R>)PNa%p>%Z2%M-lPoQbd->V>E{ zTbe26DN9feCblFvb#JzkyXt!zUw^pcG>|#NP 
zM|v#vu#9O4;CsW8kIIS1mvSX&kqWyZB+HXsm@8KsG*Il!Y-vfM@H!31=^+xFL|{|B zg!kb+B#RImFv*#i1jg#i^HnbWN@bAeoMyIa!PqL+ej!_-H&!vk>h$Jw)m-=h-c+=a z0)j@3lWa?pvCt+`hq|=5$Ws3as<4Gk%Np?CBSCj&Ww180CJ;7?ToS%QvWckr0Ij@u zL)m&oti$1iGkHE&Dv3*Aja(@&T**@)Vp+s+Nz`PIwP8{mR-Ig2Vw9cMo9i{?-GvZt>DhPx(E#WTLyS*J{OSX#jZW6Rj zP-RDp;{ zJbWtap^UCTw~X0qrd-KZFP9M%AT?57-3n)o-XSx7nPjK1J`FWdLO#U6wxhkPF&0tFBY=J5_$zO3O&f? zsbi|ZXA>|ISuZrObEP@WuNRxy6Ov|54Vf(+*oNeR52Yd)X z6LbM1VZv7<{=VE52wu>f-4GYFdqqe90kXSy<7ep9js!X-gd@y0izwZ38qY42i>0c8 z2k{rLO|y`zK>Y?xiy`=T$@Z7cTO9WA)dLR+n|G8`%d><6JOZgHDC!*z%0_1mQ! z(9m4%kVFRou^s4*XApH$*AJyB(EDnqEA6mVNt~pEV!9SBR8uFJ(5il7quhtK_&V zsjqe=ikCUH8hl0_|EYY=a zUj_N6hya6L4;hfS}5YWuyGM3&Gw`;+gZ)HBsOl|ir~-@jgRqd61F6A zjg3U?kgA&SES+gOUj;{Sl~$}i4$XuxS8Xwb)#8uiJz^kt4tg>Ka=eHlNcoWRBeoO( z1=7C&4*&(wfiP0p%+{X<@0XSIMH9g@?#&v9ScU_QVI>8u=jweB1Pkhj*X~A5Dtd$}jgFdFdM)Nvw(N$w~AjDR~a>ks}MH+=7ljXf)`8?6oo0f{0)}Pxx}^J-qI7? zlEDTUIY=j-l!F|_3ve5_Ig4~uBXi%eUpxaZKwWZk@QdEXweGn#hpBTeBY$vG&bOm=1^UF)Z6ZH_M9L8Wk(;w^E_iHl7(24VVy6Ts z=FLIOmTlVVY9&`0Kyh~?isd6bmV>qcloDzhnJ$zW7s(Q8kc`YQh8uoL#;Bp%j_k2) zFj-NZD#RtAeTcLb(dGLRpGLEX@c$Tcu`jH=Q}qgeNLXgEpu@O2`tVC5Mr=DY3a;##Yr^B`=RW*=l zy_yzfhl+a&d$J3KN*-Jh>Y~UbKBGnAvFU!O-q8MZ0TN)LlB*U>2oRxCgb*@@uc7ZG z9ct-9>g$A{my8(k+5|des|cDuVrfK2x1u9hqlt!F)LohOOh+(i^Eh!Y#lIiG%XRqI z>Pj8J4@b-+7D~)Bgo?$nI}xot9jHPfwQx#rb5tNhNP`b^&*1aetd{)+UMRusTAl?H za6Q)zGD0~;MId&`QD|zo@ylrL16Y%4_BV{mRkIMf)4|Gar+<4YxC^af6im#!47G$5 zh+%prAa2mlAy48e<)xA@mS!0zf7X{TW34cg-93U;QyjLmpheP1l1qpgmzW}_0@a#2 zN=o%;!V9@sMC+}zN*H-niO8yG_sD6JWu$f_>Y23!*=c?b>SJ+w2#S&+^M&G^Q1kv; zzObM@Q;xWqP7qhrPcqD;Hp9@(OhhzoCT?;fV0}hRZRlvkPPi^$D1{;5=bdRex3ifm%w#L>)N6W$(7u29*;9&`S6rZwl(%tt+>&IMgV9|qY;t+g&VSKgrcfW#*= z4=(dT3rIEqnt~1vL>0?(RK=d~Leki5Q4G60yE|IQ0TKQT)dPn7_qpRSTnJ z6<~|2-6s(lj7M<*3+3afo#a*oWviANoD%!Q#27%>Vlx3NPp~08XTV#)&}y&av|2BR ztCk6sgQCE;Ffos$Wnd(@xy^!aL7>g5X??UHIfNn?tEiwVqbM>)ErpAp9U{UF#T5AJ~kD@lw#={ zK~0$OIkcc9bnqw#fjrxFke1fq>B{ulRL?f?5!h=;h&FQu2MU|E$0{)f_}gnUV`Ms7obmp!qq-rg`jKoiZBx6@p9lII6BpXWXcdWO+Jm~QQ)MQ@1Xq*AlblX&J+Q-GxbeeK(YzZ z*TE6B$EyKog24$YxmofITZTm0rHF4itPdzKD7l=|*;N*~fjvNJiz zZunim7EKW+6|9COEGOoX;n5*jUk*_g+>yO2F0^rt!O5aN6KGDJhiFG|om zvpUg2gIn|AW;mJvE}?Xr06Gw4ZEiaAiRR1Y>iNZ#`E({~t&PE58L8sgGBTe$p6Fvp z*f_2jK9VCS7~C5qE~y3~8*CB>5pTeNB48FW0G7F!uA$7um|`HI*+s!GRq2wT2MeT9e1}JwKxC((vb?9ohTRiK6Gf?>=ggW9*H`D5#3~INq=Zsc@QmW?1>cz&IPt6(xmUF2V}pa<&XAf;K=FE|-fQ zb}rj%+%k-pW=3wvht-tIz-mfal0_OyDdtfyv;;LQ_pr>X6c(V5r>4@XXduK|Hd!sj z%l^3nDGL|t4O zXckxdMUI7NA$)5RjFUK(ss)$4sMOl0AZ!wgD9;yX&&z{=qDo^(QCd3$O-EHM)Vfe4 zgZ0)-hRvQ4%+uDgX#lrSdlQNGLBzPOp7xbNTc#VL8&m}pW2N*kC`6=e{ZKZ32yDGCYBk)SoM@@H1`;RT7ebBJP1kyKs6zxIaQt-h|2pkMO zA#f7=3XwU8jT;E9jbv_NjbyHnL&&mtKajTe2@+|X=OdsTkU+Z%_#j>o(~TL#2&i9l z#jrY45J90?U>4Xz8n})=d1$B0)pm=QSi^2dF-C$Yr-An{DR|GP*eLLIBhI^RO)Tj_A3 z326Zlemk6I;vrR2*WsbC{pW~`T<_CH9EfOW2sc5*7Pk>=i+-RYVu75u1*?Ukd3HK^ z#^INZ8U|pp-f*-3A!P&q>%lj#|L#PQN1=W>=X3Y|0egUV+jIC*TBHGLVspZGq4s(+ zC`OQ@a5(dLqVsTSInKkJ3PgyIkU?>9C#D{7Pis*OY6O;Z_7GVn#Bqm_PKTo~)z8%c za2$lu?kHG6^oD5`v=(@5o*=X%VE>mhPnfKd#YvOpa@Lo~-XT8fUwk$z{4TtS3D4&i zh6o9xh;L*txzo&2DIb0h!zr29F}_X;O&pvmh2{p_*EkV>T*n623)KFS5jlJ(GBvSf zLYpt7*n1t?aCuvMC>Gy=6UrOVic}qbFTtOpljd?YDg186 zzMGDaD&?TB4nD0xMGg2QE~-%^y}PdUK&m{)lFC;8j^l}b5}YFik_}j98RQBL^yOuda2^;?KkkZl1uV8I*#| zYh6F$14s%CKx5*7#rK1kz8@=90HH&Wmy8;n!zf;Z&CAxSHAvJxdnI17{$n|*2^Pe5 zLv+fIl8EbPG%09L&*w^4!mmeU%w}tLR!7puTbNth72BA5j;T7}#BDMfImswV(p-(# z>?qHb(Yjb7MqR^k9(k_tT+ebrMLLd}3DR-Vqv25$)LE_UD{>g(Vf`{5rrbls$g|MACEk*%TOV*7Fur!ZPJ8rHsT{1W#J?R z92P&2Ox7Y$1P*ZK77g)5^ES@WgAl=!TX2pZr*iLVRBvoSS(8#uWK$J|**ikFF$2|T 
z83!HbOnOAeoQ)sF7}I5H=xDX^xpi>9qOhz6M108EkYfGWA+Tx|-R2azKN&OOV4 z14^p{hh#^gW_=7CE^wtXrcqi7BFQK_QDU+hrBM+STSIA^xXrz{4yd7vI~quX+3Nsn z&;Y;6P$PMvm1!f7UkkWC+i4=#=Oe885In|o$RG9SHXbYEo@Aq@dAA&xux0U zQ(Xhg`#1#4v3}}lsa-+w)HT=?06ho*&HA91;ik=Q4i>l}YfT1cP+Kh;83+{A)%6R5 zzX8^P5;LHnAonz-L%iA{Q-NPJ~Sy#zYmH3{}~#vnq)tVy)=b2 zc}F@RW*)_HFnzTQAJQQRKY)m)4*+MDnY>2e?3X+O%_a_RQVAm1q%x9uWdhMbyaoc< zN8AL$lR${}Lww%;<@GMjY`7TuUIv-idAv8wMg1hJEG6 zh}<50%SH*80xqYMu4Qy`i>-&)z;u)z!Ao6u#kLQageFE#mA8?E8W=v}#PAQ3;X~yQ z=n{;!yU4jSIX`rI_B+0v|9<$mO?kwm@cneiM6}Ik5i9vhT$Zo{Rb;q;mu7MiE)nOK z=CEH`p2xI|md8e;u;jt1Sg;Rpb}~SMg^xJ?fo@<4u2A9n%-eT z6ByCZ=QqD%pNR%(pXc#%O`n0~&ub3q1KcPgVAi=?`}-JniA)&siK#n9Qwjn>L4t=@ zcoD165jYtv9st55*)U(>>6$!B9ggp@gSVrbk?%wS~gX z9*Yv)TL`Q-m`2>eLU<($)@B)3%D7BW-kwM^+-#vJ1hm3wIK0H75Yh&=R&KSmGPZk_ zzq}2pbuAn6K=k3l)Ub~uCV0O?3G26GPA;+@RP_S4!XseO#L_@k)E;+6I z+M;Z4yhG-6otVJMi=#onQFxZm&E=PH)*bsT`6W;zmIyG1Xhbm{oWwyPI~sKU3Jkht z3uPQ+sTA|ePJ*d8ZshERl2iaz*llG)U&)ZmGXQx$vvM&Hs)TI zM77k91%xhAg(3CnUUCDkIdPOKFPbP0Gv%L z++NaN(Sm>}iaUuN;eu`JlE2wpL*D!dW;$?ar8iKj$YxVwMNN4ZDX|arxm9agP6vBh zS7x>aD>Iw$oTwXWrJT5ogmq2~|MELRupWR^0**`<92u90Bz$#Z>Mj{FOY01L<(Ycv zdjgOQU+@Q2Gq`qut1h@_k8t9#MCp(}N@_y#$Qf>$P_+D84PqPn@rc`f z0Eq!%%x}i?!1iXr&RhT`Tf=p7&^bw0v;px@Dr^8pI@EHMq7M^s>cgFL^+YX=i6#JE z?8vWk7dpl9(lMe(%#@2b?S)}(wPf5jPKFZW%Sh;Ga3dwS6F8lR0dEKIXvXc#vlv!1 zx6{=Y*+`u8f1a{hUc=uZx64tLhFdj}2SPAhbOt+sXfXE8p*w_j47xu2bz=w6*f;9w zx={pr{yb}J3l@JJ9Wyf@Y_76+6$X^TA5?$!z%GU`5WDP124y5tLLIjPfiMHGASeo@ zE@V)=F<8!|vCt197J~x{0AM;<9BiV~_*IHW4CIMdO0$}2Kq{?Jfue+osqNBy;sv>J zYO0FnnU6mmZUg1m{@%s^6+E}Hv88L$a-=8arJ>ZI;ss(b%FyJ8d}EpCRyXEbo90_% z_2YU5FU;g9KBG<( zc@%oSki$$ANrA`;wdY$?#-GR%622=!|KVe}e=xvoL2nqn8`3baRClj+6Y@k;EEtS09THUyhnIpCc-TIB#P`px$*I& z9smHdhINqDjpK>#fdl^kv^v9;l1aj`QV=hYdcj^3r6Br^5c0SFNdPA9I@p>0i#^Fb ztpug!Ta$x$FgmIuQ4{+r!i$dpJmzQm*lU8^VSuTlOZ%M;?g%0q`%M8yy~nVkdjP{a zT`NG7aD@-9>obGG)AddgWx@}_v4xiuV1LLkvIPlu&Fmv-;9RUYKqqfuC)po%QXNbN zR|6YkFZzWsgUH6SJN>qVLcqEem_jLdp;Ij5v@}SKT30t_ILsa(8xW#sniBd=h+MTC)NM?CDAO*Vdrzr>~$9^uoG(N^VWtztz#_(M)jHk zY-6v{K_aeo9G%2Cqvsq}_IiM0oOGGEdd>I}3SwBe>Lg^B42_97q0snG==-0Hz+=wM zTh!G1JML$QW6P141C;jnVZ2<^Uv?HlxeTxSh}h@};yBnuf*8d)@bC&8F{(#TpwRgx z$sZa_U7bpmjqY2fSOR;$Yi^|EvV%#qaDnunAk&+JiYUWr``d4Nic~lBgjyOOHLe&u^Nfa02*;PtRDxEmHLXP0R z8&}ZU_5|&wfQR3<3$n3l6GH$jC&udx0*PI>Zaf5~ruGw7V4O?=ZVV5tNEu$ND`YdT z+1oVvkxjNGb;=3E`D6uW(>J@8nS$!9GYKogWbMcBV$*Lr1mT}E3{SK8 za0ASN#t#za48nmuNYQPXyXyN8gt(wR2@kMi1%zRus`vmA_6V^N@QUw)F`$DTJ}-Wp zDB$h%kKH+m#Dh@6ucs;@KGU@Yg-`05#H@66x$G;#GMTy)pmHKJHZTF7u>pb$0}2ip zN-ksHnh8FxubV>u$OKp>J91;{aYZh*Ghh&QePqo#x_ zkkXNymr>%*O5kQ+gXk^XO5xEU!LYu%nAUXcVfS(J5SO~+$V+YxRGVY>iNy)k-ek9d zBFnoyXx+X6L%T*I7X$ms_8%04v!vJbpV-HT=sAuj>W6a;ad1o_qcK^-(cMsZ%)q7( z9Mz-^0~h0N#KyCot}Yg&*h(C06bsN~kUuv_xOI}uVnSX!^wTh?cmKhhsKqI&~J84Nu+NWxiZ_@Ae4NwLU^kV8j+g!3j zjVB$0lz8N0h!vxh)<{zjE<9tS8(0N#HVoi2lU;K03RzTD0)}5r7#g!64C4!XmqGAR z#K{*c7 z2*ASQ3=lO2jNRZQF~jOpDaEjB3~bUlrGfd7?=+Z;)MYczhyt8ulT5L6DM6HXGysl9 zXLZ1__2YP=OK@%gI0!0rMjfv`j8Q|%SjKc3SxcR>5O%b^#E${f_>lRpo0SEx6nZye zK4HrZ2<=q?A*B>>odzg=cLNk(f$@C(HBiJ{2W){I0ib2K6vAhA1g}r$Kj=tL;l;p; zm*dzg!(fO}bK#L9Bt6Ev`$?|G-l1z0^-|YJb`M=+09ZHNc(8~@oxsJMJcV4jfu%1e zyk(1yEbeVJvl8yr^D5g!cfosJZ4*J8shl&V3{ep!-1U_m86LJBa#U(Eb}C`U_i^1| z27dKe2OEYtWAq2pv1{uI4rh+nDIi^LfA^O@b{8AR?hTD&_qfK+shA$CP7^>kL zpykK4Q?rd83B`s}gaRiXO)AVezcd)Srhv8O9#F!GJZ`8ZdR3Tn2da!A`~dfOvt#1cWBnr<1LNFG44GVZ#tWfzSxjxD_ucovgvt(}0zMGRA() zU%Zz}k_{o;5R6@CL}FL`2sf?>Wx1vuxg**Ah6)6y*Z|9ftorZ{%j0&p_zs;;;@LOk zwY`m_E~qW(#EF#!69p7(Mc$$!k-A=_71ik$Kkd$xjv=p_61SrUJ^e$kczq!>&H|BIpu8GiVEbY#Fth86x0ol!XTgG;8wO_9b7rDa#g z0$}^&M`Qyb*_p4S&78hC>Bo4r@281acObxF6Y6jn15 
zQ<$^{_cE3g(M-rt7A_TLwU-0F_8$oxvIXAkokt_IU}V5Jp6Gcv=zyUQCWV21e)$d^ zqY*LuKq2^n@9+cJ;O~+j#uo6ekRSE{@UNubHZlQC+NuE{1PUTK8R5g(BpO%&P)-uM zu0WXJHaSGiMsyL5riN4a;Keg#?BXu~acaf*iftUnr6Y(wMcPu=tR?`%0dhU03GtKahRUwP~PAOF*zfqfC){!{N~H$=ZOc^pxuxLw1fFH;IRu;{dxTFop16T=T;k7N_v(e%_1Vy9$9 zQmf(CreU)Kd86tBhGZY&t;r?Urm70~?yN~XxKqzVGex6YUtYpC4GbL%`64fR-j&T| zOSpCdHnqBP;!1fbn}=Nr%(1Jy^=}4dk-KM#vrCoqiok3xpI^cyQ&+OZC_Bpw)A11- z*fXY9D@i4_T~~Hh=NES2y4HKrMq)3C?TXuxFV8%6aO&6_4;|bya(H<7$jG75Bl{1H z>_50`_u+$k4h$dIH#)ozK_dqb9om(a!Omt!_wN}V**|>b@aX>G!~2kR*Y1%$qx(h< z?K^Vd(CGdH!y^X{T$5#Z&&cTT;e!W;j~v=}_~5~jLq`g`M|bTW-g5wXM)w`rcX^duwt1R|UkBeat4DV>V$LDZfZrMam_v zXAmhr5(6<_)Sm1fbB(R}qkb_pm?+LX)*#BjSf@OrBa&ti!4PyoF=%hIKOrR<@=wTH zbWmx4Rxmxf{$o&y^Xo@fpZSdQuUF`xER$A4cu1B-F&PfYB^ zgTQ45AYDmb1AmBj0l==fVuDKMF?_%*z{9Wqf@Q7a)PidrHS^%g4U>uW<>gZ}x&_(t zLfAQef@1kh(x~jloUnWRmd||Z;KrD3^SDcu8rBC+Jo3ni>4#2_Kl;dH(C@UkH3v3l@Z z>}lhT5+MKy?_j^p+yEfwYaW5X4<-bjYM==`*g9r(o3)t$f&sEo@iEXxgG>C9b8|Mw z1$$pc>J_Ft771tQpqmrgfC*#f9AA`V108Nn>+HErim(=RW zy5kqpf$sHWSP|m*K%A@{$$;OwbqNaq?SWzJXhlh?!d=gkGz>=sQX${O)P1tQ(sj=@ zoDxGEs@6xZI7dH?8Q!kz&>{p2hC!gjI{ade#^)E`>&`5`4I95Op|$Q{Y9tGx1V?NlS;mJXN%qJcEh;hYQ@1$YoT-ZW=_U*Z2OT6e({zEK>u1z& zA2`@YEZu?-m&s<M=!!?DxS$#4v=M+m)hIS2_SQyG#Zorr<^c zAq(mV7E6*88Gzf{8E{Gg$<~%Q@Askg_MN2xuC;pjiY7Y^WVn%(Nl;-lbuc8-8|+%+ zH{#MPF2VMC*La7YLYVf>lT5IE)&`eu$)BcCYL?}aZzUz!Tz~+R=5N}hDdZ`e*Kk-j zvQQYE_6NQ3VA_zb!fgO+1Fb=1{m(@W$5k8&L&4E$g?gesw4hYu)%r}d8TEr2ECKa`sgSY7?L>f#&>CwHa{Q=H-vL`de*g>6jd@{; z5A5wxAW#h`))R9jCPRqX<|7RZ`;lJD7*2sWJ=F*9id7B{qo4)Ixx&prff|4z;0?z3 z2%rn52qu}vV>(KXI%5=^js&6O`Zp%-W?vo9Sz56y;;S(<3^=|35?=^6Tj1?7;xF^C z$_{zszLrR0J~gcMjSXF)PglqYB}?&#Aw%9FEh|>u-DMw(lxcNcK|{Op$i6rFyKDoS zDei?PG;t;s{iMw5WU=~sBK9q)%mpWcwJj%r*aHw_+Ji$an;^{eX4c^r(Ai_%zxnQk zV}E^o_I(fh-ZOpgYa5&W&~0z}%;O(gH8$I)puVx$w|(@_hn{}h#n)W0G*JtwU&say*l? 
zV=Ho4H+IC8?b(wwdjHt0)w4M%9$i&~_8{>L|NX(PxWZ2i$9-NpIA&T2JFC2PY*rBM z9*R4@6*&a1402doM#pADK^dx#4gkWsuo^8h*rPA%Zrk3t9WRcI&2pH!2ht|qJtmPZ zE)kUx{6kOgj^X|pRAy!Di$mUfU~E><{=Iv~@HI}qURtm@ExsLVzI&9iY!D*Fs7v%VaTdw<{YF&y1NRzdiCP}#419RJ>e%2Y-N za;S@EP?-Ve?idBDV`D{54skotGqv**sLVQjaLmy7!>CLM`J?y5z#l&`hSM*|s=>Px zl_~3LRAvoEWkS!b$C9Y~#){m1ieZ5|3@E4rq4;+suIwu65QV(`-Z49;{z+tAjTF}W z8lWdk_&U^~zMevdge0FlniMt`d)_%dR-~TJI`my=o(At@=#YT&t*A^r8%AZS^@GT& zG=+(=A{ksvlL;*T7VQ+Me;btv!Y>|=J+mib&-VMriZm;UQ5)?P8SYDGJMXnTMc%q$#323MBA7s^V=24lTvuz^Azby}r6?w;@je800 z6#56qp{(yfW!4u|ChGiEWK}!p5;Y^2 zEyu~pl8Y^V@pz0%Kb3fXIPrY@@py<{IG)~xE4e0SLs-(W|60=7foO^~RWgx=^_Gs| z=^8NUw(JH{f=kkk6t>}JN-zZDLL{lY*%!cs8&JVFQ5ca*D>EoF)pb}x*(V9QaKN_B zEU`Elav-Uvh~5JS!}F3BLiA`s4!ADxQUFQCU&}99CrP+^($#mC10xx~+Cm;8YpZ(^ zUq;rIm_Uy=3knz;jPL|U8Ebs(AY#y|F2|7Rua67wKHU}oq=3S#kIQ^{wZGC2=^sn} z*cP@AZJy)@r*Qomju_~hNW9}%XVNYe@*`OuQ-EhwhZjm^+=L9xIRr=CZdEAFR?p`F ztaKk~AK|bAjvXeR+QiKRbPssE0pQ@zp#S3EcjLg;zt5|czJy)2A@Phz487^))beko zy^kZ#+Pk;{h%wc^lp}duDg?%BvB*2?2(8GLnG)OCD$8&PKnV}h;nXbLW#W<4Hwt_} ztlYEejcxxZ4|Csi=Opz<@zDMN`^KiYmaVw{Wi>W{wq^Ebbip!3E8?+m_-*iz8R;nE z%c(zb@M8eE@)P2{+>*SC;eeeOroNbQFXCi9n2hih_{m&$1|O?0X^Ni4vPu`j#>yw4 zv0dm0MHI9QKu@hQxnZay`fQjTdQ5SR9D%cWvaUpEZ5<{oRO==6^`Y#fIK|^Uk4^kFa^H*5N#>Vfb-W zQ6)yH5qo%1#XQ!HA@5TW>Lnpkl<5xjSS!X`*+n#FKQP7&?C_Pe4_cm@YVDy?v1+`2 zIAlv?X`sdEd^Bs+gm3P~%Tl&5WWz!hsxcBcZW;*c5Wc}VzJ$*s{!0Rb(-x0I_GCy) zgRPq~J(*dCaSu69aw10YcN|aj3>@%%On&6VaG}!$DNz}bAvs9viu|p-&Xi~M*GfZ* z7bpA>5$SKDn>B0=cSEI zt{%X^;7@#tcTixsFjg;_23^2?8AV`Fppw)TJXjAS4OkVDVHnPf{ zRT)BGyCWx3&0!tkRJ?GvDSS|OZYPa|vg+4UE|6IxmHHZ7YjX?Cj70qGG@pm@MC|Ps zBYtVT!Q?RcfP_iQpnG7GFb3Gz4-NpFdMhu&K&|d{osy;8Fk=$+_hCZ!2@)M^Zvf+s zNLQmU$s8$=n@Ovj90;(Oy#QvvPnZ$gU1oD|yU{8|Y>HPcc%moJiDk6fshunjX#`AC zA90{xA9OI{G{WmAMdHssntyfw!l!CP=xt^HR4#G0Zg&aOh)(cfHE}Cgf47G0y zm@zIFg-wbsWPzTTf4DeibMN9ex#z%XaiI$M2v{+gGpfPeDJFERqM(QYdNCE@Tg3>J zfHzMg7G>%x6=cEPC~5`nM_FkT0B(843$9VY7i@YL+XGyZa<#XiD?k_AlEUwygYjc~ z8(Qb9>)f;J9F$Kgs?TAU3kle)Uh*jT2a$4O>WF%T_!KTG3|&JKX#G_n@v)cPjo$~M zv-iPseJm+pHRxfI-vdRhJrMCN`yd54e#nNfvA&{pxFW{D13v;+JE)NegLMq&REW>^ zt|zN3-;2!HYgn6PMpKj-sgd;RfO~e}MzJc$uGHyw&#rIWVC2^=>MRyABbJ^(f5(U9 zBT0~wJh}or+W;QTRkPhl3%ukK25WH>BJI;JNE}aaETln76r&=`5=q^(1R^Q%Z%IF_ z5pqPLgn+nVH)E&VR}zrem5FAw3ySRC7wXRJBre4?+a49)*umGGa6kdv)T;8!P_*&X zu30=8p%^WFw86QQs}#h8he*NU&(o1)OirT=_~_21DBR1EA7E^qf8WPf36LG|QQV;9 zxP_st9!O=O+E5EeSOO1>OYB2xJ3#~tcj13alA}$d;l~+7#%F9yJxj;oriVb%ZiA`< za>yTZK=zN+u7M*DA<(Qgo)y|0N*iWx5*wU zBZ~380evLnvt^fdm*~f`EA?mS2MAuW9V|=>+!w=No#~~KG1E)mEwPwhYh1v~ZN?+C z@`5Z{FBzg`%=9FSqCUb zFU}MBmxA*h#1UJTZfoQ@l!d@tiqV1$t8IOJg6(*cj#OL!@VpS-hr}vr2K&b0N7$KZ z=AVFLmyNN%CME0oC43mHShXeNStis(P@e;yVpz)Z!>{72U|feH%5%0UUI%5$&aEHI zfqL^So@52SJ{`-fYYEdV+kv|kZ_M9s>)8Ywnml35tu$n?761FtAuucoMKqCvZ6kH= zq)G|;vaC}K1brIX!8o3VR4|*DnhZ3S-X{yD2PLwd1LLi+pnjGIaZlR;pjxghp#z2x z)(?WfUmqV8N0JfAdK^pvP11Cqx!2k3VP)%PTBwpO$zVeg&KY3*|ZZ%`#w>4w2 z$Bkj2(%|3y8QC_@{;LoF<|lvW+21;LT0BE?Qyxbg&da1x6aS7ju{n#f`miDRI3O>Ra>!;xx zO~dvXm2N{mF|8(XF1Ai^9$z|F%pW145%jL5H@FiJU*;04(A0^|lzgeEOuBK{!G&`gpq((W|8D~Rv4vzqO+^D;4Y zN|tT9f)7Ll8Ue0z3cV%-^V2mUphMK1tle4g;z^tXqwNpddOA;OkG@tslX#*TIGksV zTd1+9srCp$!$31Y|3rEihho*u{U)%PHhq9opHX8A zSodi|yX6MW{Pe2u%f3{}_SI1;B-#pJYLQKQbBJ#yDi9Xja{` zX*xi|sG9w-Rm=o^_cKgwrbrz|?92_i(99Ndnw_p=?=)L~j;-%Qgv)ewtidW^4Ymo} zaX%z2mu4UQk$?RiU-->u?$t!m$wYX@@kB4dvC${12KUCRt&nkuCPzO8n|ICi10#5< z@nK;zN1lOX?#Q976S8y^&;iyDJU~+1tBV+t1EeDu@bZFiyrW3G;{)?}m#KVQR(v!4 zQMBkJY`wC%ESZ8`ge;>oUOyxy9X@4aA2Q}<;JIQ18%W0C67Tb6UuSqID8>|aSPL-mFygI%Ab!M}WGW8x58M$U znT$AQRW8tn$DErNa!5qHfo}9Y>_17xfv>h@hHCI>vY4Ls<^1%tW*N>#TK3~Q*Gjbv 
zE9a0M4tY8^Bmrui%27ycHu;%j8gfPq32=R3I;0g$NZW~UE1m6hcF?(t&Ip~ubnd2e zgw6>%w15d8rgNH3NQZnge3H%?I*#RvQ(t=2clpBa=j%VC^P_ZrjL!S%e2~tM)AHHa;zo7HibpD3U-_rRzI)6_mLuj|sX`|Clr-M!>oh~}v zbXL;gnowu{?11+=zWxcde-!`^|ULj z1F`E7wyLXr^`_P9m1XTDf|lYjw{+3vN*S$Ns;i6{Yo~C7tL? iS68rpD?S^wy=O 3: + stroke_op = strokeColor[3] + else: + stroke_op = None + + txt += "stroke='%s' " % stroke_str + if stroke_op is not None: + txt += "stroke-opacity='%f' " % stroke_op + + if fillColor: + if isinstance(fillColor, str): + fill_str = fillColor + fill_op = None + else: + fill_str = color2string(fillColor) + if len(fillColor) > 3: + fill_op = fillColor[3] + else: + fill_op = None + + txt += "fill='%s' " % fill_str + if fill_op is not None: + txt += "fill-opacity='%f' " % fill_op + + + return txt + +# common colors +# r g b a +red = ( 1, 0, 0, 1) +orange = ( 1, .5, 0, 1) +yellow = ( 1, 1, 0, 1) +green = ( 0, 1, 0, 1) +blue = ( 0, 0, 1, 1) +purple = ( 1, 0, 1, 1) +black = ( 0, 0, 0, 1) +grey = (.5, .5, .5, 1) +white = ( 1, 1, 1, 1) +null = ( 0, 0, 0, 0) + + +class Svg: + def __init__(self, stream): + self.out = stream + + def close(self): + self.out.close() + + + def beginSvg(self, width, height): + self.out.write( + """ + \n""") + self.out.write( + """\n""" % \ + (width, height)) + + # default style + self.out.write("") + + def endSvg(self, close=True): + self.out.write("") + if close: + self.close() + + def defLinearGradient(self, name, x1, y1, x2, y2, + stops=[], colors=[]): + self.out.write(''' +''' % + (name, x1, y1, x2, y2)) + + for stop, color in zip(stops, colors): + if len(color) == 3: + color = list(color) + [1.0] + + self.out.write('' % (stop, + color2string(color[:3]), + color[3])) + + self.out.write('') + + + def defRadialGradient(self, name, cx=50, cy=50, r=50, fx=50, fy=50, + stops=[], colors=[]): + self.out.write(''' +''' % + (name, cx, cy, r, fx, fy)) + + + for stop, color in zip(stops, colors): + if len(color) == 3: + color = list(color) + [1.0] + self.out.write('' % (stop, + color2string(color[:3]), + color[3])) + + self.out.write('') + + + def writeAttrOptions(self, color=None, **options): + if color: + if len(color) > 3: + self.out.write("stroke-opacity='%f' stroke='%s' " % + (color[3], color2string(color))) + else: + self.out.write("stroke='%s' " % (color2string(color))) + + for key, val in options.iteritems(): + self.out.write("%s='%s' " % (key, val)) + + + def line(self, x1, y1, x2, y2, color=None, **options): + self.out.write( + """\n") + + + def polygon(self, verts, strokeColor=black, fillColor=black): + self.out.write( + "\n") + + + def rect(self, x, y, width, height, strokeColor=black, fillColor=black): + self.out.write( + """\n""" % \ + (x, y, width, height, colorFields(strokeColor, fillColor))) + + + def circle(self, x, y, radius, strokeColor=black, fillColor=black): + self.out.write("\n" % \ + (x, y, radius, colorFields(strokeColor, fillColor))) + + def ellispe(self, x, y, xradius, yradius, strokeColor=black, fillColor=black): + self.out.write("\n" %\ + (x, y, xradius, yradius, colorFields(strokeColor, fillColor))) + + + def text(self, msg, x, y, size, strokeColor=null, fillColor=black, + anchor="start", angle=0): + + anglestr = "transform='translate(%f,%f) rotate(%f)'" % \ + (x, y, angle) + + self.out.write( + "%s\n" % \ + (anglestr, size, colorFields(strokeColor, fillColor), anchor, msg)) + + + def text2(self, msg, x, y, size, strokeColor=null, fillColor=black, + anchor="start", angle=0): + + if angle != 0: + anglestr = "" #transform='rotate(%f,0,0)'" % angle + else: + anglestr = "" + + + 
self.out.write( + "%s\n" % \ + (x, y, size, colorFields(strokeColor, fillColor), anchor, + anglestr, msg)) + + + def beginTransform(self, *options): + self.out.write("\n") + + def endTransform(self): + self.out.write("\n") + + + def beginStyle(self, style): + self.out.write("\n" % style) + + def endStyle(self): + self.out.write("\n") + + + def write(self, text): + self.out.write(text) + + + def comment(self, msg): + self.out.write("\n\n\n" % msg) + + + + + +def convert(filename, outfilename = None): + if outfilename == None: + outfilename = filename.replace(".svg", ".png") + os.system("convert " +filename+ " " +outfilename) + #os.system("rm " + filename) + + return outfilename + + + +# testing +if __name__ == "__main__": + svg = Svg(file("out.svg", "w")) + + svg.beginSvg(300, 500) + + svg.comment("MY COMMENT") + + svg.beginTransform(('scale', .5, .5)) + + svg.line(0, 0, 100, 100, red) + svg.rect(10, 10, 80, 100, black, (0, 1, 1, .5)) + svg.polygon([80,90, 100,100, 60,100], (0, 0, 0, 1), (0, 0, 1, .3)) + + svg.endTransform() + + + svg.beginStyle("stroke-width:3") + svg.beginTransform(('translate', 200, 0)) + + svg.line(0, 0, 100, 100, red) + svg.rect(10, 10, 80, 100, black, (0, 1, 1, .5)) + svg.polygon([80,90, 100,100, 60,100], (0, 0, 0, 1), (0, 0, 1, .3)) + + svg.endTransform() + svg.endStyle() + + svg.ellispe(150, 250, 70, 50, black, (.5, .5, .9, 1)) + svg.circle(150, 250, 50, black, red) + svg.circle(150, 250, 30, white, blue) + + + + svg.beginStyle("font-family: arial") + svg.beginTransform(('translate', 0, -200)) + svg.beginTransform(('translate', 0, 400)) + svg.text("A", 0, 0, 40, blue, (1, 0, 0, .1)) + svg.endTransform() + + svg.beginTransform(('translate', 0, 440)) + svg.text("C", 0, 0, 40, blue, (1, 0, 0, .1)) + svg.endTransform() + svg.endTransform() + svg.endStyle() + + svg.beginStyle("font-family: helvetica") + svg.beginTransform(('translate', 0, -200)) + svg.beginTransform(('translate', 0, 480), ('scale', 1, 1)) + svg.text("T", 3, 0, 40, blue, (1, 0, 0, .1)) + svg.endTransform() + + svg.beginTransform(('translate', 0, 520), ('scale', 1, .1)) + svg.text("G", 0, 0, 40, blue, (1, 0, 0, .1)) + svg.endTransform() + svg.endTransform() + svg.endStyle() + + svg.line(35, 200, 35, 400, red) + + + svg.beginStyle("font-family: courier") + svg.text("* FIXED WIDTH *", 100, 400, 10) + svg.text("* IS THE DEFAULT *", 100, 410, 10) + svg.endStyle() + + for i in range(0, 300, 10): + color = (i / 300.0, 0, 1 - i/300.0, 1) + svg.rect(i, 450, 10, 50, color, color) + + svg.endSvg() + + convert("out.svg") diff --git a/arghmm/deps/rasmus/svg.pyc b/arghmm/deps/rasmus/svg.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42ddda7ee7d377aa77ccc5de7aaade78aefe7b02 GIT binary patch literal 11408 zcmd5?Piz}kdVfPwlt@dK{7*6yCmAcNrR)LrHcl*>TsFosk^Z>IPbxGo;3* zI5cNuQGpEe$}L%D$oY)4EfE1HN6qw7wtmE@qbP%{Qy?L{RMtjs|K=s2Jzg$<9~z%odPinFOa5H;WS3a6`6w- z1G=J?R^w~aS&U!xN07GbkEf^X_vG(_`wP^#c$CZK+!T^&<^mWa9+U)=QOkb<2J3lv-`?iBk=HRMfY&x4SRfY5Apg#oylXOaAlvY-=wV z?oK)H2W_`m&!Fs+N@bijP8d~E!rhY&;eZLUz=4-3h3$KL~}Ucx#e z&8X)4Xiu{pJVI@d3G*xoYDtRZ6QsZplN59gNy#d%H=te&grvX<_K<=O z*^rcThCm1d)~3m^RWJs!BjbE>8T|F0It#@qzi9fWqly`?RjInc9vW(B$fKd=HX32S zlVP#9*EE3;u&e94O+TnKtBxQAYhYhk+iN-&MJKflrzsQ#^l_Su8$?l%Lz#=|5-ANj zfy}FWZE`+#3X5JV7z&3-6Sv9nn_jDbh$N8$@|&*Huzgwn66#bcn;JC6jI5DQ4I8J7 z<0v0DMvU>JYtvejIIzw{sh=$gH`boltGYrd?&f1W{uog0XfLnmnl~VJLo$G}4gg?g zfF;BLaKl;7i9bN`RZ5&bni79uYgWA)lwbhfc2EW)7(gE0ACth{4D*l9ACth@>tbGt z$GgR2QbcH=TZg4M#>jwUB@Tf1e+9dT;2>E!CY9pfxg+@Vld>aN{Zo>G(LOC1kaR{e 
z@ciSFDJW}g`Ybpj27pN7BL;xR3z`F+-UI^_&`v|O8k;L`CzuxO5sALjs0FkqfRIz! z?jKAjy3I|u=?0sd3!I-zDkC!4BueN#feeU+bp;~!(Ir7M$5`wTh-qzjz9ajvsCRv% z)jEUOuHos@m`0ut86-3=A`{UF;K4-Ej%u2LKk_@JYAH3|+me=^vLVj3>i-03d8xDA zFrPbZ-}Rac#ktu_Mbl|kJsakIq4;3^=FFAi(xRC+6F=qi%lB@qKf1qS!Y`QjAAEf0 z_6@T*Qz|{Ye4|uaUSBrXKD%Yk&CZodE33ucvBj+*Xk9Ipc6WDYcQ4O+?Rx1}yVBZn zt9}WiN*sk5=yB%eX6?W(F6RB;CZSww*R_K!SUbCDZaHp!E718K>ad@tzi`Al^Or7N z;=IM=cIP-h*;}qJ3J_xn@3!5*nP!_vSfnll2t@VzeG|Lb%EwOKZLU49FJnz{1Ioyy z2aQ}RZ5&S@R;~-^77IQE0-T$6l!y!V0bq$#sCb0$s<@1DslHeyCl>YYh-qt_39+ZS zKuqf*GX04jsMt=^#{ND+LgNG%yJ8cyJ z@9?V##Z!p(D(br_^>RqP%+Z;>$Pw|-2p*9p5L{reu1c%Ap(fCa+=&-L`w+Doy)eqe zV4W*49^-<~E~|%e_kmcUbf|UU4Y%o3+Q7E$I?cdz?S;uHf70yC;a!`W znl$&AvzhD6M|IuYqJ#qQ4E*`|l3&KF><3=U^lCNV3AzLPV6WjUOtNK0{NU9o|07e1 zVW@C*sy10#^l6^UrCy={0wGocO~JjOKpfu}Zp#|P7>N)Xt{+H*Fik0GZJ-HhR<<1o zK&j50y!P~~Gp|eYqB11L7nKnLeVLJ!7Oigo6o@FscJB1>{WDa`-c>4Rq>YKxm|{3E zfuJCbaNtzRokx!<;|7Kj5di-jg;>zQf!NdtfPh>Lp~3gE{pwFBr@p#-OeW zF_=G$L4X$+g!d0ID7hGeAABhW?}PF#1_{1O;}8a`oi2Q;dtD5+yH&NQvBpM1^=krS zFLkd0qxB~CBS^H)F-NpbQ&fUN>n-F2Y}Jl_?IEDYc}WU2U02()COP0;Cf{K4wE)lR zVfGf*NdWJSbO=173p_FB3?%q`hrRz832^2;(#IsR=Wy>l`b=vpW$>`4Jv+7F=FWC<#sBQ;ADs5foeZ~fEhOS}x^KXi|O!M$XuVijM^ z)Kh0q-p?zI{#lYSEF#YTdJwel>D9hKozDgZhjj{4JePVqH3DN&rRgVGe2W~vi_Gh6 z2W}ih7I0$V>65f}*3fpG6;rC*bw?0DR{b8W+t}w7U{NlW|Cu{R;#yJ_YM=2$$2K^zcquTe~cz2bV=XG+dqy3@eb_r z>C}jU!&<7!cA}2#mAHUXh*^TuJG?*sz?(4I?c5~j!AkmitwCtyMbLX)lnc;pX+|_r zpk>FM!GM}5M2PRPu-Vwlla5t6j+a|rW3TQt%_-k(dAM@{tPy}?Zemzso2lA+Q#Df# zmOeL86y`IRVE!nI;u|~dO0({WLd2LC|B2W<@Ga_&MK>k(&F!BIxTkP4e~4^1?1T^s zDr;Ocs_ejri;ED5m81~JK@kG@NP!5NA&_pAX2;Rz9EO(LxNI^zAw>178`tAfBZ}*h z2XhJ7G)5iZS&UWTM@P~csr&AE$^$(oQ=aMz2Jx7yv2}MI;QOk{rn(atFXe z+z9T3mLS2MDU2&u-FCI%nDEh}!|>4ZZtK^Va76T2EaeB7$NCV7Ad7BxCjp@TF@!5> za(|4R7#Ic1*jL05Y-Oj>a9h4(9_C8OrFO`rFJTWMtc_dD9@ate$RF`6dtV=~xD>bH zx12vgHeyy7B3Ighm=B3W8Dz7+fQbp5eW+nWiboL~Tw;6yQz-VT9?nD1${q!z60Q10 zqybEgU-R1A3q{E`DmdnfwYp%oy+D^P6#d)iTdqS17t4Xu5tYT|062}b<~4&E-~EDD zlv+{QWd?VPTOOh67Z<1ea*34)?CP&k;`lT{>bCv570ZU&#sy+c7BuS>gljOE4M)z+ zl0)p#KWKAx691>9f@G?6+8C2FL?oifAg-Yq(LZA~IP^R&jc^FX*^>9dl8c1}$9|Nl zq`V+Rh-^^D2VxaS#Hzr;B7gj4B#|h{_7L^ITo@E$B#)yFY9tG53CqNSu&%L4+a~VE zy3X9kOo+_>ah#5k9H0M7WOOi)9>MKxHccaYR^QdJ6Q5h`^HU@-XT{kYogb5n%b)*1 zzBx#+T)?LUFx^h8cNpg)I>^X{z}aMU1)mk#<7c=iV5jWiP7u<8PD6(~i7x8P0$w>_ z;kFqXSGE5zEo78KuTt)McMhgWiovI`8avZ}* z$SA6-C$VNp8-<9NeaGn^fEYi@1=@PozWp6){5Oy&sO})5nLO|`Vw~+hnbWB%qgp-; z+X!fh?hc|#B*Y%863J8Bne=g4r9ZVXOT!>^GwMmw4K)xH zCTM4{33TZ@BeBu>_{vkRe&V~o&@bV3seclpiP2Y_ytVoll)Ah5|15N0x0pzf%$U;RQA9xgxKXJ-wZ6@|D@?93;YT8wq`A{*h=i;a*56@rm&rXQ_nC|{ z`4maa2FZQJukF?trYz&}X)&|-5|GQ}MuxMaCq|9Y6T|o!&5dS9Gou4pV>FNFcy1g$ z*efHKOFib{8s-2dfk$43k9Y5h5VIge!ExlJA@`8*SGeJUuY@U?fTNtimk?Xn6Io;Hq+I-CP2~-C#n=)i2A>KP_pGtuM^vtb6sc#Tdr@DU&~9 z@;^*uwt3|0|M2(x`OhVnS25zxCCaN+w%x|wRkPB@#jbk=|99g4KBy8Zugk4K{f9r` z&tEUOf7NZc;S-`gKR(~cG@n!7c$1U zXx_a2TPw@v!`sX2pYRLe#jyJJnz{bTin+XU^ZJ83>*huGhghbL5la0{`-;|195U<2zSSjx?c{L@4fENVn@j>O2mtw*yQ z(Hv(YunWb)h#Y#xM3Ejr`%x52Beg(NEW}D3lSmz2JCbVh>y4!Py>?pam2PCLD>BuH zC4Il%dQ{D$ri}MU{wDMSr33@0&ekI)^bD3QKWEPB_N7^u8++JRan<5EwO^%6 z);?Ig@c}nWm#m%hGAAQf^_}VaY#>*~*~k7ua$nVH3}N1~jxrXxhW|TAhSRx$TrrFP zE;{g=KJ^#F$3~1Zsj<`uehbDqV=9%$IX24U1ai(O7=;Y#@Ow^9BuK-v$q4>uiFJ}n VT>+!V@taSbLJ;t_VW#q_{{u>HiR%CW literal 0 HcmV?d00001 diff --git a/arghmm/deps/rasmus/tablelib.py b/arghmm/deps/rasmus/tablelib.py new file mode 100644 index 00000000..67bdf208 --- /dev/null +++ b/arghmm/deps/rasmus/tablelib.py @@ -0,0 +1,1465 @@ +""" +tablelib.py + +Portable Tabular Format (PTF) + +Implements and standardizes Manolis style tab-delimited table file 
format. + + +--Example---------------------------------------------------- +##types:string int +name num +mike 23 +alex 12 +matt 7 +------------------------------------------------------------- + +File is tab delimited. + +Directives are on a single line and begin with two hashes '##' +No space after colon is allowed. + + +Table can also handle custom types. Custom types must do the following + + 1. default value: + default = mytype() + returns default value + + 2. convert from string + val = mytype(string) + converts from string to custom type + + 3. convert to string + string = str(val) + converts val of type 'mytype' to a string + TODO: I could change this interface... + I could just use mytype.__str__(val) + + 4. type inference (optional) + type(val) + returns instance of 'mytype' + TODO: I could not require this (only map() really needs it and __init__()) + + +""" + +# python libs +import copy +import StringIO +import sys +import os +import itertools + +# rasmus libs +from rasmus import util + + +# table directives +DIR_TYPES = 1 + +# a special unique null type (more 'null' than None) +NULL = object() + + +class TableException (Exception): + """Exception class for Table""" + def __init__(self, errmsg, filename=None, lineno=None): + msg = "" + add_space = False + add_semicolon = False + + if filename: + msg += "%s" % filename + add_space = True + add_semicolon = True + + if lineno: + add_semicolon = True + if add_space: + msg += " " + msg += "line %d" % lineno + + if add_semicolon: + msg += ": " + + msg = msg + errmsg + + Exception.__init__(self, msg) + + +#=========================================================================== +# Types handling +# + +def guess_type(text): + """Guesses the type of a value encoded in a string""" + + if text.isdigit(): + return int + + try: + float(text) + return float + except ValueError: + pass + + try: + str2bool(text) + return bool + except ValueError: + pass + + return str + + +def str2bool(text=None): + """Will parse every way manolis stores a boolean as a string""" + + if text is None: + # default value + return False + + text2 = text.lower() + + if text2 == "false": + return False + elif text2 == "true": + return True + else: + raise ValueError("unknown string for bool '%s'" % text) + + + + + +#============================================================================= + +_type_definitions = [["string", str], + ["unknown", str], # backwards compatiable name + ["str", str], # backwards compatiable name + ["int", int], + ["float", float], + ["bool", bool]] + +# NOTE: ordering of name-type pairs is important +# the first occurrence of a type gives the perferred name for writing + + +def parse_type(text): + for name, t in _type_definitions: + if text == name: + return t + raise Exception("unknown type '%s'" % text) + + +def format_type(t): + for name, t2 in _type_definitions: + if t == t2: + return name + raise Exception("unknown type '%s'" % t) + + + +#=========================================================================== +# Table class +# + +class Table (list): + """Class implementing the Portable Table Format""" + + def __init__(self, rows=None, + headers=None, + types={}, + filename=None): + + # set table info + self.headers = copy.copy(headers) + self.types = copy.copy(types) + self.filename = filename + self.comments = [] + self.delim = "\t" + self.nheaders = 1 + + + # set data + if rows is not None: + it = iter(rows) + try: + first_row = it.next() + + # data is a list of dicts + if isinstance(first_row, dict): + 
self.append(first_row) + for row in it: + self.append(dict(row)) + + if self.headers is None: + self.headers = sorted(self[0].keys()) + + # data is a list of lists + elif isinstance(first_row, (list, tuple)): + if self.headers is None: + self.headers = range(len(first_row)) + self.nheaders = 0 + for row in itertools.chain([first_row], it): + self.append(dict(zip(self.headers, row))) + + + # set table info + for key in self.headers: + # guess any types not specified + if key not in self.types: + self.types[key] = type(self[0][key]) + + except StopIteration: + pass + + + + def clear(self, headers=None, delim="\t", nheaders=1, types=None): + """Clears the contents of the table""" + + self[:] = [] + self.headers = copy.copy(headers) + if types is None: + self.types = {} + else: + self.types = copy.copy(types) + self.comments = [] + self.delim = delim + self.nheaders = nheaders + + + def new(self, headers=None): + """ + return a new table with the same info but no data + + headers - if specified, only a subset of the headers will be copied + """ + + if headers is None: + headers = self.headers + + tab = type(self)(headers=headers) + + tab.types = util.subdict(self.types, headers) + tab.comments = copy.copy(self.comments) + tab.delim = self.delim + tab.nheaders = self.nheaders + + return tab + + + #=================================================================== + # Input/Output + # + + def read(self, filename, delim="\t", nheaders=1, + headers=None, types=None, guess_types=True): + for row in self.read_iter(filename, delim=delim, nheaders=nheaders, + headers=headers, types=types, + guess_types=guess_types): + self.append(row) + + + def read_iter(self, filename, delim="\t", nheaders=1, + headers=None, types=None, guess_types=True): + """Reads a character delimited file and returns a list of dictionaries + + notes: + Lines that start with '#' are treated as comments and are skiped + Blank lines are skipped. + + If the first comment starts with '#Types:' the following tokens + are interpreted as the data type of the column and values in that + column are automatically converted. 
+ + supported datatypes: + - string + - int + - float + - bool + - unknown (no conversion is done, left as a string) + + """ + + infile = util.open_stream(filename) + + # remember filename for later saving + if isinstance(filename, str): + self.filename = filename + + + # clear table + self.clear(headers, delim, nheaders, types) + + + # temps for reading only + self.tmptypes = None + + + # line number for error reporting + lineno = 0 + + + try: + for line in infile: + line = line.rstrip() + lineno += 1 + + # skip blank lines + if len(line) == 0: + continue + + # handle comments + if line[0] == "#": + if not self._read_directive(line): + self.comments.append(line) + continue + + # split row into tokens + tokens = line.split(delim) + + # if no headers read yet, use this line as a header + if not self.headers: + # parse headers + if self.nheaders > 0: + self._parse_header(tokens) + continue + else: + # default headers are numbers + self.headers = range(len(tokens)) + + assert len(tokens) == len(self.headers), tokens + + # populate types + if not self.types: + if self.tmptypes: + assert len(self.tmptypes) == len(self.headers) + self.types = dict(zip(self.headers, self.tmptypes)) + else: + # default types + if guess_types: + for token, header in zip(tokens, self.headers): + self.types.setdefault(header, + guess_type(token)) + else: + for header in self.headers: + self.types.setdefault(header, str) + + # parse data + row = {} + for i in xrange(len(tokens)): + key = self.headers[i] + t = self.types[key] + if t is bool: + row[key] = str2bool(tokens[i]) + else: + row[key] = t(tokens[i]) + + # return completed row + yield row + + + except Exception, e: + # report error in parsing input file + raise TableException(str(e), self.filename, lineno) + #raise + + + # clear temps + del self.tmptypes + + raise StopIteration + + + + def _parse_header(self, tokens): + """Parse the tokens as headers""" + + self.headers = tokens + + # check that headers are unique + check = set() + for header in self.headers: + if header in check: + raise TableException("Duplicate header '%s'" % header) + check.add(header) + + + + def write(self, filename=sys.stdout, delim="\t"): + """Write a table to a file or stream. + + If 'filename' is a string it will be opened as a file. + If 'filename' is a stream it will be written to directly. 
+ """ + + # remember filename for later saving + if isinstance(filename, str): + self.filename = filename + + out = util.open_stream(filename, "w") + + self.write_header(out, delim=delim) + + # tmp variable + types = self.types + + # write data + for row in self: + # code is inlined here for speed + rowstr = [] + for header in self.headers: + if header in row: + rowstr.append(types[header].__str__(row[header])) + else: + rowstr.append('') + print >>out, delim.join(rowstr) + + + def write_header(self, out=sys.stdout, delim="\t"): + # ensure all info is complete + for key in self.headers: + if key not in self.types: + if len(self) > 0: + self.types[key] = type(self[0][key]) + else: + self.types[key] = str + + + # ensure types are in directives + if DIR_TYPES not in self.comments: + self.comments = [DIR_TYPES] + self.comments + + + # write comments + for line in self.comments: + if isinstance(line, str): + print >>out, line + else: + self._write_directive(line, out, delim) + + + # write header + if self.nheaders > 0: + print >>out, delim.join(self.headers) + + + + def write_row(self, out, row, delim="\t"): + rowstr = [] + types = self.types + for header in self.headers: + if header in row: + rowstr.append(types[header].__str__(row[header])) + else: + rowstr.append('') + out.write(delim.join(rowstr)) + out.write("\n") + + # NOTE: back-compat + writeRow = write_row + + + def save(self): + """Writes the table to the last used filename for the read() or write() + function""" + + if self.filename is not None: + self.write(self.filename) + else: + raise Exception("Table has no filename") + + + #=================================================================== + # Input/Output: Directives + # + + def _determine_directive(self, line): + if line.startswith("#Types:") or \ + line.startswith("#types:") or \ + line.startswith("##types:"): + # backwards compatible + return DIR_TYPES + + else: + return None + + + + def _read_directive(self, line): + """Attempt to read a line with a directive""" + + directive = self._determine_directive(line) + + if directive is None: + return False + + rest = line[line.index(":")+1:] + self.comments.append(directive) + + if directive == DIR_TYPES: + self.tmptypes = map(parse_type, rest.rstrip().split(self.delim)) + return True + + else: + return False + + + def _write_directive(self, line, out, delim): + """Write a directive""" + + if line == DIR_TYPES: + out.write("##types:" + delim.join(format_type(self.types[h]) + for h in self.headers) + "\n") + + else: + raise "unknown directive:", line + + + #=================================================================== + # Table manipulation + # + + def add(self, **kargs): + """Add a row to the table + + tab.add(col1=val1, col2=val2, col3=val3) + """ + self.append(kargs) + + + def add_col(self, header, coltype=None, default=NULL, pos=None, data=None): + """Add a column to the table. You must populate column data yourself. 
+ + header - name of the column + coltype - type of the values in that column + default - default value of the column + pos - position to insert column (default: right-end) + """ + # ensure header is unique + if header in self.headers: + raise Exception("header '%s' is already in table" % header) + + # default column position is last column + if pos is None: + pos = len(self.headers) + + # default coltype is guessed from data + if coltype is None: + if data is None: + raise Exception("must specify data or coltype") + else: + coltype = type(data[0]) + + # default value is inferred from column type + if default is NULL: + default = coltype() + + # update table info + self.headers.insert(pos, header) + self.types[header] = coltype + + # add data + if data is not None: + for i in xrange(len(self)): + self[i][header] = data[i] + + + def remove_col(self, *cols): + """Removes a column from the table""" + + for col in cols: + self.headers.remove(col) + del self.types[col] + + for row in self: + del row[col] + + + def rename_col(self, oldname, newname): + """Renames a column""" + + # change header + col = self.headers.index(oldname) + + if col == -1: + raise Exception("column '%s' is not in table" % oldname) + + self.headers[col] = newname + + # change info + self.types[newname] = self.types[oldname] + del self.types[oldname] + + # change data + for row in self: + row[newname] = row[oldname] + del row[oldname] + + + def get_matrix(self, rowheader="rlabels"): + """Returns mat, rlabels, clabels + + where mat is a copy of the table as a 2D list + rlabels are the row labels + clabels are the column labels + """ + + # get labels + if rowheader is not None and rowheader in self.headers: + rlabels = self.cget(rowheader) + clabels = copy.copy(self.headers) + clabels.remove(rowheader) + else: + rlabels = range(len(self)) + clabels = copy.copy(self.headers) + + # get data + mat = [] + for row in self: + mat.append(util.mget(row, clabels)) + + return mat, rlabels, clabels + + + def filter(self, cond): + """Returns a table with a subset of rows such that cond(row) == True""" + tab = self.new() + + for row in self: + if cond(row): + tab.append(row) + + return tab + + + def map(self, func, headers=None): + """Returns a new table with each row mapped by function 'func'""" + + if len(self) == 0: + # handle case of zero length table + return self.new() + + # determine what table will look like from first row + first_row = func(self[0]) + + # determine headers of new table + if headers is None: + # try order new headers the same way as old headers + headers = first_row.keys() + lookup = util.list2lookup(self.headers) + top = len(headers) + headers.sort(key=lambda x: (lookup.get(x, top), x)) + + tab = type(self)( + itertools.chain([first_row], (func(x) for x in self[1:])), + headers=headers) + tab.delim = self.delim + tab.nheaders = self.nheaders + + return tab + + + def uniq(self, key=None, col=None): + """ + Returns a copy of this table with consecutive repeated rows removed + """ + + tab = self.new() + + if len(self) == 0: + return tab + + if col is not None: + key = lambda x: x[col] + + if key is None: + last_row = self[0] + for row in self[1:]: + if row != last_row: + tab.append(row) + last_row = row + else: + last_row = key(self[0]) + for row in self[1:]: + key_row = key(row) + if key_row != last_row: + tab.append(row) + last_row = key_row + + + return tab + + + def groupby(self, key=None): + """Groups the row of the table into separate tables based on the + function key(row). 
Returns a dict where the keys are the values + retruned from key(row) and the values are tables. + + Ex: + tab = Table([{'name': 'matt', 'major': 'CS'}, + {'name': 'mike', 'major': 'CS'}, + {'name': 'alex', 'major': 'bio'}]) + lookup = tab.groupby(lambda x: x['major']) + + lookup ==> {'CS': Table([{'name': 'matt', 'major': 'CS'}, + {'name': 'mike', 'major': 'CS'}]), + 'bio': Table([{'name': 'alex', 'major': 'bio'}])} + + Can also use a column name such as: + tab.groupby('major') + + """ + + + groups = {} + + if isinstance(key, str): + keystr = key + key = lambda x: x[keystr] + + if key is None: + raise Exception("must specify keyfunc") + + + for row in self: + key2 = key(row) + + # add new table if necessary + if key2 not in groups: + groups[key2] = self.new() + + groups[key2].append(row) + + return groups + + + def lookup(self, *keys, **options): + """Returns a lookup dict based on a column 'key' + or multiple keys + + extra options: + default=None + uselast=False # allow multiple rows, just use last + """ + + options.setdefault("default", None) + options.setdefault("uselast", False) + lookup = util.Dict(dim=len(keys), default=options["default"]) + uselast = options["uselast"] + + for row in self: + keys2 = util.mget(row, keys) + ptr = lookup + for i in xrange(len(keys2) - 1): + ptr = lookup[keys2[i]] + if not uselast and keys2[-1] in ptr: + raise Exception("duplicate key '%s'" % str(keys2[-1])) + ptr[keys2[-1]] = row + + lookup.insert = False + return lookup + + + def get(self, rows=None, cols=None): + """Returns a table with a subset of the rows and columns""" + + # determine rows and cols + if rows is None: + rows = range(len(self)) + + if cols is None: + cols = self.headers + + tab = self.new(cols) + + # copy data + for i in rows: + row = {} + for j in cols: + row[j] = self[i][j] + tab.append(row) + + return tab + + + def cget(self, *cols): + """Returns columns of the table as separate lists""" + + ret = [] + + for col in cols: + newcol = [] + ret.append(newcol) + + for row in self: + newcol.append(row[col]) + + if len(ret) == 1: + return ret[0] + else: + return ret + + + def get_row(self, *rows): + """Returns row(s) as list(s)""" + + if len(rows) == 1: + # return one row + row = self[rows[0]] + return [row[j] for j in self.headers] + + else: + # return multiple rows (or zero) + return [[self[i][j] for j in self.headers] + for i in rows] + + + + + + def sort(self, cmp=None, key=None, reverse=False, col=None): + """Sorts the table inplace""" + + if col is not None: + key = lambda row: row[col] + elif cmp is None and key is None: + # sort by first column + key = lambda row: row[self.headers[0]] + + list.sort(self, cmp=cmp, key=key, reverse=reverse) + + + def __getitem__(self, key): + if isinstance(key, slice): + # return another table if key is a slice + tab = self.new() + tab[:] = list.__getitem__(self, key) + return tab + else: + return list.__getitem__(self, key) + + + def __getslice__(self, a, b): + # for python version compatibility + return self.__getitem__(slice(a, b)) + + + def __repr__(self): + s = StringIO.StringIO("w") + self.write_pretty(s) + return s.getvalue() + + + def write_pretty(self, out=sys.stdout, spacing=2): + mat2, rlabels, clabels = self.get_matrix(rowheader=None) + + mat = [] + + # get headers + mat.append(clabels) + + # get data + mat.extend(mat2) + + util.printcols(mat, spacing=spacing, out=out) + + + def __str__(self): + s = StringIO.StringIO("w") + self.write(s) + return s.getvalue() + + + + + 
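[Editor's note: the sketch below is an editorial illustration for reviewers unfamiliar with the PTF interface, not part of the patch. It exercises only read_table(), add_col(), groupby() and str() as defined in this module (read_table appears just below), with an in-memory string standing in for a .tab file.]

    import StringIO

    # Tab-delimited PTF text with a ##types directive (tabs are required,
    # since both the directive and the rows are split on the table delimiter).
    ptf_text = ("##types:string\tint\n"
                "name\tnum\n"
                "mike\t23\n"
                "alex\t12\n"
                "matt\t7\n")

    # Parse the ##types directive and header row; 'num' values become ints.
    tab = read_table(StringIO.StringIO(ptf_text))
    print tab.types                  # e.g. {'name': <type 'str'>, 'num': <type 'int'>}

    # New columns must be populated explicitly, here via the data= argument.
    tab.add_col("big", bool, data=[row["num"] > 10 for row in tab])

    # Group rows by an arbitrary key function; each group is itself a Table.
    by_size = tab.groupby(lambda row: row["num"] > 10)
    print len(by_size[True]), len(by_size[False])   # 2 1

    # str() serializes back to PTF, including the ##types directive line.
    print str(tab)

The ##types directive is what makes the format round-trip cleanly: write_header() re-emits it from self.types, so a table read from disk can be re-saved without losing column types.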
+#=========================================================================== +# convenience functions +# + +def read_table(filename, delim="\t", headers=None, + nheaders=1, types=None, + guess_types=True): + """Read a Table from a file written in PTF""" + + table = Table() + table.read(filename, delim=delim, headers=headers, + nheaders=nheaders, types=types, + guess_types=guess_types) + return table + +# NOTE: back-compat +readTable = read_table + + +def iter_table(filename, delim="\t", nheaders=1): + """Iterate through the rows of a Table from a file written in PTF""" + + table = Table() + return table.read_iter(filename, delim=delim, nheaders=nheaders) + +# NOTE: back-compat +iterTable = iter_table + + +def histtab(items, headers=["item", "count", "percent"]): + h = util.hist_dict(items) + tab = Table(headers=headers) + tot = float(sum(h.itervalues())) + + if len(headers) == 2: + for key, val in h.items(): + tab.append({headers[0]: key, + headers[1]: val}) + + elif len(headers) == 3: + for key, val in h.items(): + tab.append({headers[0]: key, + headers[1]: val, + headers[2]: val / tot}) + + else: + raise Exception("Wrong number of headers (2 or 3 only)") + + tab.sort(col=headers[1], reverse=True) + + return tab + + +def join_tables(* args, **kwargs): + """Join together tables into one table. + Each argument is a tuple (table_i, key_i, cols_i) + + key_i is either a column name or a function that maps a + table row to a unique key + """ + + if len(args) == 0: + return Table() + + # determine common keys + tab, key, cols = args[0] + if isinstance(key, str): + keys = tab.cget(key) + lookups = [tab.lookup(key)] + else: + keys = map(key, tab) + lookup = {} + for row in tab: + lookup[key(row)] = row + lookups = [lookup] + + keyset = set(keys) + + + for tab, key, cols in args[1:]: + if isinstance(key, str): + keyset = keyset & set(tab.cget(key)) + lookups.append(tab.lookup(key)) + else: + keyset = keyset & set(map(key, tab)) + lookup = {} + for row in tab: + lookup[key(row)] = row + + lookups.append(lookup) + + keys = filter(lambda x: x in keyset, keys) + + + # build new table + if "headers" not in kwargs: + headers = util.concat(*util.cget(args, 2)) + else: + headers = kwargs["headers"] + tab = Table(headers=headers) + + for key in keys: + row = {} + for (tab2, key2, cols), lookup in zip(args, lookups): + row.update(util.subdict(lookup[key], cols)) + tab.append(row) + + return tab + + +def showtab(tab, name='table'): + """Show a table in a new xterm""" + + name = name.replace("'", "") + tmp = util.tempfile(".", "tmp", ".tab") + tab.write_pretty(file(tmp, "w")) + os.system("(xterm -T '%s' -n '%s' -e less -S %s; rm %s) &" % + (name, name, tmp, tmp)) + + +def sqlget(dbfile, query, maxrows=None, headers=None, headernum=False): + """Get a table from a sqlite file""" + try: + from pysqlite2 import dbapi2 as sqlite + except ImportError: + try: + from sqlite3 import dbapi2 as sqlite + except ImportError: + import sqlite + + # open database + if hasattr(dbfile, "cursor"): + con = dbfile + cur = con.cursor() + auto_close = False + else: + con = sqlite.connect(dbfile, isolation_level="DEFERRED") + cur = con.cursor() + auto_close = True + + cur.execute(query) + + # infer header names + if headers is None and not headernum: + headers = [x[0] for x in cur.description] + + if maxrows is not None: + lst = [] + try: + for i in xrange(maxrows): + lst.append(cur.next()) + except StopIteration: + pass + tab = Table(lst, headers=headers) + else: + tab = Table(list(cur), headers=headers) + + if auto_close: + 
con.close() + return tab + + +def sqlexe(dbfile, sql): + + try: + from pysqlite2 import dbapi2 as sqlite + except ImportError: + try: + from sqlite3 import dbapi2 as sqlite + except ImportError: + import sqlite + + # open database + if hasattr(dbfile, "cursor"): + con = dbfile + cur = con.cursor() + auto_close = False + else: + con = sqlite.connect(dbfile, isolation_level="DEFERRED") + cur = con.cursor() + auto_close = True + + cur.execute(sql) + + if auto_close: + con.close() + + +def sql_create_table(cur, table_name, tab, overwrite=True): + """Create an SQL based on a tab""" + + def issubclass2(t1, t2): + if type(t1) != type: + return False + return issubclass(t1, t2) + + # drop old table if needed + if overwrite: + cur.execute("DROP TABLE IF EXISTS %s;" % table_name) + + # build columns + cols = [] + for header in tab.headers: + + t = tab.types[header] + + + if issubclass2(t, basestring): + cols.append("%s TEXT" % header) + elif issubclass2(t, int): + cols.append("%s INTEGER" % header) + elif issubclass2(t, float): + cols.append("%s FLOAT" % header) + elif issubclass2(t, bool): + cols.append("%s BOOLEAN" % header) + else: + # default is text + cols.append("%s TEXT" % header) + + cols = ",".join(cols) + + # create table + cur.execute("""CREATE TABLE %s (%s);""" % + (table_name, cols)) + + + +#def sql_insert_rows(cur, headers, types, rows + +def sqlput(dbfile, table_name, tab, overwrite=True, create=True): + """Insert a table into a sqlite file""" + + try: + from pysqlite2 import dbapi2 as sqlite + except ImportError: + try: + from sqlite3 import dbapi2 as sqlite + except ImportError: + import sqlite + + # open database + if hasattr(dbfile, "cursor"): + con = dbfile + cur = con.cursor() + auto_close = False + else: + con = sqlite.connect(dbfile, isolation_level="DEFERRED") + cur = con.cursor() + auto_close = True + + # read table from file + if not isinstance(tab, Table): + filename = tab + tab = Table() + it = tab.read_iter(filename) + + try: + # force a reading of the headers + row = it.next() + rows = itertools.chain([row], it) + except StopIteration: + rows = [] + pass + else: + rows = tab + + + if create: + sql_create_table(cur, table_name, tab, overwrite=overwrite) + + # determine text columns + def issubclass2(t1, t2): + if type(t1) != type: + return False + return issubclass(t1, t2) + + + text = set() + for header in tab.headers: + t = tab.types[header] + + if issubclass2(t, basestring) or not ( + issubclass2(t, int) or + issubclass2(t, float) or + issubclass2(t, bool)): + text.add(header) + + + # insert rows + for row in rows: + vals = [] + for header in tab.headers: + if header in text: + vals.append('"%s"' % row[header]) + else: + vals.append(tab.types[header].__str__(row[header])) + vals = ",".join(vals) + cur.execute("INSERT INTO %s VALUES (%s);" % (table_name, vals)) + + con.commit() + if auto_close: + con.close() + + +#=========================================================================== +# Matrix functions +# + +def matrix2table(mat, rlabels=None, clabels=None, rowheader="rlabels"): + """ + convert a matrix into a table + + use table.get_matrix() to convert back to a matrix + + """ + + if clabels is None: + clabels = range(len(mat[0])) + nheaders = 0 + else: + nheaders = 1 + + if rlabels is None: + tab = Table(headers=clabels) + else: + tab = Table(headers=[rowheader] + clabels) + tab.nheaders = nheaders + + + for i, row in enumerate(mat): + if rlabels is not None: + row2 = {rowheader: rlabels[i]} + else: + row2 = {} + + for j in xrange(len(mat[i])): + 
row2[clabels[j]] = mat[i][j] + + tab.append(row2) + + return tab + + +def write_matrix(filename, mat, rlabels=None, clabels=None, rowheader="rlabels"): + tab = matrix2table(mat, + rlabels=rlabels, + clabels=clabels, + rowheader=rowheader) + tab.write(filename) + + + +def read_matrix(filename, rowheader="rlabels"): + tab = read_table(filename) + mat, rlabels, clabels = tab.get_matrix(rowheader=rowheader) + return mat, rlabels, clabels + + +#=========================================================================== +# testing +# + + +if __name__ == "__main__": + import StringIO + + + + ################################################# + text="""\ +##types:str int int +# +# hello +# +name 0 1 +matt 123 3 +alex 456 2 +mike 789 1 +""" + + tab = read_table(StringIO.StringIO(text), nheaders=0) + + print tab + print tab[0][1] + + + tab.add_col('extra', bool, False) + for row in tab: + row['extra'] = True + + + + ################################################# + text="""\ +##types:str int int +name num num2 +matt 123 3 +alex 456 2 +mike 789 1 +""" + + tab = read_table(StringIO.StringIO(text)) + tab.sort() + + print repr(tab) + print tab + print tab.cget('name', 'num') + + + ################################################# + # guess types + text="""\ +name num num2 status +matt 11123 3.0 false +alex 456 2.0 true +mike 789 1.0 false +""" + + tab = read_table(StringIO.StringIO(text)) + tab.sort() + + print repr(tab) + + + +''' + ################################################# + # catch parse error + if 0: + text="""\ +##types:str int int +name num num +matt 123 0 +alex 456 2 +mike 789 1 +""" + + tab = readTable(StringIO.StringIO(text)) + tab.sort() + + print repr(tab) + print tab + print tab.cget('name', 'num') + + + ################################################# + # timing + if 0: + from rasmus import util + + text=["##types:" + "int\t" * 99 + "int", + "\t".join(map(str, range(100))) ] + + for i in range(10000): + text.append("1\t" * 99 + "1") + text = "\n".join(text) + + stream = StringIO.StringIO(text) + + util.tic("read table") + tab = readTable(stream) + util.toc() + + + ################################################# + # specialized types + if 1: + text="""\ +##types:str int strand_type +name num strand +matt 123 + +alex 456 - +mike 789 + +john 0 + +""" + + + + + class strand_type: + def __init__(self, text=None): + if text is None: + self.val = True + else: + if text == "+": + self.val = True + elif text == "-": + self.val = False + else: + raise Exception("cannot parse '%s' as strand_type" % + str(text)) + + + def __str__(self): + if self.val: + return "+" + else: + return "-" + + + def strand_parser(text=None): + if text is None: + return True + else: + if text == "+": + return True + elif text == "-": + return False + else: + raise Exception("cannot parse '%s' as strand_type" % + str(text)) + + def strand_formatter(val): + if val: + return "+" + else: + return "-" + + strand_type = TableType(strand_parser, strand_formatter) + + + stream = StringIO.StringIO(text) + tab = readTable(stream, type_lookup=[["strand_type", strand_type]]) + print tab.types + print tab + + ################################################# + # quoted strings + if 1: + text=\ +r"""##types:str bool quoted_string +name num blah +matt True hello\tthere +alex False hello\nthere +mike True hello\\there +john False hello\n\\\nthere +""" + + stream = StringIO.StringIO(text) + tab = readTable(stream) + print tab.types + print tab + + + ################################################# + # python data structures/code 
+ if 1: + def eval2(text=None): + if text is None: + return None + else: + return eval(text) + + python_type = TableType(eval2, str) + + + + tab = Table(headers=["name", "list"], + types={"list": python_type}, + type_lookup=[["python", python_type]]) + + + tab.append({"name": "matt", "list": [1,2,3]}) + tab.append({"name": "mike", "list": [4,5,6]}) + tab.append({"name": "alex", "list": [7,8,9]}) + + tab.write() + + ################################################## + # join tables + if 1: + tab1 = Table([[0, 1, 2], + [1, 3, 4], + [2, 5, 6], + [3, 7, 8]], + headers=['a', 'b', 'c']) + tab2 = Table([[0, 6, 6], + [1, 7, 7], + [3, 8, 8]], + headers=['a2', 'b2', 'c2']) + + tab3 = joinTables((tab1, lambda x: x['a']+1, ['c', 'b']), (tab2, 'a2', ['b2'])) + + print tab3 + +''' diff --git a/arghmm/deps/rasmus/tablelib.pyc b/arghmm/deps/rasmus/tablelib.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49a689cdfb7d9bfb68cf5bb6b84952fd75a59c70 GIT binary patch literal 31244 zcmchgdyrgLe%H_Ko`-rgqmf3>k^JbjEYHZRhb4O-(Mr3vG?G_~ckI2RUD;X>v#sft z)YeS*Oy3@Bw6Qj@c5H9RW?_p!NTmo>Bq1b~@W>w|Kmk=tk%CkOl7fPSC4>+v5THT= zB!5uF^7($x?Z?P=HjiFLx@XQg_uTXRz0dEQ`Gwz^82j+&e&bWiA^(}+_tQMtj~7Cy zhH#BA3DrWlorG&dV!RNp6*}=^xK`}Mhr+d?PJB3A8+N=HZjXeO(Qs`vBvndm3$;>s zovOz|bvV?vhZ|Hn65a^PeCgVFsE#UeZ6efmgzC2N1}#s9YAMuqhMP1y)`@KIM8-pH zmtXP8lp{MrZMPyfN5Zu|p*rab_lD|DNA`v4u29?WhNrp>9|+amZgNOrZ8}uOf*$+h=~+WW(^A15GVnsH7$Q9l-{ zM>Uu9`}JZ-HV>%W+~Uq~?YO&oT<_1WPZzFMAAgqyXd_l#KcUr4YA5wQxvo7n@kFT7 z>ap;8f!ylxKU^)1gkcuK!t4nF?Y}IBFxIYIU#YFsub)`EJ2v)0v*mR8O6B^-N~Kl4 z)NHL*+U407u3S1gHg`c~f)f+d<^+tQFQCY2( z8yl-*tM%Kp^64kXDl4_k@~P8fjG$fq5Doq@e#XWwsrw9&!IZm0V<2<&R&BXmf0a3? zv})yMqg*Lx^oi0d^+wGzb-i}8-YDOxw{Ml(cber}mFyP9XAU2p8JlmGv$e`H$vGSPUv#u2Azzrmq`Sr8ly6bG7VgI8iQN=u4JYNtCP2a{HEM zTs6_5v9a>06LkGXWn-mXezmf)Q9Dx}E0?=j=gO;hRo(2-vh!QD_C~9bb>I6FYB_y^ zZZ=-6wc6zyExHibKs8aK+^r>M=XK>DWc^>1+s*z_Q1g?$nt9i&H(%v*%9^E-qiU>M zrCM#?@Jy6v;t*z3H|t>NP~j`j&pm&pe3_3nR;uM?=I|y^dW(q$bZV^|tiXvACseXi z<`vE1Mpi55vvOi-iRzY?V*6v|r%uEc>x~<=R;|Gb&NkQD^=2dQo@cVt*UnVd8$ue& z(%(*3-`TxJvt4f0);EBx*vHvsW94pnwX(+2v}%CH-EyN=tJ3+l;cIEB-l(_f+flG| zto{vt=4TUdygfv)(XOx5o!UXl&~$Ngx#rF;lj9#~K{(HoJxEZ4p+N5l;UMvUViG$P z(uG+S&87*uwUn1vDj93rY>9HL)RxukD36L>;K}|n0R;BmP-us6Z`k2Tcn!+?P)J^b zQ52Ne_aPOa7zGGNf(X`Y#n3z+l9=(@aA@v`UlfB%y9D141uAi}Uo3-onkpk&G5YC}$dnG5JWvVs?{Y^iMAvT zr)sNp(~4O#f`ja1#7?d@+9$8qZ=S5QmTy6nPOi0@uRxzNGH%{lUG4m;*4DC0Gs zQuQq@Rh!GiHEZ+DM$L`%`aQVLYrUS~$<&g%zN{QQ+ju;z8$?+5y2YpFu)%5}xjQA0 zARk_v5L>v~*d2ygf>*{u6G4oh24M1{$^mG3x#9OL$2u+;|FV;`_v}V3%b@q7#-a*B z{z@dkWe7*J3X6n2RC01PYfw>8&#LvC^|nC)GG&OnvC@Q{X_h}D3VyNGf@f=Z5U11E zo6QwZvm?cr+am<++Ge{q?}LCBcjTtVx1^CRsl|f?p*WEY7mCSPvJH6Yk9kRiU+Ucj zp6oLOOeq-r%C^v55-n0eQqa95fVI#LiD{h$Q%<3qQw$3U!>$^o_pydwj>yt%iF=ndYc;W1B_i*LbKb@BHdvlLK!qEP+l}U( zMqbL4PhXd3j$|_tEz&89rCmxPZr)1Op>!X?tfnBD)^U?)YX6TPGFXVu$`zFEA8+kr2{i8)X&y)Q$%Tnu?B#ubix&^8eL7s^EFL5Lc^g$v+ z&K!1p*gD!s-&^#%QR}{_ncLoxj~YyV$vQBNqW5kOb<|@jFIjd$-Rj(lyq{6og8R~+3Hd;+%z*-f||#|!&)PKg|rAI z3E`DeXug^x@I2Dgh^%<`g#@{AQ+YpVDpo($JsL(0rIq>{D7};mvgH25(86UB8ngJXf!-|6DTE z-rpzukdeHo+L`|aOTY7(Xl}`B1&a$=7OSxAvD{p{Yq{@Mtx~PEvdG}1t*0WO7FL#< ztF|NfowaFaH}X%+J6##}gO(!=IHaYGI?8oaqMTo?FSpIiDr;-CMm6#^M50>NuW#4x zW{zN+hyql51Dk`}ZAl5VaE0`3Ioqwl#$%t+5|O$@;Xhwrb0uO@7Qq+V&9%#Pts=fO zD`-z8gSk+vc_&LBSJwL#c+8^C2y!ZTqt28pQOGrSe$&qZvwv~*ALq%=5`>YdWD1_; ze?t?=cv4Kpi#y?CqX^3T3piDfjgs~+;aZB!yyfTgC`IEcSPt`2h@7Rz70CW&>Cnp-%XMFP-3GfFhPP$) z{Oh!wOJt)g$wV?yhzql@#u=CYu<1TE?iP`)>zQ)Z-oPzU0XMwA?Pm@D}|Ko4Y|@K3a1lq;AX?&K!DXwhQk zGuhs-gfz?7H`?V!vs|sTE8Q>kc{h=xc=>p_ego52ZMlAV7c`@lf+-2($-C4e;G 
z^;aXz+i&tKuKuW2esAFbC^w#r7nX&t+jz!#E~wOHg3a?@vj_$(GtvqximU``fLp9I z?2(lgYa5nFlh(`*T4^F7$_(I?AzJq<34;GHe@!1+;T|Yb z3iWN^Tu|Jy=!Yl;edh%vO;nGjupF#^Ha3D>ny^!VMS=YHN7(?O!KK&s#I&W;n5FOf zzO5sGnA6|m8~VE=teuPBu+>9bL!p`1u`|3j7Uh`FhV{dUNyr@Z%6U1h-4V%0Cs(gs zC9mn++8Cq-DP=5t5sQc3><(-HIL445KF-_tfdTCPj~Ou^qkbptJs}rVcDVkItfGnE z9oF|35}>yEGy+T21v>*{i6rp6j8eHh+(T@059&xQ2x|MoYe;pbYsvaQOs=Y$&Dp&F zmg(;Mz)PO2|8A6xZn0hyVRJRK4{Vv0xy6^lz1U5{9bx+3WZv#;lO&kaouNH#Dtv&E z9n|com?o5!nM8DYg7J9n-;sZnBA?M(UcK6Q0;AF)%TZZ&Dl1LG+OAv*<9N~^LAn)Y@j;#P~cXgYT13VRWlv*DAXg)&mN z>b0y(N4F4Y5||&^Tj_uO97ahoT{eRdzOek8^v@ig@uo)`?9@gNhLetDXS=jEM6%oU zwLX$f?dMl2joaQFq5pXaim=t8e&j!wBTKim)TuN!opsv2BAN2cj9GtomjlE9?OJ2Y z(A0*l{A)}{?4_gyv8b*+P8rx!H&(I!FmKrHG4#kPry=+Et5b+ND;sRUAOSAhAfInX z+&_+NV{OgisN0hOxpiWX=O(27HeikHXLV+R-^a@~`)$pVy=iMs$Cfe+1CHIvSZeJi zRWZ9BE3edUuwT$KZn0U=X`~M66wQQoDmXkp`w+h(7u`;*iEyp`k6L9lJ*77`6qysu zDJc?c5x%{;=0REzN78N8*HT&F+LHuJqT)-{&W>4XPAu+X`6ydkskb9%l6+=CdnuY= zq8yNZSlQadcm{)NOK@!aXXNdt$xqL!gt~B3{hF$}klC65$Glie7* z#*@7T)PdwsafI}UWLEwiC>$tG6n7TN!y^UDuHw>qG_vePf}G8I$YPa+u=z0)IoS=L z3iqIU{71Tl5x~9>XyIkr#eo)z{=r3ftBo4SIEnQK5N5P{T~rHN)fa5hwdilG7jWky zVuN6AlfR+6e3fP;=ctD-fM=zYO7Kb93?^| z=IJBaiQBxR*K>S&L8=*&%(kPY?qyQ==#?p9o zZgol^<>VPxS-BytA^g(jm%`;wtf#DE*Es(V7}C`iQf9f5>lEI&wXXoXiIL)XU$#T3 zx+IGXn8ZwH?`+1q<~cRh#%M=p5b26kzhcI?rM7pk0V>yD!yP(^qNzRQNV`_<_9`(x z--)6%Y&p!&iYDAoB*_Zta|CS(_^PnBloJJu&X4Q8DWO(7-)c5td8JuzyqoZCaBXhn zij66YXq>DM?bf#dRE-Dj_}hZl2WTs7gJcw@3cG<@gz*AH^9r)%1=9fD13kG^D}$ARSkg1V)G=6@PMlp|=CuWNN&7{_Foaj0 ztMpaPjM(V4pF+TQl&?m@e%^G(TVFvWVl0pWV?j=V6U);fY^hm&ycK`qB$c_ zAAo`cF{anVX1Y@XNXav z5>bzl%Z8pSjLc#OOp^w>G3XX&5Dwg9wqoa3MYQr=Y#G`_4#j%eAF!iL)zsBqSaJl> zD~Wr!tdMf2XYIGsA>=nU8dgQjslA0H$0!+(IwXBQvSNvI(Y)j77nSp#wx@rvAZ?19 zRbH+AqUKgD1%P)?6mT?`-T4%v3X-s;8{pQ;5wb`dvetHX(`JX=t!qJ?p>Bv8=Ivc_ zIBGI69#w=)WV7SsW&n*+t#q$Ux>tIG0-hCHY$7NQ9>-DRAvaa@rq(JdU{7}cRZWmq zCV=DoD-0LAQs5RGkKn~rfo|F)M#{qqf zTIQ3Ze27jAEe5l_=_C09DDKz>h%aF+n4ogCKzni1BAcPR&%nyjQ3SmL&$3iw-1Mx^ zR&xH1y2t{v!}&)P0W{Z)k=@3{Jeq}6qq=CQZY?W zpOC3VGh%nixuFo3gaL19r_a;Uf9n#B;-tI*3j0~RiR96Q_0zcY2<}A_Gkw6?AaiQo zKLA9-*vyj+ZiSkZ-($T3s4MmtTO-iGw1vHoiyOcvZW!>v+#J#n6`T7-}Hp( z?+$wl%rDbVW1Bi8!6-!ReLSH1voKU}PD|t)2RIKH`Oz+@w4jHif?@ zp77${x{rtq!XG9R*FJj_fpc@M_0b}&*=4L0H&Mn+tvgzJcid-kI%kYFiMy^&i}lRU!gXnvDliI!^@DS~h2hT`yve;b*&D@{h@!ItxwH&AO{TccHJW~}; z!Jaz&!Mig3QhR@#mnWoiVOBYH4y((lV=}{>R`j%^Pb&Ikk8wo5#%PjOmpptU<83_L zvN$i~hk;DVA{FiEtYR2m7G{j{2q-V`Wb!T(0W%NnD6$blk}Zm?&GjOC+Wm@pnFZaB z3nSPO9iUEuWmxrN0ww~iMvekQUjHI#u!;9k7KWf&O)iQab#{rMmxxnrv`N@;$F%oj zBt$6`4l5T~HwkC+rjQ4o#1#94sA}vT2*v+7p&B1Ysf*7fa_PJEuLo~8_cXbq%tT!K z+(ed(2VQ<0lwWFYM8D*<<{G}`Xnc7&+u`muH*n5cxzW?B`k^FhO0t7lyvVov;`jaK z$PxRboPXuzwi@&DX8S+u_(UJy>gwFtgnDN?Yt773%<(cY`z8t4*_W12bUv)vyw)@2 zR{iF!_Hj0+dr+J`4DjTmlG9W4_!d99Yonv&E2~)c1;YSC=p=ng8^2&*erd^)O1dS125}N`>xL7`dLfS={0@K6N5pZ zn%jdCEYbcxVO&|&mk_=nM7wz=;8TiEV7Ho39@>S8Q-^CkOZiNJTp$FxAI_c9m?v?G zT#O)LDdgOy{V3)ZCpwB~A)M2GnVi;oBuxvI)>fPL@9(Uw-QI0>iaX9l719!TmRl|! 
znT|RaNt9xYE%Rg&`BrR{Ngj)>YZ^bIX^eIyZI$hm2d^egy_F2oC1P)r$O(nHDTp6+ zbEZLzKR3=XZChztBqV%`>aFh~Yf{r=tk9QtZu77jm|Hw#eb1z5S`c1W;g30ErDYb%-MY4u-= zWp48~i7>@l7AW?4XnABkq*0PjM{@6F(h^2@fXHj2_)WZHA*4XkI08Gda{!A&#(RBq z(`x$WOkBf}v#Rex8IqlVjz4jPUq4 z*e*TpMxd~FZgI4tO!;WrlV?$fD0{VK@2!+i&v|#N|HG(b=553+DygFgCHJ+~QARu6 z=$6f4?7rgn`x=-cF5^WM{Y}1Iqtqsaw>&JQq@>~w!79a~WkCej5 z4pRvR{;dK>`m#jzt@`F?1&(q_4y6Z^NnBEP;gB+gsb2v3V9&HKs$n4f7!lz(AOl=r z)IS#C;!zU~05a;kY;HzvA*Pdc1RtKOWahr{&rneZCMx|NC6zue5T=MjWgE+!oYLka z+gjCG@{X3zoh$3iMU?WNRaJIVxS!xQs_7-YJWnuSv=(Y&sz5bH%M6(ey8#z#&D#W9 zCS5wK<<{(FUnf9oiO81I*6k5#K|bsP38q!v@=6MgXfR^os0F)EP!8N>ZawJ$o>XI& z!FKg8#fpuBUov|GI}sb-iaMHg?l-ky_$z|=(w&X*K%sPJ+^>@GR$^stC{R$H7!D6x zo8x2K;n+LKjdl1-4uvH`nrFkExWfgTsk4tT%HFcx=PXpKu#&<Y#E8tr#CctyLk+zOc6zB`fWyF4ur?$LR?yXV9XJ!IiX-Ps(z{kw>`7y zm1gty#+pAGi14B%KD$y`y;h0^)OT~c8g0u-F3euwU= z?;~ih6HC8aNe@Wyt$7+)-?yr)UsdPTX8=R(S*n?zolw+5r7_J`kPj%2NB*-qA(gdH z$7zj{kUE;N^BqmvXGo=n3~8_T```Up<(X$cqnHrZnk)#mgwm8s-R%Sqppu zl|dntIQTS#@2x&VN$$TVJ|304XH5cx^xBmBifXSGLB7Su&1ubSU*Qm(D5^;S>KN@A zlZo=t`VF+P&+vK+`Jl4%y~;l5$|&HEbL%=Cc(01RMY#S`5Nt7V4L|!dL6^(+P`t;M zPaA zYi#xxgXp&9kc~!t{rlB*H!wV2*o}D)MTuuD*>0}1i|v@5j_VA|F2qg?nOfa0fd3$g zTan3_JOSX)*`_vFlJKR_G5RIq6Me88jk+>eV97Nj%J=X$pA!(yEzWI`55!6=CTiy| z--}?2h8CgeL*cBA7miNH4QxUD`a&*0n7AAOAD?YCH`d~>$t-Be0xkI7D2-gGAN_ z3yfL$dL@JH;1whFtqaVEfn7EvS8VD!ip`=2<8qtI#Zi4IxALy280`!Fb+u|76w{`& z6~#`4_AJuZxj~|WY#V#kvwF~x{)#ScZt=pRH0P_YNYR=7?6=QIqnJ6vL1In>&K%S4 zSDGzlTv(X7cdWmju9&Mx--n!(|08{OnLLOd4IQl)!*w*4d*^epBdAcKlaf3=i!@7!{X8RoWU85_FX#jUbAwqe^6W2 z_d@4TzYdz?%NG?}t!tEdD_fn#`e!nqy)9GTtNV{gOf4Jc^ZMKRPQS&3Nd*l2wabE%*h--=n8FYMZQfNpcqfW?00Tex$+Zxyr2-b&l?s_V&N) zk8?<(%lNuJB-S_ zJ+H^wN0oa|Bs@opmEgGAbwp!ixfhzG=5#@UAziSW@htiC`z`-=n;i7{odNwN=+V}}nYvYHCGbtDZVLuaRh zDF)2*JrEKWC{(x@Hs@5`+~Qfu$G4DFa4#tfmpWS-G{?O?0PS?#-8d^DTJkl(9OY|E z8(j;Q)t%geRe3LKg}A%m#jHM3^DHN9UgVx_$l6Yiu!*`v!Y1=d%CMsvD^0sPh|LYR18^eNeKA!lY?gGy(0yAhfKuMT!MU>U6e7aMMEK=7l)XAJMEU%ZmV+ zV|rPNByQQxE$+Aef!TsLOmW=5O4g5KaG&rgan^?N0sy4Vl8GafaT*7KEqnMUXt_g2 z@)_PL_F)t33VqT_vxtj)*4YylD=~8Q%yFJ^qX)8^#d0$jtQWN5`5*AizfK?af(6m^ zR^*Zm5Cp@Ke@`R2m*T8%qnsYk`kx~r1Cnh9`Alh7-dUQgX^??67!s$j`Z6a&11n-v zk^p{c{bcxp^ivmOA%rAP=9_%ZgOcZ;?)9fm8&CZCE8z>HVG7*%0=xOJs@%20bSXy< zFkm)2T4o_M|3u%h0;X(2fj-Yh1HS2%Na50FPc5c@MdOXB(~CxhJoV(_V3)(xQ#Mh$ zKBHBU$v$8g*uhcA(WHCsQ5>Du5&p46!4x|hezcxNtq6*Af@O*CA!qUpvKcMk0Di0A z91zJDSnf|<`=N5^m}5}8*3}vU}Q8Es)EtqT!YqB z_q0&Y=O6^7RN~%Q%^|;|4NP6qtnKE>CJ6vAMk-MV)gIkm1xR)JW57^eL_Ei~>8*+k ztdAe-^wDU)yt)=;%2C>A>5`C)qonF}WX~T|?5hO5eWdiSDer=UA6203BX=0c-4b)Dkev*d8lCqcDM z;OX)_E-lS#AVSy>3BICwJb=fDu$aOW(|n{LjmLu#b7f3%VdFO2e7eL39)?X&3%>vC z^7DO8=Q;z`zU#LvnJp1LaP@$*2*Xq~kkxWiOPu^!m_8>$3nZ>$9KI7Xm*_ zGN;ooD(g)J5d1;Q+kb;9U($4%AS8zhBOJ$>7$^*-KdHXg6lmwU2Pf&@Qj%wYB)LLu zTnpL{9`m!0lbMQRq&pStQeZ(^R63ng@T>yKrRgUWTvqTY0#o%2ONPKIYgctt!bAC- zO59S3Z3K2qPO;L0W4Vv#^NImvA(i`A3jT%k^9sJ8;7=38~Liv5IwzpX$^XutB_R5~^g7N?qilqZ`f&@FC7#0YMEII(xf_yKgW zQJ!XL$J9ep4~~zI6iYiwTEj9o{la49*U1ME=`7JPJR-yF8#ZD!870)LT+fq8G2nar0EF7 zlE^qb0GYze!^NH4fG4zHtF@N7^(QE^$bJw2 zRuui{gJMAIpJ6*LgAsMB#W|B|7pV5-{%Sw^j%rzR>ty#=s}_dBUbTXn7hCxWTG5FO zP9NF2$9@YOko|nDi+OjdxO3taA2^*ZO-;OOJ{1O-^NQrfJua9h6L^H+YOBc&Ph6vW zofGB2Q1nm9%Co0+jN?gP({a>dt%crMA%BlCa-nLQGXmJIjDLH18R_uGYP`IMrH;EF z*1x^7TH-7fuZj9x?Z8N*ZZfid`abmF;Fb3he4Sbq*LX1yvdinEPc~$b^>? 
zS|&O8iUT6YV)t-wPA9gV!?ELqgN0?4(v;{C-^ykHQ-Bg21~PJ$eH)7=kVNW^xIHcL z#^*JWaH8#$F(Rrq@z6p#kEfUf(f|j^TrHH>1`yeJI)WOogK%p&y#)!z`umV_<`(yL zE`kCsD8vN=^5Fdr%w3~-6(ClWq`riM!jGHc6tn~VgW)}U_GHi&w?6Fa9~D1M6%xmA zG3o0mFOv12>3kx@==3)KvT46s=~R&B?+JE;kJOe3u&5VqbF1{ zw|H7OtXUfKjTK0!ObLE*6K;?DBuGLdWDT?Gk*t56VK6mJkXIsE|3}WqS>VqBHJ{+j zCmzKJ=s3W2b}qf0iS;_ByIY1A6b)rvk?xb9%!?Cv$WnU2GMrL0}(4O{8mK##^OOQ+H! z!fjK4m@+d77_rT;FiR?DFDljN^CIRb`!!BIDl-uA=qpt8b44msjXjb$E2NEvceT}b zjQ4dVE#5U0ds%^r)^{n!6IBJA9+q+2JKITbtl=%9n}t6&k`HNc*g%5 zL*A`Q;D(V0+KYJM77hcI+>#Hz#fz=b^jfRdgJ*mq)X@`>jkDs~I4f38~yS1Eo}4Sxs2 z%04f-dXT-*7g<`?91visn-AEim8%)7zWE)~3WaXW@(wVtk0an>;V8MkXvsWr)b5WK z{UjmLsw}imQNd6cZypQLO3tOy|R;A}IUb>j37w1g!O{r%0P!tZ0(0(^IaXS4s z%GyrAUz4D+c!9Oy&3$;%Eycn{6V3$V-qK2ciBIBn$|?+*ZE&r*tXyr~jUmX$1X>zc*SIcy&k6k}FUDDrL;nT!4chac@s}JYeh!rtTCwhjM?1%=k?fx}u0@`Q%X)Dky7UkG@ zm!yoeje|;DWz_@Wwg(LF_n9zie=`=c)b7S&A#&eY$dW)`Ay)mJh3tLPR|s{aztFq% z;VL49IEsMFpacnIyyPHc4C~Tg_qJLBe#9*8SbPE7k8ZYUlovkrT+j2cBALTT)scS| z^u-Z^uBJ#tp1C?B&u&G2*{2}4b9y4*n>xowkt(Xydd6*+oFK{aYaCU7`&9ZEzXl9S zBH+E&PM;J!2|$8&Vv_6tvAOj57s^+jIse?n^5skAi!WYYxMEqz*Bl(l%2zJFc*TMR z@yqj9E-v8FFp7CGu?ORpMUF z_RB!S{X`r1`sPGih2E8D|9_FuSP+CKS+TgBav5TPN`RtnnhnDssWT|PNE6xY?h(mm zDBofcsQfzuG8W>KuK(c@Ouik*;p7JUMN`h=-po9hG+%O3iQQ?_j7`W%1I~{+Wbi7?r5>NWS6E2BS@&)SY(<*&lf!%~mf)6VB zl7dGGtT1B-9`yFLxEi#0ZKM5ZtyGm{;1b366s9d&moRt~aZ8v+kmhbC_GU+ugV62# zmL{Br^>0_QcPP4j#e37H3r8@l9Ku{Ont1crvX>aUI$Otqam(md^oF{eGE0EiWV6a8L5)oAx2L zsyes0DT^oiw8XbqFDfsx+bxIy|K(9x3?GyxPW!hYNmxyv(lW=6sz4+ubBho70%aRh zr=j8cn*=nk*xB`ehJ6}Ij7D-ZsSMV-K}M2vhV&I|U_|5cpQ5YioG|BEJJiEG+$9{y z!QNRXP>b@Tud_$Xy0@WI!}ZGYZKXRk^*&Z(aihe@Fdx0v#9jMN`x>2O5wgNtLoM{q zTu=Fo_msb-ufC|@O$FbnKwm|KxTcuk+~AD5dD>2J1C}pDc;jrH_MH5XAX`Wjw&4Pb zT8LbMQ#-bvqINoTnsuZg58i{A^lbeRKyKqPP@W`$z$##fY?39603{dDq9BQd6HKWX6e-FCrk0qah9I?zz0kGMNZBgl|J<0Z=--=-}vRtKs)efn%8%0!qtLU zC7$?qJ%9hY4^xlzYs&jFt1T6jxXHS`k>!0naa41+_=omVk|Ev^rnzli*B|DK?j=h55?WtY99>-G==pAc zk1PHSs3Uqiqd+#S$je0-Q$dj_kk<2x6q)qzce1q89<%<5ivGNUf31M^4(Wea@LLLg zPr?6DU}O~2NwswoU2)8JFDdWy3Pi2!-V#X`eqQ!{=|QMt12JF-4$sEFE%;HnC1kG> zj1+Z6vHul!a=~$_xbGqSSN7$9?F5{i94U>K#`k?Ad?Wn4uy}lIq&PV`Qrvr>^ljr~ zQ;(Mpjvt+Te(La4X?#0>8)e5x;ZT7;lco!jp?N#Dk6= pval, p + + +_do_pause = True +def pause(text="press enter to continue: "): + """Pause until the user presses enter""" + if _do_pause: + sys.stderr.write(text) + raw_input() + +def set_pausing(enabled=True): + global _do_pause + _do_pause = enabled + + +#============================================================================= +# common unittest functions + + +def list_tests(stack=0): + + # get environment + var = __import__("__main__").__dict__ + + for name, obj in var.iteritems(): + if isinstance(obj, type) and issubclass(obj, unittest.TestCase): + for attr in dir(obj): + if attr.startswith("test"): + print "%s.%s" % (name, attr), + doc = getattr(obj, attr).__doc__ + if doc: + print "--", doc.split("\n")[0] + else: + print + + +def test_main(): + o = optparse.OptionParser() + o.add_option("-v", "--verbose", action="store_true", + help="Verbose output") + o.add_option("-q", "--quiet", action="store_true", + help="Minimal output") + o.add_option("-l", "--list_tests", action="store_true") + o.add_option("-p", "--pause", action="store_true") + + conf, args = o.parse_args() + + + if conf.list_tests: + list_tests(1) + return + + if conf.pause: + set_pausing(True) + else: + set_pausing(False) + + + # process unittest arguments + argv = [sys.argv[0]] + + if conf.verbose: + argv.append("-v") + if conf.quiet: + argv.append("-q") + + argv.extend(args) + + # run unittest + 
unittest.main(argv=argv) diff --git a/arghmm/deps/rasmus/textdraw.py b/arghmm/deps/rasmus/textdraw.py new file mode 100644 index 00000000..3c512ebe --- /dev/null +++ b/arghmm/deps/rasmus/textdraw.py @@ -0,0 +1,79 @@ +# python libs +import sys + +# rasmus libs +from rasmus import util + + +class TextCanvas: + """Draw ascii art on a automatically growing matrix""" + + def __init__(self, default=' '): + self.mat = util.Dict(dim=2, default=default) + self.default = default + + + def set(self, x, y, char): + self.mat[int(y)][int(x)] = char + + + def line(self, x1, y1, x2, y2, char='*'): + # swap coords if needed + if x1 > x2: + x1, x2 = x2, x1 + if y1 > y2: + y1, y2 = y2, y1 + + nsamples = int(max(x2 - x1, y2 - y1, 1)) + dx = (x2 - x1) / float(nsamples) + dy = (y2 - y1) / float(nsamples) + + for i in xrange(nsamples): + self.set(x1 + i*dx, y1 + i*dy, char) + + + def text(self, x, y, text, dir="horizontal", width=10000): + x2 = 0 + y2 = 0 + + if dir == "horizontal": + for i in xrange(len(text)): + if text[i] == "\n": + x2 = 0 + y2 += 1 + elif x2 < width: + x2 += 1 + self.set(x+x2, y+y2, text[i]) + elif dir == "vertical": + for i in xrange(len(text)): + if text[i] == "\n" or x2 > width: + y2 = 0 + x2 += 1 + elif x2 < width: + y2 += 1 + self.set(x+x2, y+y2, text[i]) + else: + raise Exception("unknown text direction '%s'" % dir) + + + def display(self, out=sys.stdout): + ykeys = util.sort(self.mat.keys()) + + y = min(ykeys) + for ykey in ykeys: + while y < ykey: + y += 1 + out.write("\n") + + row = self.mat[ykey] + xkeys = util.sort(row.keys()) + x = 0 + for xkey in xkeys: + while x < xkey: + x += 1 + out.write(self.default) + out.write(row[xkey]) + x += 1 + out.write("\n") + + diff --git a/arghmm/deps/rasmus/textdraw.pyc b/arghmm/deps/rasmus/textdraw.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0e8ea6f3f4e44c38075b99c97e13abcd3d79295 GIT binary patch literal 2783 zcmcImOK&4Z5Uw6S6B0IA5=e-&7$Fqz0|E{m|K7m?Y!QQJ$UI z^r!<)Zc9hYJ3S9Q8)I@Gt>~h2a1WNLaWLz#f%o2Hk-#HO0_F+=LC1F;-HUAB@*LOV zBz_o<5~s^P=2e7tY|p7I4Dh+Ls~boaaq_TYQJN-5@h-Zpe(JWqut!^A-aEFB;;liR zoy0v?fN^x(?_Ug2JSevEu;`D9Er<9=2*#^}v1{XuAh4-*LGTn0r7|%arbptUsoyVx z@fg0sb=V6pJ$YheofOR?Gps*j!mbY(sd1M72L7nqIVqT>d@7B#dsI7 zTjIbFiczh%dNhfE;HXah3W6D{$w^(ZWij;7h&M4ZsYz=na#&phES}UDkxBDn#o>@9S*w#4kE7T>cx@@S8LCQV=kTUX<}}cd#Oa`0vho}) z0of+mpb9hmPWCnGbaEGn1T=_Ke-7OgNu?v_VUmT;(d{x1)1%m51gUq!*QD@(@^zRb zGOmX5ZZ*5>FMz2)NsF*QNa8{lqOuys8d`hB86@%&o5m{`3wGiynq`CLbK{D8D_)$_$JjuQiQ*N4B3XvZCcMfBjv^8;U zS;yt3QD32w-VNcM_ObO}6Mk*W|hMYGnzfb8Nt%^G%M_kQ{# zHfbrNif>`e4=gMdo-T~Z|7FZzai$oMomvLB;2$k(%JAozz|^vt(f?%`c4GH_ii-gF zpti!kXJN|MT95a-#L6zNKOnY&t{~#(6?Y1dM{%yTwBX_>Jx#N-w8L`KiENIlWKncC zUoJMQy0;7=1!)qeibu!1eNgt|0VTVfDLQ>#S)b)e3kDC=eK=IXeP$zf>=PyaD!X-b zSBppg1swSjT5$`Vn5Jn0kIw*u%b=eIF6Yf{^yJ*Qc@h2uoGGbQc!lF{( z?F6_ydWSnsx+di>q<0Q~Vx=i>DjrdWkkAceSD)E!h@m`KJc7E*4Br&9KTUC6)+db% zO%1tJ1DMo!4a!dmnVrLr39#}GOAG)?1vj7oh%pNDN&yem`elaSQ-^8=^cB)9k~8_? 
zH?lvxEmM)HVOpbjrK=h>g$?GSh`M|Mi!685yFuyGcwAIf3@->Hc$QliSI^)K94e5F zoC=J&N*~FT`zT=a8e|3dbL2QUUypzl5qSs)aciOm^E*~w1AD85GmJzomHUZ zlCugNT{l{fr%!Sk_TwPX3Ks2$`(}aT01MkB^W3Ik%?b=$UjQR8V*`>BrUmlug-S|Hkq;b1fMd}^C MWBMtxP+O|~4KtD!lmGw# literal 0 HcmV?d00001 diff --git a/arghmm/deps/rasmus/timer.py b/arghmm/deps/rasmus/timer.py new file mode 100644 index 00000000..073ca9a8 --- /dev/null +++ b/arghmm/deps/rasmus/timer.py @@ -0,0 +1,241 @@ +""" + Timer class for timing nested sections of code + + file: rasmus/timer.py + author: Matt Rasmussen + date: 2/4/2005 + +""" + + +# python libs +import os +import sys +import traceback +import time + + + +# GLOBALS +_RASMUS_TIMER = None +_GLOBAL_NOTES = None + + +class Timer: + def __init__(self, stream = sys.stderr, maxdepth=1e1000): + self.reset() + self.streams = [(stream, maxdepth)] + self.showErrors = True + self.showWarnings = True + self.quiets = 0 + + + def start(self, msg = ""): + """Start a new timer""" + + if msg != "": + self.indent() + self._write("BEGIN %s:\n" % msg) + self.msg.append(msg) + self.flush() + self.starts.append(time.time()) + + + def time(self): + """Get the current duration of the timer""" + + return self.starts[-1] - time.clock() + + def stop(self): + """Stop the last created timer and return duration in seconds""" + + duration = time.time() - self.starts.pop() + msg = self.msg.pop() + if msg != "": + self.indent() + + if duration > 3600: + pretty = "%.1fh" % (duration / 3600.) + elif duration > 60: + pretty = "%.1fm" % (duration / 60.) + else: + pretty = "%.3fs" % duration + + if duration > .1: + secs = "%.3fs" % duration + else: + secs = "%.3es" % duration + + self.write("END %s: %s (%s)\n" % (msg, pretty, secs)) + self.flush() + return duration + + def log(self, *text): + """Write a message to the timer stream. 
Message will be written with + current indentation level""" + + self.indent() + for i in text: + self._write("%s " % str(i)) + self._write("\n") + self.flush() + + def logExact(self, text): + """Write the extact string 'text' to the timer output stream with no + additional indentation.""" + + self._write(text) + self.flush() + + def warn(self, text, offset=0): + """Write a warning message to the timer output stream""" + + filename, lineno, func, code = traceback.extract_stack()[-2-offset] + filename = os.path.basename(filename) + + if self.showWarnings: + self.indent() + self._write("WARNING: %s, line %d: %s\n" % (filename, lineno, text)) + self.flush() + + def error(self, text, offset=0): + """Write an error message to the timer output stream""" + + filename, lineno, func, code = traceback.extract_stack()[-2-offset] + filename = os.path.basename(filename) + + if self.showErrors: + self.indent() + self._write("ERROR: %s, line %d: %s\n" % (filename, lineno, text)) + self.flush() + + + def indent(self): + """Write the current indentation level to the timer output stream""" + for i in range(self.depth()): + self._write(" ") + + def reset(self): + """Stop all timers""" + self.msg = [] + self.starts = [] + + def depth(self): + """Get the current number of running timers""" + return len(self.msg) + + def _write(self, text): + """Private function for writing to output stream""" + for stream, maxdepth in self.streams: + if self.depth() < maxdepth and \ + self.quiets == 0: + stream.write(text) + + def write(self, text): + self._write(text) + self.flush() + + def flush(self): + for stream, maxdepth in self.streams: + stream.flush() + + def addStream(self, stream, maxdepth=1e1000): + self.streams.append((stream, maxdepth)) + + def removeStream(self, stream): + self.streams = filter(lambda x: x[0] != stream, self.streams) + + def suppress(self): + """Calling this function will suppress timer output messages until + unsuppress() is called. + + If suppress() is called multiple times, unsuppress() must be called + an equal number of times to resume timer output. This is useful for + nesting suppress/unsuppress.""" + self.quiets += 1 + + def unsuppress(self): + """Calling this function will resume timer output messages that were + disabled with suppress(). + + If suppress() is called multiple times, unsuppress() must be called + an equal number of times to resume timer output. 
This is useful for + nesting suppress/unsuppress.""" + self.quiets = max(self.quiets - 1, 0) + + +def globalTimer(): + global _RASMUS_TIMER + if _RASMUS_TIMER == None: + _RASMUS_TIMER = Timer() + return _RASMUS_TIMER + + + +def log(*text): + return globalTimer().log(*text) + +def logger(*text): + return globalTimer().log(*text) + +def logExact(text): + return globalTimer().logExact(text) + +def tic(msg = ""): + return globalTimer().start(msg) + +def toc(): + return globalTimer().stop() + +def indent(): + return globalTimer().indent() + +def warn(text, offset=0): + return globalTimer().warn(text, offset+1) + +def error(text, offset=0): + return globalTimer().error(text, offset+1) + + + +def note(*text): + print >>notefile(), " ".join(text) + +def noteflush(): + return notfile().flush() + +def notefile(out = None): + global _GLOBAL_NOTES + + if out == None: + out = file("/dev/null", "w") + if _GLOBAL_NOTES == None: + _GLOBAL_NOTES = out + return _GLOBAL_NOTES + + + +################################################################################ +# debugging info +# + +def current_file(offset=0, abbrv=True): + filename, lineno, func, code = traceback.extract_stack()[-2-offset] + if abbrv: + filename = os.path.basename(filename) + return filename + +def current_line(offset=0): + filename, lineno, func, code = traceback.extract_stack()[-2-offset] + return lineno + +def current_func(offset=0): + filename, lineno, func, code = traceback.extract_stack()[-2-offset] + return func + +def current_code(offset=0): + filename, lineno, func, code = traceback.extract_stack()[-2-offset] + return code + + + diff --git a/arghmm/deps/rasmus/timer.pyc b/arghmm/deps/rasmus/timer.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00f15a0dd033b27d3270b5c335ee8a398473805d GIT binary patch literal 10388 zcmdT~TaO$^6|SC{z0B;cFT3%^&b4f^@i>It#0fXYIKC~`LOh0EBa2m{N$+&eOgl5( zUc;*EO2?+^=1VZ8k2?_DW6HiF+zysn30N;12duG;bLWpM} zjNQkltE#I{oy&JlRr_~;o2z`{)jxf{p|al;{ymS&lpLjer8YnvQQRrEs%=F?|Rq`65Z%{#^`38=rrtxQARWD7oD+5drNtf@X z^4RH>=T}ZY^UOypmGB?<)zS$FB(e-FS47K2%K8hC02+2wP*5QZASUFh#WpYr6+4kx zWdo+N7J;>_MPMUq5m*6yS;oUm;H^<$DQgs1${GaJhA=g$uvGzR}>)colG6!ew3nxvf+vPUYNo)a`<5 z7E}w2QguhA1!0OggN55JtXjdQoT!GUm8wpHh&GeJ1ZhhDH)#@hU6Yo;Ur+i$T47^5 z-d;_TI5FvLc6-B1A{gJKoa0tM3{q32nDU?rI!)=Yv!&DM=z2ST&`aA@@Jfg|3vQ(w zr7N3ZYsE_%?Qknt=_T<^c#=UwtKIG9|KM4(a*qSay!Cn*g=xKh43ilOtDK56@09T; zPrW479-?HM0TU$d47(`l#nmfoHN9lcRBBb~UQ`xFeh{U?*0+-| z4bmdIx~3&fUauEKzSykU>6^AR8ot7sjBgMNyPVKH05VR6GxFqZ$H~waRB;aaKH?k# zZ2&eEuYs@9m92K6?PIA!MM6tQtrwalbYBTlowftr=qCvluKj-E5l09kY!{tL;ER8` z)TuiUB1&j<;>K%LzEc)F#+3p29%fPN=+lQoMF5>z=^_(`?~j4lK*S z1HK7*vXv(C+5T3tf@HEnu?>+U?&BNV4dT%3E$C)TWk2oVp^%*TwXB0Tw1_IrIZ{kWp~9p?PrZc8 zBp}2`Xi2Itxa84eFP9YX>JI3Hx-&^nB}@XG;E)cxsLcr#pSF6<5D9Il&~n&<-gD&Q z=xK3fX6m6kpT@g$ciy(8%HD3hOYz~~QC}KX5;rcmwYAz6V$9RJ6Gnkv@_7SqEpu5y z60Z?#dX3joY9BVmeOgC)L<_AD+mzh%(zf8triaSb>jtjC4{=HS-I|S>8=6f}C~h`c zl~L`ip!uk@Q=BLkBx&}ehA=F+$7yStmU_b$D9`hn&;&?&$oXKInn;?NI?@5F)c0)Z z2l42t>t1u;$K=gAH|R{s!EJ?i)-PIMHLnR@D1uG*OBOF5e+R-PJO_j zwi@92LG(*SHh<57{Mo4h&0?`H)mE_ZHbvzq^U8 zJWA}OA8{X-twq!?2^e@|cMqIHfN7kz_y{Kb5{094s)8h0M%2JhUEJ(^Hp{b@UpcQ{ zF0jJM@`Gd)WQ>~t+2b}**!sGl;={Nf-L2iKz?UndzRZ_@E(y0#$Z7#l?lyRyv5iVb zbP?;_X#o$YtW2T6E@yD*MlSuu)-M9!Ou6nf2@}LPHAdsUfy-`j3l%l%EMU|1pOq)@ zUGa(kUeO>5dW&+?yov=C9g21y*qUnEIX0Mb2nRAOsu!F$9PQ%F6}sciDx z7v1w6U&UpS!=dg6$1;OeJcI$nRQ$#64}axUhDhq~B{zY@ZfM1E_CszdQcyOo%aP94 zxBv;aq0{6cXHK9KIpNEwGz9Hnu`EBxt^%h@vIst2+Xb2noR!dr;TUr4D~g9I*2|!Z zJO*fUsoQrfI&!&Hc8})_=xoR9Zu;K2Z*oRXYjKPbipnX)nF}Q!$t{ET55>8ENm$V( 
zK{wtCtj52A`&{ExK<@aL*iOYkXC){J+2!K}q?=*3J%gc_kYNOj+96I`S?3X^WYcB( zJ?v0%2y1uY8CGSxHlzDd8g_Q=9r{uJNcEJ)bPdcM_}I&hZr4WnwWi+d3Ek~?(y-UD zr!uIF@vf0^INVLSBec_x&gY>ryVb|mWteyt*P-Rt+9ZqxCR6cH9Ipq3x1+pDo^1m%@)hWa=F1L_#+An`f*{131p0$DB0$egAMat zr97p$jYm&jr%+L{y{@|#)~|kHy?%Y|>Z&VS-dY?5BFnfJnDT(W)rmK~&UGHxe;>Em zreTf+3~mRThAZr2eE^xU0H%JW-X6-&6}z7z;gT&9JwguGm@=Tu{6kEa=Ne8;2jROv z0P@*0i@4Pak{>~$TuMrqOL>Xy_i7QPQ|<*rEixGPM~j^K$CQIFQ;r;755Syl?_Een zu@#b)(HKoFxRHj9pJ3Wtk6`6TU5bcAgn>80C;wH6#Fn8{!rg!MQ{-#o#?SE2eTYVM z8@FfznLi)OC0SyyaVnuh^v1)Wf!+|ssNs>VA4ICoznN0SwuqF7CMMfkMzkFF3`M_h zg$m^V5)?ay_9gmdcE0M@kYQAQ+{zmMZDh&EbId?l?P(w%9iujDz0xU#J~?_@a^2PU>3=OgfTuV7Ozy`2-)Oc`{= z54KjKey1a5*|ru0uOxAw^4EziZc2{;+Sf2gxAO}eH+XJFIUX;2c`~D!G#&G5!U;KwL z&o2V@d!fdxdmRPK|DurSm$OW7b2Hi6?_*!WWOfK!rxpHydovB99PLawi}sX-hpYpo zJb{}%l_DoC_L~gOEOM-qbyKshCOHmb)v(0dCrM6{(7~>jcLs?$Yg2Okv|iY&SY`n>*Z-|iSS3ZMxe73a(D3H" % self.name + + + +class BranchData (object): + """A class for managing branch specific data for a Tree + + By default, this class implements bootstrap data for TreeNode's. + + To incorporate new kinds of branch data, do the following. Subclass this + class (say, MyBranchData). Create Tree's with + Tree(branch_data=MyBranchData()). This will ensure your new branch data + manager is used for manipulations to the tree. Any tree's copied from the + tree (via tree.copy()) will also use the same branch manager. + """ + + def __init__(self): + pass + + def get_branch_data(self, node): + """Returns branch specific data from a node""" + if "boot" in node.data: + return {"boot": node.data["boot"]} + else: + return {} + + def set_branch_data(self, node, data): + """Set the branch specific data from 'data' to node.data""" + if "boot" in data: + node.data["boot"] = data["boot"] + + def split_branch_data(self, node): + """Split a branch's data into two copies""" + if "boot" in node.data: + return {"boot": node.data["boot"]}, {"boot": node.data["boot"]} + else: + return {}, {} + + def merge_branch_data(self, data1, data2): + """Merges the branch data from two neighboring branches into one""" + if "boot" in data1 and "boot" in data2: + assert data1["boot"] == data2["boot"], (data1, data2) + return {"boot": data1["boot"]} + else: + return {} + + + +class Tree (object): + """ + Basic rooted tree + + Well suited for phylogenetic trees + """ + + def __init__(self, nextname=1, branch_data=BranchData()): + self.nodes = {} + self.root = None + self.nextname = nextname + self.default_data = {} + self.data = {} + self.branch_data = branch_data + + + def copy(self): + """Returns a copy of the tree""" + tree = Tree(nextname = self.nextname) + + # copy structure + if self.root != None: + # copy all nodes + tree.root = self.root.copy() + + # set all names + def walk(node): + tree.nodes[node.name] = node + for child in node.children: + walk(child) + walk(tree.root) + + # copy extra data + tree.copy_data(self) + tree.copy_node_data(self) + + return tree + + + #========================================= + # iterators + + def __iter__(self): + """Iterate through nodes of tree""" + return self.nodes.itervalues() + + + def __len__(self): + """Returns number of nodes in tree""" + return len(self.nodes) + + + def __getitem__(self, key): + """Returns node by name""" + return self.nodes[key] + + + def __setitem__(self, key, node): + """Adds a node to the tree""" + node.name = key + self.add(node) + + + def __contains__(self, name): + """Returns True if tree has node with name 'name'""" + return name in self.nodes + + + def preorder(self, node=None, is_leaf=lambda 
x: x.is_leaf()): + """Iterate through nodes in pre-order traversal""" + + if node is None: + node = self.root + + queue = [node] + + while len(queue) > 0: + node = queue.pop() + yield node + + if not is_leaf(node): + for child in reversed(node.children): + queue.append(child) + + + def postorder(self, node=None, is_leaf=lambda x: x.is_leaf()): + """Iterate through nodes in post-order traversal""" + + if node is None: + node = self.root + + stack = [[node, 0]] + + while len(stack) > 0: + node, i = stack[-1] + + if i < len(node.children) and not is_leaf(node): + stack.append([node.children[i], 0]) + stack[-2][1] += 1 + else: + yield node + stack.pop() + + + def inorder(self, node=None, is_leaf=lambda x: x.is_leaf()): + """Iterate through nodes with in-order traversal""" + + if node is None: + node = self.root + + stack = [[node, 0]] + + while len(stack) > 0: + node, i = stack[-1] + + if node.is_leaf(): + yield node + stack.pop() + + elif i < len(node.children) and not is_leaf(node): + assert len(node.children) == 2 + + if i == 1: + # left has been visited + # yield current node then visit right + yield node + + # recurse into children + stack.append([node.children[i], 0]) + stack[-2][1] += 1 + else: + stack.pop() + + + #============================= + # structure functions + + def make_root(self, name = None): + """Create a new root node""" + if name is None: + name = self.new_name() + self.root = TreeNode(name) + return self.add(self.root) + + + def add(self, node): + """Add a node to the tree + Does not add node to any specific location (use add_child instead). + """ + self.nodes[node.name] = node + return node + + + def add_child(self, parent, child): + """Add a child node to an existing node 'parent' in the tree""" + assert parent != child + self.nodes[child.name] = child + self.nodes[parent.name] = parent + child.parent = parent + parent.children.append(child) + return child + + + def new_node(self, name=None): + """Add a new node with name 'name' to the tree""" + if name is None: + name = self.new_name() + return self.add(TreeNode(name)) + + + def remove(self, node): + """ + Removes a node from a tree. + Notifies parent (if it exists) that node has been removed. + """ + + if node.parent: + node.parent.children.remove(node) + del self.nodes[node.name] + + + def remove_tree(self, node): + """ + Removes subtree rooted at 'node' from tree. + Notifies parent (if it exists) that node has been removed. 
+ """ + + def walk(node): + if node.name in self.nodes: + del self.nodes[node.name] + for child in node.children: + walk(child) + walk(node) + + if node.parent: + node.parent.children.remove(node) + + + def rename(self, oldname, newname): + """Rename a node in the tree""" + node = self.nodes[oldname] + del self.nodes[oldname] + self.nodes[newname] = node + node.name = newname + + + def new_name(self): + """Returns a new node name that should be unique in the tree""" + name = self.nextname + self.nextname += 1 + return name + + + def unique_name(self, name, names, sep="_"): + """Create a new unique name not already in names""" + i = 1 + name2 = name + while name2 in names: + name2 = name + sep + str(i) + i += 1 + names.add(name2) + return name2 + + + def add_tree(self, parent, childTree): + """Add a subtree to the tree.""" + + # Merge nodes and change the names of childTree names if they conflict + # with existing names + self.merge_names(childTree) + self.add_child(parent, childTree.root) + + + def replace_tree(self, node, childTree): + """Remove node and replace it with the root of childTree""" + + # merge nodes and change the names of childTree names if they conflict + # with existing names + self.merge_names(childTree) + parent = node.parent + if parent: + index = parent.children.index(node) + parent.children[index] = childTree.root + childTree.root.parent = parent + del self.nodes[node.name] + + + def merge_names(self, tree2): + """Merge the node names from tree2 into this tree. + Change any names that conflict""" + + for name in tree2.nodes: + if name in self.nodes: + name2 = self.new_name() + self.nodes[name2] = tree2.nodes[name] + self.nodes[name2].name = name2 + else: + # make sure I do not issue a name that matches this one + if isinstance(name, int): + if name >= self.nextname: + self.nextname = name + 1 + self.nodes[name] = tree2.nodes[name] + + + def clear(self): + """Clear all nodes from tree""" + self.nodes = {} + self.root = None + + + def leaves(self, node=None): + """Return the leaves of the tree in order""" + if node is None: + node = self.root + if node is None: + return [] + return node.leaves() + + + def leaf_names(self, node = None): + """Returns the leaf names of the tree in order""" + return map(lambda x: x.name, self.leaves(node)) + + + #=============================== + # data functions + + def has_data(self, dataname): + """Does the tree contain 'dataname' in its extra data""" + return dataname in self.default_data + + + def copy_data(self, tree): + """Copy tree data to another""" + self.branch_data = tree.branch_data + self.default_data = copy.copy(tree.default_data) + self.data = copy.copy(tree.data) + + + def copy_node_data(self, tree): + """Copy node data to another tree""" + for name, node in self.nodes.iteritems(): + if name in tree.nodes: + node.data = copy.copy(tree.nodes[name].data) + self.set_default_data() + + + def set_default_data(self): + """Set default values in each node's data""" + for node in self.nodes.itervalues(): + for key, val in self.default_data.iteritems(): + node.data.setdefault(key, val) + + + def clear_data(self, *keys): + """Clear tree data""" + for node in self.nodes.itervalues(): + if len(keys) == 0: + node.data = {} + else: + for key in keys: + if key in node.data: + del node.data[key] + + + #====================================================================== + # branch data functions + # forward branch data calles to branch data manager + + def get_branch_data(self, node): + """Returns branch specific data from a node""" + 
return self.branch_data.get_branch_data(node) + + def set_branch_data(self, node, data): + """Set the branch specific data from 'data' to node.data""" + return self.branch_data.set_branch_data(node, data) + + def split_branch_data(self, node): + """Split a branch's data into two copies""" + return self.branch_data.split_branch_data(node) + + def merge_branch_data(self, data1, data2): + """Merges the branch data from two neighboring branches into one""" + return self.branch_data.merge_branch_data(data1, data2) + + + #======================================================================= + # input and output + # + + def read_data(self, node, data): + """Default data reader: reads optional bootstrap and branch length""" + + # also parse nhx comments + data = read_nhx_data(node, data) + + if ":" in data: + boot, dist = data.split(":") + node.dist = float(dist) + + if len(boot) > 0: + if boot.isdigit(): + node.data["boot"] = int(boot) + else: + try: + node.data["boot"] = float(boot) + except ValueError: + # treat as node name + name = boot.strip() + if name and node.name is None: + node.name = name + else: + data = data.strip() + + # treat as name + if data: + node.name = data + + + def write_data(self, node): + """Default data writer: writes optional bootstrap and branch length""" + + string = "" + if "boot" in node.data and \ + not node.is_leaf() and \ + self.root != node: + if isinstance(node.data["boot"], int): + string += "%d" % node.data["boot"] + else: + string += "%f" % node.data["boot"] + else: + # see if internal node names exist + if not node.is_leaf() and isinstance(node.name, str): + string += node.name + + string += ":%f" % node.dist + return string + + + def read_newick(self, filename, readData=None): + """ + Reads newick tree format from a file stream + + You can specify a specialized node data reader with 'readData' + """ + + return read_tree(filename, read_data=readData, tree=self) + + + def write(self, out=sys.stdout, writeData=None, oneline=False, + rootData=False): + """Write the tree in newick notation""" + self.write_newick(out, writeData=writeData, + oneline=oneline, rootData=rootData) + + + def write_newick(self, out=sys.stdout, writeData=None, oneline=False, + rootData=False): + """Write the tree in newick notation""" + write_newick(self, util.open_stream(out, "w"), + writeData=writeData, oneline=oneline, + rootData=rootData) + + + def get_one_line_newick(self, root_data=False, writeData=None): + """Get a presentation of the tree in a oneline string newick format""" + stream = StringIO.StringIO() + self.write(stream, oneline=True, + writeData=writeData, rootData=root_data) + return stream.getvalue() + + + +#============================================================================ +# Input/Output functions + +def read_tree(infile, read_data=None, tree=None): + """Read a tree from a file stream""" + infile = util.open_stream(infile) + return parse_newick(infile, read_data=read_data, tree=tree) + + +def read_newick(infile, read_data=None, tree=None): + """Read a tree from a file stream""" + infile = util.open_stream(infile) + return parse_newick(infile, read_data=read_data, tree=tree) + + +def iter_trees(treefile): + """read multiple trees from a tree file""" + + infile = util.open_stream(treefile) + + yield read_tree(infile) + try: + while True: + yield read_tree(infile) + except Exception, e: + pass + + +def tokenize_newick(infile): + """ + Iterates through the tokens in a stream in newick format + + infile -- a string or file stream + """ + + def 
iter_stream(infile): + while True: + yield infile.read(1) + + if not isinstance(infile, basestring): + infile = iter_stream(infile) + else: + infile = iter(infile) + + running = True + word = [] + for c in infile: + if c == "": + # EOF encountered + break + + elif c in " \t\n": + # skip white space + if word: + yield "".join(word) + word[:] = [] + + elif c in ";(),:[]": + # special tokens + if word: + yield "".join(word) + word[:] = [] + + if c == "[": + # parse comment + word.append(c) + for c in infile: + word.append(c) + if c == "]": + break + yield "".join(word) + word[:] = [] + else: + yield c + else: + # word token + word.append(c) + + if word: + yield "".join(word) + word[:] = [] + + +def parse_newick(infile, read_data=None, tree=None): + """ + Parse a newick string or stream + + infile -- a string or file stream + read_data -- an optional function for reading node data fields + tree -- an optional tree to populate + """ + + # node stack + ancestors = [] + + # create tree + if tree is None: + tree = Tree() + if read_data is None: + read_data = tree.read_data + + # create root + node = TreeNode() + tree.root = node + nodes = [node] + + # process token stream + tokens = tokenize_newick(infile) + token = None + data = [] + empty = True + try: + while True: + prev_token = token + token = tokens.next() + empty = False + + if token == '(': # new branchset + if data: + read_data(node, "".join(data)) + data = [] + child = TreeNode() + nodes.append(child) + child.parent = node + node.children.append(child) + ancestors.append(node) + node = child + + elif token == ',': # another branch + if data: + read_data(node, "".join(data)) + data = [] + parent = ancestors[-1] + child = TreeNode() + nodes.append(child) + + child.parent = parent + parent.children.append(child) + node = child + + elif token == ')': # optional name next + if data: + read_data(node, "".join(data)) + data = [] + node = ancestors.pop() + + elif token == ':': # optional length next + data.append(token) + + elif token == ';': # end of tree + if data: + read_data(node, "".join(data)) + data = [] + break + + else: + if prev_token in '(,': + node.name = token + + elif prev_token in '):': + data.append(token) + + else: + data.append(token) + + except StopIteration: + if empty: + raise Exception("Empty tree") + + except Exception, e: + raise # Exception("Malformed newick: " + repr(e)) + + # setup node names + names = set() + for node in nodes: + if node.name is None: + node.name = tree.new_name() + node.name = tree.unique_name(node.name, names) + tree.nodes[node.name] = node + + # test for bootstrap presence + for node in nodes: + if "boot" in node.data: + tree.default_data["boot"] = 0 + break + tree.set_default_data() + + + return tree + + +def write_newick(tree, out=sys.stdout, writeData=None, oneline=False, + rootData=False): + """Write the tree in newick notation""" + write_newick_node(tree, tree.root, util.open_stream(out, "w"), + writeData=writeData, oneline=oneline, + rootData=rootData) + + +def write_newick_node(tree, node, out=sys.stdout, + depth=0, writeData=None, oneline=False, + rootData=False): + """Write the node in newick format to the out file stream""" + + # default data writer + if writeData is None: + writeData = tree.write_data + + if not oneline: + out.write(" " * depth) + + if len(node.children) == 0: + # leaf + out.write(str(node.name)) + else: + # internal node + if oneline: + out.write("(") + else: + out.write("(\n") + for child in node.children[:-1]: + write_newick_node(tree, child, out, depth+1, + 
writeData=writeData, oneline=oneline) + if oneline: + out.write(",") + else: + out.write(",\n") + write_newick_node(tree, node.children[-1], out, depth+1, + writeData=writeData, oneline=oneline) + if oneline: + out.write(")") + else: + out.write("\n" + (" " * depth) + ")") + + # don't print data for root node + if depth == 0: + if rootData: + out.write(writeData(node)) + if oneline: + out.write(";") + else: + out.write(";\n") + else: + out.write(writeData(node)) + + +''' +def read_tree(filename): + """Read a tree from a newick file""" + tree = Tree() + tree.read_newick(filename) + return tree + + +def parse_newick(newick): + """Read a tree from newick notation stored in a string""" + tree = Tree() + stream = StringIO.StringIO(newick) + tree.read_newick(stream) + return tree +''' + + +#============================================================================= +# alternate reading functions + +def read_newick_ply(filename, readData=None, tree=None): + """read with PLY""" + + if tree is None: + tree = Tree() + else: + tree.clear() + + # default data reader + if readData is None: + readData = tree.read_data + + # get parse tree + text = util.read_until(util.open_stream(filename), ";")[0] + ";" + expr = treelib_parser.yacc.parse(text) + + # walk the parse tree and build the tree + names = set() + + def walk(expr): + children, name, data = expr + assert ":" not in name, "bad name '%s'" % name + + # parse name + if name == "": + name = None + node = TreeNode(name) + + # parse data + readData(node, data) + + if node.name is None: + node.name = tree.new_name() + + # ensure unique name + node.name = tree.unique_name(node.name, names) + + # recurse + for child in children: + ret = walk(child) + if ret: + tree.add_child(node, ret) + return node + tree.root = walk(expr) + tree.nodes[tree.root.name] = tree.root + + # test for bootstrap presence + for node in tree.nodes.itervalues(): + if "boot" in node.data: + tree.default_data["boot"] = 0 + break + tree.set_default_data() + + return tree + + +def read_newick_recursive(filename, tree=None): + """ + Reads a big newick file with a custom parser + + DEPRECATED + """ + + infile = util.open_stream(filename) #file(filename) + opens = [0] + names = set() + + if tree is None: + tree = Tree() + + def readchar(): + while True: + char = infile.read(1) + if not char or char not in " \t\n": break + if char == "(": opens[0] += 1 + if char == ")": opens[0] -= 1 + return char + + def read_until(chars): + token = "" + while True: + #char = readchar() + while True: + char = infile.read(1) + if not char or char not in " \t\n": break + if char == "(": opens[0] += 1 + if char == ")": opens[0] -= 1 + + if char in chars or char == "": + return token, char + token += char + + def read_dist(): + word = "" + while True: + #char = readchar() + while True: + char = infile.read(1) + if not char or char not in " \t\n": break + if char == "(": opens[0] += 1 + if char == ")": opens[0] -= 1 + + if not char in "-0123456789.e": + return float(word) + else: + word += char + + def read_name(): + token = "" + while True: + #char = readchar() + while True: + char = infile.read(1) + if not char or char not in " \t\n": break + if char == "(": opens[0] += 1 + if char == ")": opens[0] -= 1 + + if char in ":)," or char == "": + return token, char + token += char + + def read_item(): + char1 = readchar() + + if char1 == "(": + node = TreeNode(tree.new_name()) + depth = opens[0] + while opens[0] == depth: + tree.add_child(node, read_item()) + + token, char = read_until("):,") + if char == ":": + 
node.dist = read_dist() + return node + else: + #word, char = read_until(":),") + word, char = read_name() + word = char1 + word.rstrip() + + name = tree.unique_name(word, names) + + node = TreeNode(name) + if char == ":": + node.dist = read_dist() + return node + + + def read_root(): + word, char = read_until("(") + + assert char == "(" + + node = TreeNode(tree.new_name()) + depth = opens[0] + while opens[0] == depth: + tree.add_child(node, read_item()) + return node + + tree.root = read_root() + tree.add(tree.root) + + return tree + + +def read_parent_tree(treefile, labelfile=None, labels=None, tree=None): + """Reads a parent array from a file""" + + if tree is None: + tree = Tree() + + lines = util.open_stream(treefile).readlines() + + if labelfile: + labels = util.read_strings(labelfile) + + elif labels is None: + nitems = (len(lines) + 1)/ 2 + labels = map(str, range(nitems)) + + tree.make_root() + + for i, line in enumerate(lines): + parentid = int(line.split(" ")[0]) + + # determine current child + if i < len(labels): + child = TreeNode(labels[i]) + else: + if i in tree.nodes: + child = tree.nodes[i] + else: + child = TreeNode(i) + + if parentid == -1: + # keep track of all roots + tree.add_child(tree.root, child) + else: + if not parentid in tree.nodes: + parent = TreeNode(parentid) + tree.add(parent) + else: + parent = tree.nodes[parentid] + + try: + tree.add_child(parent, child) + except: + print i, parentid + + # remove unused internal nodes + labelset = set(labels) + for child in list(tree.root.children): + if child.is_leaf() and child.name not in labelset: + tree.remove(child) + + # remove redunant root + if len(tree.root.children) == 1: + tree.root = tree.root.children[0] + tree.remove(tree.root.parent) + tree.root.parent = None + + return tree + + +def write_parent_tree(treefile, tree, labels=None): + """Writes tree to the parent array format""" + + ids = {} + + if labels is None: + labels = tree.leaf_names() + + # assign ids to leaves + for leafname in labels: + ids[tree.nodes[leafname]] = len(ids) + + # assign ids to internal nodes + def walk(node): + node.recurse(walk) + if not node.is_leaf(): + ids[node] = len(ids) + walk(tree.root) + + # build ptree array + ptree = [0] * len(ids) + for node, idname in ids.iteritems(): + if node.parent != None: + ptree[idname] = ids[node.parent] + else: + ptree[idname] = -1 + + util.write_list(treefile, ptree) + + +#============================================================================= +# NHX format + +def parse_nhx_comment(comment): + """Parse a NHX comment""" + for pair in comment.split(":"): + if "=" in pair: + yield pair.split("=") + +def format_nhx_comment(data): + """Format a NHX comment""" + return "[&&NHX:" + ":".join("%s=%s" % (k, v) + for k, v in data.iteritems()) + "]" + + +def parse_nhx_data(text): + """Parse the data field of an NHX file""" + data = None + + if "[" in text: + data = {} + i = text.find("[") + j = text.find("]") + comment = text[i+1:j] + text = text[:i] + + if comment.startswith("&&NHX:"): + for k, v in parse_nhx_comment(comment[6:]): + data[k] = v + + return text, data + + +def read_nhx_data(node, text): + """Read data function for parsing the data field of an NHX file""" + + text, data = parse_nhx_data(text) + if data: + node.data.update(data) + return text + + +def write_nhx_data(node): + """Write data function for writing th data field of an NHX file""" + + text = Tree().write_data(node) + if node.data: + text += format_nhx_comment(node.data) + return text + + 
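+# A minimal usage sketch for the NHX helpers above (illustrative values only;
+# the "S=human" tag is an assumed example, not taken from this patch). Given a
+# node data field such as "99:0.1[&&NHX:S=human]", parse_nhx_data() splits the
+# NHX comment from the plain data:
+#
+#   >>> parse_nhx_data("99:0.1[&&NHX:S=human]")
+#   ('99:0.1', {'S': 'human'})
+#
+# read_nhx_data() stores the parsed dict in node.data and returns the remaining
+# "99:0.1" so the default reader can still parse bootstrap and branch length.
+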
+#============================================================================ +# Misc. functions for manipulating trees + +def assert_tree(tree): + """Assert that the tree data structure is internally consistent""" + + visited = set() + def walk(node): + assert node.name in tree.nodes + assert node.name not in visited + visited.add(node.name) + if node.parent: + assert node in node.parent.children + for child in node.children: + assert child.parent == node + node.recurse(walk) + walk(tree.root) + + assert tree.root.parent is None + assert len(tree.nodes) == len(visited), "%d %d" % (len(tree.nodes), len(visited)) + + + +def lca(nodes): + """Returns the Least Common Ancestor (LCA) of a list of nodes""" + + if len(nodes) == 1: + return nodes[0] + elif len(nodes) > 2: + return lca([lca(nodes[:2])] + nodes[2:]) + elif len(nodes) == 2: + node1, node2 = nodes + set1 = set([node1]) + set2 = set([node2]) + + while True: + if node1 in set2: + return node1 + if node2 in set1: + return node2 + if node1.parent != None: + node1 = node1.parent + if node2.parent != None: + node2 = node2.parent + + set1.add(node1) + set2.add(node2) + else: + raise Exception("No nodes given") + + +def find_dist(tree, name1, name2): + """Returns the branch distance between two nodes in a tree""" + + if not name1 in tree.nodes or \ + not name2 in tree.nodes: + raise Exception("nodes '%s' and '%s' are not in tree" % + (name1, name2)) + + # find root path for node1 + node1 = tree.nodes[name1] + path1 = [node1] + while node1 != tree.root: + node1 = node1.parent + path1.append(node1) + + # find root path for node2 + node2 = tree.nodes[name2] + path2 = [node2] + while node2 != tree.root: + node2 = node2.parent + path2.append(node2) + + # find when paths diverge + i = 1 + while i <= len(path1) and i <= len(path2) and (path1[-i] == path2[-i]): + i += 1 + + dist = 0 + for j in range(i, len(path1)+1): + dist += path1[-j].dist + for j in range(i, len(path2)+1): + dist += path2[-j].dist + + return dist + + +def descendants(node, lst=None): + """Return a list of all the descendants beneath a node""" + if lst is None: + lst = [] + for child in node.children: + lst.append(child) + descendants(child, lst=lst) + return lst + + +def count_descendants(node, sizes=None): + """Returns a dict with number of leaves beneath each node""" + if sizes is None: + sizes = {} + + if len(node.children) > 0: + sizes[node] = 0 + for child in node.children: + count_descendants(child, sizes) + sizes[node] += sizes[child] + else: + sizes[node] = 1 + + return sizes + + +def subtree(tree, node): + """Return a copy of a subtree of 'tree' rooted at 'node'""" + + # make new tree + tree2 = Tree(nextname = tree.new_name()) + + # copy nodes and data + tree2.root = node.copy() + tree2.copy_data(tree) + + # add nodes + def walk(node): + tree2.add(node) + node.recurse(walk) + walk(tree2.root) + + return tree2 + + +def max_disjoint_subtrees(tree, subroots): + """Returns a list of rooted subtrees with atmost one node from + the list 'subroots' + """ + + marks = {} + + # mark the path from each subroot to the root + for subroot in subroots: + ptr = subroot + while ptr != None: + lst = marks.setdefault(ptr, []) + lst.append(subroot) + ptr = ptr.parent + + # subtrees are those trees with nodes that have at most one mark + subroots2 = [] + def walk(node): + marks.setdefault(node, []) + if len(marks[node]) < 2 and \ + (not node.parent or len(marks[node.parent]) >= 2): + subroots2.append(node) + node.recurse(walk) + walk(tree.root) + + return subroots2 + + +def tree2graph(tree): 
+ """Convert a tree to a graph data structure (sparse matrix)""" + mat = {} + + # init all rows of adjacency matrix to + for name in tree.nodes: + mat[name] = {} + + for name, node in tree.nodes.iteritems(): + for child in node.children: + mat[name][child.name] = child.dist + + if node.parent: + mat[name][node.parent.name] = node.dist + + return mat + + +def graph2tree(mat, root, closedset=None): + """Convert a graph to a tree data structure""" + + if closedset is None: + closedset = set() + tree = Tree() + + def walk(name): + node = TreeNode(name) + node.dist = 0 + closedset.add(name) + for child in mat[name]: + if child not in closedset: + child_node = walk(child) + child_node.dist = mat[name][child] + tree.add_child(node, child_node) + return node + tree.root = walk(root) + + tree.nextname = max(name for name in tree.nodes if isinstance(name, int)) + + return tree + + +def remove_single_children(tree, simplify_root=True): + """ + Remove all nodes from the tree that have exactly one child + + Branch lengths are added together when node is removed. + """ + + # find single children + removed = [node + for node in tree + if len(node.children) == 1 and node.parent] + + # actually remove children + for node in removed: + newnode = node.children[0] + + # add distance + newnode.dist += node.dist + + # change parent and child pointers + newnode.parent = node.parent + index = node.parent.children.index(node) + node.parent.children[index] = newnode + + # remove old node + del tree.nodes[node.name] + + # remove singleton from root + if simplify_root and tree.root and len(tree.root.children) == 1: + oldroot = tree.root + tree.root = tree.root.children[0] + oldroot.children = [] + tree.remove(oldroot) + tree.root.parent = None + tree.root.dist += oldroot.dist + + return removed + + + +def remove_exposed_internal_nodes(tree, leaves=None): + """ + Remove all leaves that were originally internal nodes + + leaves -- a list of original leaves that should stay + + if leaves is not specified, only leaves with strings as names will be kept + """ + + if leaves != None: + stay = set(leaves) + else: + # use the fact that the leaf name is a string to determine + # wether to keep it + stay = set() + for leaf in tree.leaves(): + if isinstance(leaf.name, basestring): + stay.add(leaf) + + # post order traverse tree + def walk(node): + # keep a list of children to visit, since they may remove themselves + for child in list(node.children): + walk(child) + + if node.is_leaf() and node not in stay: + tree.remove(node) + walk(tree.root) + + +def subtree_by_leaves(tree, leaves=None, keep_single=False, + simplify_root=True): + """ + Remove any leaf not in leaves set + + leaves -- a list of leaves that should stay + keep_single -- if False, remove all single child nodes + simplify_root -- if True, basal branch is removed when removing single + children nodes + """ + + stay = set(leaves) + + # post order traverse tree + def walk(node): + # keep a list of children to visit, since they may remove themselves + for child in list(node.children): + walk(child) + + if node.is_leaf() and node not in stay: + tree.remove(node) + if len(stay) == 0: + tree.clear() + else: + walk(tree.root) + + if not keep_single: + remove_single_children(tree, simplify_root=simplify_root) + + return tree + + +def subtree_by_leaf_names(tree, leaf_names, keep_single=False, newCopy=False): + """Returns a subtree with only the leaves specified""" + + if newCopy: + tree = tree.copy() + return subtree_by_leaves(tree, [tree.nodes[x] for x in leaf_names], + 
keep_single=keep_single) + + +def reorder_tree(tree, tree2, root=True): + """Reorders the branches of tree to match tree2""" + + if root: + # reroot tree to match tree2 + root_branches = [set(n.leaf_names()) for n in tree2.root.children] + + def walk(node): + if node.is_leaf(): + leaves = set([node.name]) + else: + leaves = set() + for child in node.children: + l = walk(child) + if l is None: + return None + leaves = leaves.union(l) + + if leaves in root_branches: + # root found, terminate search + reroot(tree, node.name, newCopy=False) + return None + + return leaves + walk(tree.root) + + + # reorder tree to match tree2 + leaf_lookup = util.list2lookup(tree2.leaf_names()) + + def mean(lst): + return sum(lst) / float(len(lst)) + + def walk(node): + if node.is_leaf(): + return set([node.name]) + else: + leaf_sets = [] + + for child in node.children: + leaf_sets.append(walk(child)) + + scores = [mean(util.mget(leaf_lookup, l)) for l in leaf_sets] + rank = util.sortindex(scores) + node.children = util.mget(node.children, rank) + + # return union + ret = leaf_sets[0] + for l in leaf_sets[1:]: + ret = ret.union(l) + return ret + + walk(tree.root) + + +def set_tree_topology(tree, tree2): + """ + Changes the topology of tree to match tree2 + + trees must have nodes with the same names + """ + + nodes = tree.nodes + nodes2 = tree2.nodes + + for node in tree: + node2 = nodes2[node.name] + + # set parent + if node2.parent: + node.parent = nodes[node2.parent.name] + else: + node.parent = None + + # set children + if node.is_leaf(): + assert node2.is_leaf() + else: + # copy child structure + node.children[:] = [nodes[n.name] for n in node2.children] + + tree.root = nodes[tree2.root.name] + + + +#============================================================================= +# Rerooting functions +# + + +def is_rooted(tree): + """Returns True if tree is rooted""" + return len(tree.root.children) <= 2 + + + +def unroot(tree, newCopy=True): + """Return an unrooted copy of tree""" + + if newCopy: + tree = tree.copy() + + nodes = tree.root.children + if len(nodes) == 2 and not (nodes[0].is_leaf() and nodes[1].is_leaf()): + dist = nodes[0].dist + nodes[1].dist + data = tree.merge_branch_data(nodes[0].data, nodes[1].data) + if len(nodes[0].children) < 2: + nodes.reverse() + tree.add_child(nodes[0], nodes[1]) + nodes[1].dist = dist + tree.set_branch_data(nodes[1], data) + nodes[0].dist = 0 + tree.set_branch_data(nodes[0], {}) + nodes[0].parent = None + + # replace root + del tree.nodes[tree.root.name] + tree.root = nodes[0] + return tree + + +def reroot(tree, newroot, onBranch=True, newCopy=True): + """ + Change the rooting of a tree + """ + + # TODO: remove newCopy (or assert newCopy=False) + if newCopy: + tree = tree.copy() + + + # handle trivial case + if (not onBranch and tree.root.name == newroot) or \ + (onBranch and newroot in [x.name for x in tree.root.children] and \ + len(tree.root.children) == 2): + return tree + + assert not onBranch or newroot != tree.root.name, "No branch specified" + + unroot(tree, newCopy=False) + + # handle trivial case + if not onBranch and tree.root.name == newroot: + return tree + + if onBranch: + # add new root in middle of branch + newNode = TreeNode(tree.new_name()) + node1 = tree.nodes[newroot] + rootdist = node1.dist + rootdata1, rootdata2 = tree.split_branch_data(node1) + node1.dist = rootdist / 2.0 + tree.set_branch_data(node1, rootdata1) + newNode.dist = rootdist / 2.0 + tree.set_branch_data(newNode, rootdata2) + + node2 = node1.parent + node2.children.remove(node1) 
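+        # Splice the new root node into the branch: node1 is re-attached as a
+        # child of newNode, and newNode takes node1's former place under node2.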
+ tree.add_child(newNode, node1) + tree.add_child(node2, newNode) + + ptr = node2 + ptr2 = newNode + newRoot = newNode + else: + # root directly on node + ptr2 = tree.nodes[newroot] + ptr = ptr2.parent + newRoot = ptr2 + + newRoot.parent = None + + # reverse parent child relationship of all nodes on path node1 to root + oldroot = tree.root + nextDist = ptr2.dist + nextData = tree.get_branch_data(ptr2) + ptr2.dist = 0 + while True: + nextPtr = ptr.parent + ptr.children.remove(ptr2) + tree.add_child(ptr2, ptr) + + tmp = ptr.dist + tmpData = tree.get_branch_data(ptr) + ptr.dist = nextDist + tree.set_branch_data(ptr, nextData) + nextDist = tmp + nextData = tmpData + + ptr2 = ptr + ptr = nextPtr + + if nextPtr is None: + break + tree.root = newRoot + + return tree + + +def midpoint_root(tree): + """ + Reroot a tree using midpoint rerooting + """ + + # get maximum distance from leaves to each node + depths = {} + for node in tree.postorder(): + if node.is_leaf(): + depths[node] = (0.0, node) + else: + depths[node] = max((c.dist + depths[c][0], depths[c][1]) + for c in node.children) + + # find maximum path + dists = [] + for node in tree: + if node.is_leaf(): + continue + assert len(node.children) != 1 + tmp = sorted([(c.dist + depths[c][0], depths[c][1], c) + for c in node.children]) + dists.append((tmp[-1][0] + tmp[-2][0], node, + tmp[-1][2], tmp[-1][1], + tmp[-2][2], tmp[-2][1])) + + maxdist, top, child1, leaf1, child2, leaf2 = max(dists) + middist = maxdist / 2.0 + + + # find longer part of path + if depths[child1][0] + child1.dist >= middist: + ptr = leaf1 + else: + ptr = leaf2 + + # find branch that contains midpoint + dist = 0.0 + while ptr != top: + if ptr.dist + dist >= middist: + # reroot tree + reroot(tree, ptr.name, onBranch=True, newCopy=False) + + # fixup branch lengths and return + pdist = sum(c.dist for c in tree.root.children) + other = filter(lambda x: x != ptr, tree.root.children)[0] + ptr.dist = middist - dist + other.dist = pdist - ptr.dist + return tree + + dist += ptr.dist + ptr = ptr.parent + + + assert 0 # shouldn't get here + + +#============================================================================= +# ages (previous known as timestamps) +# +# Methods for calculating the ages (timestamps) of nodes in the tree given +# the branch lengths. +# + +def get_tree_ages(tree, root=None, leaves=None, times=None): + """ + Use the branch lengths of a tree to set timestamps for each node + Assumes ultrametric tree. 
+ + Leaves have time 0 + """ + + if root is None: + root = tree.root + + esp = .001 + if times is None: + times = {} + + def walk(node): + if node.is_leaf() or (leaves and node in leaves): + t = times.get(node, 0.0) + else: + t2 = None + for child in node.children: + t = walk(child) + + # ensure branch lengths are ultrametrix + if t2: + assert abs(t - t2)/t < esp, (node.name, t, t2) + t2 = t + + times[node] = t + return t + node.dist + walk(root) + + return times +get_tree_timestamps = get_tree_ages # backwards compatiability + + +def set_dists_from_ages(tree, times): + """ + Sets the branch lengths of a tree using a timestamp dict + """ + + for node in tree: + if node.parent: + node.dist = times[node.parent] - times[node] + else: + node.dist = 0.0 +set_dists_from_timestamps = set_dists_from_ages # backwards compatiability + + +def check_ages(tree, times): + """Asserts that timestamps are consistent with tree""" + + for node in tree: + if node.parent: + if times[node.parent] - times[node] < 0.0 or \ + abs(((times[node.parent] - times[node]) - + node.dist)/node.dist) > .001: + draw_tree_names(tree, maxlen=7, minlen=7) + util.printcols([(a.name, b) for a, b in times.items()]) + print + print node.name, node.dist, times[node.parent] - times[node] + raise Exception("negative time span") +check_timestamps = check_ages # backwards compatiability + + + +#============================================================================= +# parent tables + +def tree2parent_table(tree, data_cols=[]): + """Converts tree to a parent table + + This parent table will have a special numbering for the internal nodes, + such that their id is also their row in the table. + + parent table is a standard format of the Compbio Lab as of 02/01/2007. + It is a list of triples (node_name, parent_name, dist, ...) + + * parent_name indicates the parent of the node. If the node is a root + (has no parent), then parent_name is -1 + + * dist is the distance between the node and its parent. + + * additional columns can be added using the data_cols argument. The + values are looked up from node.data[col] + """ + + ptable = [] + + for node in tree: + if node.parent: + pname = node.parent.name + else: + pname = -1 + row = [node.name, pname, node.dist] + for col in data_cols: + row.append(node.data[col]) + ptable.append(row) + + return ptable + + +def parent_table2tree(ptable, data_cols=[], convert_names=True): + """Converts a parent table to a Tree + + if convert_names is True, names that are strings that look like integers + are converted to ints. + + See tree2parent_table for details + """ + + tree = Tree() + + parents = {} + + # create nodes + for row in ptable: + name, parent = row[:2] + if name.isdigit(): + name = int(name) + if parent.isdigit() or parent == "-1": + parent = int(parent) + + node = TreeNode(name) + node.dist = row[2] + tree.add(node) + parents[node] = parent + + for col, val in zip(data_cols, row[3:]): + node.data[col] = val + + # link up parents + for node, parent_name in parents.iteritems(): + if parent_name == -1: + tree.root = node + else: + parent = tree.nodes[parent_name] + tree.add_child(parent, node) + + return tree + + + +def tree2parent_table_ordered(tree, leaf_names=None): + """Converts tree to a parent table + + This parent table will have a special numbering for the internal nodes, + such that their id is also their row in the table. + + parent table is a standard format of the Compbio Lab as of 02/01/2007. 
+ It is a list of triples (node_name, parent_name, dist) + + * If the node is a leaf node_name is the leaf name (a string) + * If the node is internal node_name is an int representing which row + (0-based) the node is in the table. + + * parent_name indicates the parent of the node. If the parent is root, a + -1 is used as the parent_name. + + * dist is the distance between the node and its parent. + + Arguments: + leaf_names -- specifies that a tree with only a subset of the leaves + should be used + + NOTE: root is not given a row, because root does not have a distance + the nodeid of the root is -1 + """ + + if leaf_names != None: + tree = subtree_by_leaf_names(tree, leaf_names, newCopy=True) + else: + leaf_names = tree.leaf_names() + + # assign a numbering to the leaves as specified + nodeid = 0 + nodeids = {} + nodes = [] + for leaf in leaf_names: + nodeids[tree.nodes[leaf]] = leaf + nodes.append(tree.nodes[leaf]) + nodeid += 1 + + # assign a numbering to the internal nodes + for node in tree: + if node.is_leaf(): + continue + if node == tree.root: + nodeids[node] = -1 + else: + nodeids[node] = nodeid + nodeid += 1 + nodes.append(node) + + # make parent table + parentTable = [] + for node in nodes: + parentTable.append([nodeids[node], nodeids[node.parent], node.dist]) + + return parentTable + + +def parent_table2tree_ordered(ptable): + """Converts a parent table to a Tree + + See tree2parentTable for details + """ + + # TODO: allow named internal nodes + + tree = Tree() + + # create nodes + maxint = 0 + for name, parent_name, dist in parentTable: + node = TreeNode(name) + node.dist = dist + tree.add(node) + + if isinstance(name, int): + maxint = max(name, maxint) + + # make a root node + tree.nextname = maxint + 1 + tree.make_root() + + # link up parents + for name, parent_name, dist in parentTable: + if parent_name == -1: + parent = tree.root + else: + parent = tree.nodes[parent_name] + tree.add_child(parent, tree.nodes[name]) + + return tree + + +def write_parent_table(ptable, out=sys.stdout): + """Writes a parent table to out + + out can be a filename or file stream + """ + + out = util.open_stream(out, "w") + for row in ptable: + out.write("\t".join(map(str, row)) + "\n") + + + +def read_parent_table(filename): + """Reads a parent table from the file 'filename' + + filename can also be an open file stream + """ + + infile = util.open_stream(filename) + ptable = [] + + for line in infile: + row = line.rstrip("\n").split("\t") + name, parent, dist = row[:3] + + if name.is_digit(): + name = int(name) + if parent.is_digit() or parent == "-1": + parent = int(parent) + + ptable.append([name, parent, float(dist)] + row[3:]) + + return ptable + + + +#============================================================================= +# conversion to other formats + +def make_ptree(tree): + """Make parent tree array from tree""" + + nodes = [] + nodelookup = {} + ptree = [] + + def walk(node): + for child in node.children: + walk(child) + nodes.append(node) + walk(tree.root) + + def leafsort(a, b): + if a.is_leaf(): + if b.is_leaf(): + return 0 + else: + return -1 + else: + if b.is_leaf(): + return 1 + else: + return 0 + + # bring leaves to front + nodes.sort(cmp=leafsort) + nodelookup = util.list2lookup(nodes) + + for node in nodes: + if node == tree.root: + ptree.append(-1) + else: + ptree.append(nodelookup[node.parent]) + + assert nodes[-1] == tree.root + + return ptree, nodes, nodelookup + + + +#============================================================================= +# Tree 
visualization + +def layout_tree(tree, xscale, yscale, minlen=-util.INF, maxlen=util.INF, + rootx=0, rooty=0): + """\ + Determines the x and y coordinates for every branch in the tree. + + Branch lengths are determined by node.dist + """ + + """ + /----- ] + | ] nodept[node] + ---+ node ] + | + | + \--------- + """ + + # first determine sizes and nodepts + coords = {} + sizes = {} # number of descendants (leaves have size 1) + nodept = {} # distance between node y-coord and top bracket y-coord + def walk(node): + # calculate new y-coordinate for node + + # compute node sizes + sizes[node] = 0 + for child in node.children: + sizes[node] += walk(child) + + if node.is_leaf(): + sizes[node] = 1 + nodept[node] = yscale - 1 + else: + top = nodept[node.children[0]] + bot = (sizes[node] - sizes[node.children[-1]])*yscale + \ + nodept[node.children[-1]] + nodept[node] = (top + bot) / 2.0 + + return sizes[node] + walk(tree.root) + + # determine x, y coordinates + def walk(node, x, y): + xchildren = x+min(max(node.dist*xscale, minlen), maxlen) + coords[node] = [xchildren, y + nodept[node]] + + if not node.is_leaf(): + ychild = y + for child in node.children: + walk(child, xchildren, ychild) + ychild += sizes[child] * yscale + walk(tree.root, rootx, rooty) + + return coords + + + +def layout_tree_hierarchical(tree, xscale, yscale, + minlen=-util.INF, maxlen=util.INF, + rootx=0, rooty=0, + use_dists=True): + """\ + Determines the x and y coordinates for every branch in the tree. + + Leaves are drawn to line up. Best used for hierarchical clustering. + """ + + """ + /----- ] + | ] nodept[node] + ---+ node ] + | + | + \--------- + """ + + # first determine sizes and nodepts + coords = {} + sizes = {} # number of descendants (leaves have size 1) + depth = {} # how deep in tree is node + nodept = {} # distance between node y-coord and top bracket y-coord + def walk(node): + # calculate new y-coordinate for node + + # recurse: compute node sizes + sizes[node] = 0 + for child in node.children: + sizes[node] += walk(child) + + if node.is_leaf(): + sizes[node] = 1 + nodept[node] = yscale - 1 + depth[node] = 0 + else: + top = nodept[node.children[0]] + bot = (sizes[node] - sizes[node.children[-1]])*yscale + \ + nodept[node.children[-1]] + nodept[node] = (top + bot) / 2.0 + depth[node] = max(depth[child] + 1 for child in node.children) + + return sizes[node] + walk(tree.root) + + # determine x, y coordinates + maxdepth = depth[tree.root] + def walk(node, x, y): + xchildren = x + xscale * (maxdepth - depth[node]) + coords[node] = [xchildren, y + nodept[node]] + + if not node.is_leaf(): + ychild = y + for child in node.children: + walk(child, x, ychild) + ychild += sizes[child] * yscale + walk(tree.root, rootx, rooty) + + return coords + + +def layout_tree_vertical(layout, offset=None, root=0, leaves=None, + ydir=-1): + """ + Make layout vertical + """ + + if offset is None: + if leaves is not None: + for node in layout: + if node.is_leaf(): + offset = leaves - ydir*layout[node][0] + break + else: + for node in layout: + if node.parent is None: + offset = root - ydir*layout[node][0] + break + + for node, (x, y) in layout.iteritems(): + layout[node] = [y, offset + ydir*x] + return layout + + + +#============================================================================= +# Tree color map + +def tree_color_map(leafmap=lambda x: (0, 0, 0)): + """Returns a simple color mixing colormap""" + + def func(tree): + def walk(node): + if node.is_leaf(): + node.color = leafmap(node) + else: + colors = [] + for child in 
node.children: + walk(child) + colors.append(child.color) + node.color = color_mix(colors) + walk(tree.root) + return func + + +def color_mix(colors): + """Mixes together several color vectors into one""" + + sumcolor = [0, 0, 0] + for c in colors: + sumcolor[0] += c[0] + sumcolor[1] += c[1] + sumcolor[2] += c[2] + for i in range(3): + sumcolor[i] /= float(len(colors)) + return sumcolor + + +def make_expr_mapping(maps, default_color=(0, 0, 0)): + """Returns a function that maps strings matching an expression to a value + + maps -- a list of pairs (expr, value) + """ + + # find exact matches and expressions + exacts = {} + exps = [] + for key, val in maps: + if "*" not in key: + exacts[key] = val + else: + exps.append((key, val)) + + # create mapping function + def mapping(key): + if key in exacts: + return exacts[key] + + # return default color + if not isinstance(key, str): + return default_color + + # eval expressions first in order of appearance + for exp, val in exps: + if exp[-1] == "*": + if key.startswith(exp[:-1]): + return val + elif exp[0] == "*": + if key.endswith(exp[1:]): + return val + + raise Exception("Cannot map key '%s' to any value" % key) + return mapping + + +def read_tree_color_map(filename): + """Reads a tree colormap from a file""" + + infile = util.open_stream(filename) + maps = [] + + for line in infile: + expr, red, green, blue = line.rstrip().split("\t") + maps.append([expr, map(float, (red, green, blue))]) + + name2color = make_expr_mapping(maps) + + def leafmap(node): + return name2color(node.name) + + return tree_color_map(leafmap) + + +#========================================================================= +# Draw Tree ASCII art + +def draw_tree(tree, labels={}, scale=40, spacing=2, out=sys.stdout, + canvas=None, x=0, y=0, display=True, labelOffset=-1, + minlen=1,maxlen=10000): + """ + Print a ASCII Art representation of the tree + """ + if canvas is None: + canvas = textdraw.TextCanvas() + + xscale = scale + yscale = spacing + + + # determine node sizes + sizes = {} + nodept = {} + def walk(node): + if node.is_leaf(): + sizes[node] = 1 + nodept[node] = yscale - 1 + else: + sizes[node] = 0 + for child in node.children: + sizes[node] += walk(child) + if not node.is_leaf(): + top = nodept[node.children[0]] + bot = (sizes[node] - sizes[node.children[-1]])*yscale + \ + nodept[node.children[-1]] + nodept[node] = (top + bot) / 2 + return sizes[node] + walk(tree.root) + + + def walk(node, x, y): + # calc coords + xchildren = int(x+min(max(node.dist*xscale,minlen),maxlen)) + + # draw branch + canvas.line(x, y+nodept[node], xchildren, y+nodept[node], '-') + if node.name in labels: + branchlen = xchildren - x + lines = str(labels[node.name]).split("\n") + labelwidth = max(map(len, lines)) + + labellen = min(labelwidth, + max(int(branchlen-1),0)) + canvas.text(x + 1 + (branchlen - labellen)/2., + y+nodept[node]+labelOffset, + labels[node.name], width=labellen) + + if node.is_leaf(): + canvas.text(xchildren +1, y+yscale-1, str(node.name)) + else: + top = y + nodept[node.children[0]] + bot = y + (sizes[node]-sizes[node.children[-1]]) * yscale + \ + nodept[node.children[-1]] + + # draw children + canvas.line(xchildren, top, xchildren, bot, '|') + + ychild = y + for child in node.children: + walk(child, xchildren, ychild) + ychild += sizes[child] * yscale + + + canvas.set(xchildren, y+nodept[node], '+') + canvas.set(xchildren, top, '/') + canvas.set(xchildren, bot, '\\') + canvas.set(x, y+nodept[node], '+') + walk(tree.root, x+0, 0) + + if display: + canvas.display(out) 
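+
+# Minimal usage sketch for the layout/drawing helpers above (illustrative
+# only; it assumes nothing beyond the Tree/TreeNode API already used in this
+# module: Tree(), TreeNode(name), tree.add(), tree.add_child(), node.dist):
+#
+#   tree = Tree()
+#   root = TreeNode("n1")
+#   root.dist = 0.0
+#   tree.add(root)
+#   tree.root = root
+#   for name, dist in [("a", 1.0), ("b", 2.0)]:
+#       child = TreeNode(name)
+#       child.dist = dist
+#       tree.add_child(root, child)
+#
+#   draw_tree(tree, scale=10)              # ASCII rendering printed to stdout
+#   coords = layout_tree(tree, xscale=10, yscale=2)
+#   # coords maps each node to its [x, y] drawing position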
+
+
+def draw_tree_lens(tree, *args, **kargs):
+    labels = {}
+    for node in tree.nodes.values():
+        labels[node.name] = "%f" % node.dist
+
+    draw_tree(tree, labels, *args, **kargs)
+
+
+def draw_tree_boot_lens(tree, *args, **kargs):
+    if not tree.has_data("boot"):
+        draw_tree_lens(tree, *args, **kargs)
+        return
+
+    labels = {}
+    for node in tree.nodes.values():
+        if node.is_leaf():
+            labels[node.name] = "%f" % node.dist
+        else:
+            if isinstance(node.data["boot"], int):
+                labels[node.name] = "(%d) %f" % (node.data["boot"], node.dist)
+            else:
+                labels[node.name] = "(%.2f) %f" % (node.data["boot"], node.dist)
+
+    draw_tree(tree, labels, *args, **kargs)
+
+
+def draw_tree_names(tree, *args, **kargs):
+    labels = {}
+    for node in tree.nodes.values():
+        if not node.is_leaf():
+            labels[node.name] = "%s" % node.name
+
+    draw_tree(tree, labels, *args, **kargs)
+
+
+def draw_tree_name_lens(tree, *args, **kargs):
+    labels = {}
+    for node in tree.nodes.values():
+        if not node.is_leaf():
+            labels[node.name] = "%s " % node.name
+        else:
+            labels[node.name] = ""
+        labels[node.name] += "%f" % node.dist
+
+    draw_tree(tree, labels, *args, **kargs)
+
+
diff --git a/arghmm/deps/rasmus/treelib.pyc b/arghmm/deps/rasmus/treelib.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..61ed9dce6b5f478f04a76e5d7e2b339b97cdb942
GIT binary patch
literal 66688
zUA&&oWE(`tR$N_4h*WO6h8X!89zcvBFyQS$nv?r>y`AK+0(_O^ke$*mIc$cMl7kZr zlCI<+*l~sA@Qg|{#~L_ck(Ed1ljhF>&HPg;o>wu!-9Em#5w5uyrI!pSC6mo==7JTA zi4F=s40E{_l1$k1Rx*JqDVymUG9f3^D$75u?uy6a1CFF5_p)z%J&_B66~96OTA}zUgY)ZA9_%!E zp`LXfz@_<;$bI_k(G(_wu%$M}=8mWt+!V4%J>0bqmhd#YMSI6+?QmWuvav6kwUO|M z={0&Do#k53e-+PvRiGRjM$vvKKZ<`nkDqOm%Lts?SE@!(a-=XtXHKkqzct*Xw#&@0 z`fNKnfYwmpSdzdukan=a;0(rPqqYDPA1)HOth&U*KJ1)7Pne;XT8NJi?Wi@sdIcc_ zdT?(b)2@#7PkT9xuhWz_^p;^+i?X5E^8o*4SW`OMl1)^u52qS9XrAurSHGFPNEb3B zro``k)GJ~bdN6XvIT##$eu&1tMyP7RrYtns>Ma%fQ!^-*gK#%C);q zcJX1;HO2B5kob%9xSKD;$zpTa4VOc=UVanLzMbz!>au!r%8#o*0+=7az=6;+Tnq#Y zF8lm7x)%#er^_wZ^=SbrD#4;5hZCB=yuoN#3Cr#&_G;U;5?3`P7k6ZyJ_dot0S7QtTIve5W}|A+j3QiYwnA#4dA{0=#YFI z%4O=T&H!NJwtG?Aw%K^3lRCeX{EBa<0JGyCZe#F!{Rtt=^x20~D1m!28kA^Kdu;%4)3SyKK(D8JBS_i`ZG~;!gai5Q= z$Ii}|pV@M-8%!#?=}g{YNbC*QzrMcLE(&t|B1_7NdX{W1u(|x`hK&sMRYfu(_2`kv z?2CK_Tlu#dNd*J}!_4llasZYvK&^J*qdyudsp#bwe0UBw#xl!qVQ!}@>&r3@rq9k9 zRu0JuWkFu)8MwsL`ePdLMq7eS9RA_ zge-$#nes8&IV^M($J79RLIarKPBs#$0m=&7THRaQJz?|||D;OL>bG(LlS7IID}#qZ zvFT^C@vM0-S4^NcsR>D`S0<3tfez|NGMsqVu?bX>a+SV)2)|YKFM7Zl^_+ z;-E2y0-%q=xlhFKxVfokZ2&WrOE~_HCPX|vt3-;w4)Z^wq8mSqsDp#a3K`}v^IX12 z5l4t-c-^Zy5XSLn3UULB;Eg2clEnIB-)gDI-b)5$XFe0`pNFl|IvidhP zNMju`B7%wsDu?;D^tRf(dJ#g6KCmeF5PFp|Gxd|BPSjsW%3Zd&m7TNQ`H4IP(t#?@ z=&i~rhg3*Gj8O|2TxtBn%QkB74ZJLNbH|ZuBS}UOxEBsP`QgAjY7@$^6HbrSv-2!f zj!;J5V;Dzp(!Km_-_t3=k@QeWFAB@yxSIi3Zal438I5M+q0=WHf4uP!Tl06* z(^&F>ut{l60-zr-`U)wY55X0?Tz`ycM2bWWJCw5>I%8u~!EFEVEFi-eoR~rP~ za55>T1K+#sy-Ad_g35E53~n1J@IPEcOx~nEh1+)~&KkS05Z7j!r7-+Ct4F+HAM6&+ z%4Ok2#JyQq#EhwVxGq~2!9!Vm?+u}Kk)#T0Q*w)5LK-!%k(5TuV<=*H#OM`C=AO^J zRDoyjLmSw=x36C~&b*O9yjMG%5PGS_hPUd>W+SOS5fhD}py@Ne43lp<>z$Gaewbc~ zmX`i0qbi|5?iL+V;0ylGMM^*ue{_#+4v3Qt;iw3xmDl!?ZJ%HY6nToWp0$u5f+2$& z)5qNO-f_qQpWS9ti4(-MFN~r+V}2bO_G=)yl{0FTomMtZ4Kt~jX7?7p`)H$g ziI)S3UM;J*S+6(TYRXd-m!p-W4Ji~o%%rQ0VFwsxys2kpzZ7#jkEPAfXTy*X%dpGyd za2EZRo*8(wYqv=LE}G7&hiiw4aam_{Ue69f4*+_-g^9mEN~4id#Qrcq zYj{Yy=f165YPTNT%f7&~ywygLM*2~H#YZUSoF0_CJWwj^LB@~2k^?t{AE5#45h-$1 z!#>0+N5O%pbS&Vmxfkdzp`7zybt= z007cf0Px54%Do%`n-m`b^K#0nikv)c(uK?rJU4YAtECn9N6s$SVQrY3P(2btpU;GY zt9o3H2eHe`SxL{r!r!*i8Q8`(r%vNNl^>!^^H*r}QwPb|KX~3zs;PrVkDu4sl=Q{C zsp6ZMM5o#0U(k`c8EBYR?|e!}!qG^^yT**?`WWiFHQC~_y~#dDNSuAMmx{+UEwA@N_27}Awo$f?<=R)YVxwCHi)Lyot_{X=U%@9!YO)$ z(+AYIBAIs+fcZU4&U1p6q-9AT`mW*C5CY9h2+dvrq5K^|NGataiY}k&9?hZ; zrQ(!|r&WBM!hV6rPklIV>Iz&T%jZb_zS zI)l*t#A2s)aiRS|G4kSXQjD{NApKvf;a}T|#T9=y4aMIYta58*`{t$Jvi= z8{Rft-%`J4+XBzwQ3AKG^B_`<~f;|J=){6A5z*T?I->ettA9v>UuUf;>l#Q47P zsqy!)^~sIf>f`s+Z>kTEPi{L^e{kZ!aIHSZZ_Dt=_ 1: + ret = Dict(dim=self._dim - 1, default=self._null) + else: + ret = copy.copy(self._null) + if self._insert: + self[i] = ret + return ret + return dict.__getitem__(self, i) + + + def has_keys(self, *keys): + if len(keys) == 0: + return True + elif len(keys) == 1: + return dict.has_key(self, keys[0]) + else: + return dict.has_key(self, keys[0]) and \ + self[keys[0]].has_keys(*keys[1:]) + + def write(self, out=sys.stdout): + def walk(node, path): + if node.dim == 1: + for i in node: + print >>out, " ", + for j in path: + print str(j) + ", ", + print >>out, i, ":", node[i] + else: + for i in node: + walk(node[i], path + [i]) + + print >>out, "< DictMatrix " + walk(self, []) + print >>out, ">" + + + +class PushIter (object): + """Wrap an iterator in another iterator that allows one to push new + items onto the front of the iteration stream""" + + def __init__(self, it): + self._it = iter(it) + self._queue = [] + + def __iter__(self): + return self + + def next(self): + """Returns the next item in the iteration 
stream""" + if len(self._queue) > 0: + return self._queue.pop() + else: + return self._it.next() + + def push(self, item): + """Push a new item onto the front of the iteration stream""" + self._queue.append(item) + + def peek(self, default=None): + """Return the next item in the iteration stream without poping it""" + try: + next = self.next() + except StopIteration: + return default + + self.push(next) + return next + + + + +#============================================================================= +# list and dict functions for functional programming + +def equal(* vals): + """Returns True if all arguments are equal""" + if len(vals) < 2: + return True + a = vals[0] + for b in vals[1:]: + if a != b: + return False + return True + + +def remove(lst, *vals): + """Returns a copy of list 'lst' with values 'vals' removed + """ + delset = set(vals) + return [i for i in lst if i not in delset] + + +# aliases for sorted function (which was added in python2.4) +sort = sorted + + +def reverse(lst): + """Returns a reversed copy of a list + """ + lst2 = list(lst) + lst2.reverse() + return lst2 + + +def replace(lst, old_item, new_item, replace_all=False): + """Replace an item in a list""" + if replace_all: + for i in range(len(lst)): + if lst[i] == old_item: + lst[i] = new_item + else: + i = lst.index(old_item) + lst[i] = new_item + + +def cget(mat, *i): + """Returns the column(s) '*i' of a 2D list 'mat' + + mat -- matrix or 2D list + *i -- columns to extract from matrix + + NOTE: If one column is given, the column is returned as a list. + If multiple columns are given, a list of columns (also lists) is returned + """ + + if len(i) == 1: + return [row[i[0]] for row in mat] + else: + return [[row[index] for row in mat] + for index in i] + + +def mget(lst, ind): + """Returns a list 'lst2' such that lst2[i] = lst[ind[i]] + + Or in otherwords, get the subsequence of 'lst' + """ + return [lst[i] for i in ind] + + + +def concat(* lists): + """Concatenates several lists into one + """ + + lst = [] + for l in lists: + lst.extend(l) + return lst + + +def flatten(lst, depth=INF): + """ + Flattens nested lists/tuples into one list + + depth -- specifies how deep flattening should occur + """ + + flat = [] + + for elm in lst: + if hasattr(elm, "__iter__") and depth > 0: + flat.extend(flatten(elm, depth-1)) + else: + flat.append(elm) + + return flat + + +def subdict(dic, keys): + """ + Returns a new dictionary dic2 such that + dic2[i] = dic[i] for all i in keys + + dic -- a dictionary + keys -- a list of keys + """ + dic2 = {} + for key in keys: + if key in dic: + dic2[key] = dic[key] + return dic2 + + +def revdict(dic, allowdups=False): + """ + Reverses a dict 'dic' such that the keys become values and the + values become keys. + + allowdups -- if True, one of several key-value pairs with the same value + will be arbitrarily choosen. 
Otherwise an expection is raised + """ + + dic2 = {} + if allowdups: + for key, val in dic.iteritems(): + dic2[val] = key + else: + for key, val in dic.iteritems(): + assert key not in dic2, "duplicate value '%s' in dict" % val + dic2[val] = key + + return dic2 + + +def list2lookup(lst): + """ + Creates a dict where each key is lst[i] and value is i + """ + return dict((elm, i) for i, elm in enumerate(lst)) + + +def mapdict(dic, key=lambda x: x, val=lambda x: x): + """ + Creates a new dict where keys and values are mapped + """ + dic2 = {} + for k, v in dic.iteritems(): + dic2[key(k)] = val(v) + + return dic2 + + +def mapwindow(func, size, lst): + """Apply a function 'func' to a sliding window of size 'size' within + a list 'lst'""" + lst2 = [] + lstlen = len(lst) + radius = int(size // 2) + + for i in xrange(lstlen): + radius2 = min(i, lstlen - i - 1, radius) + lst2.append(func(lst[i-radius2:i+radius2+1])) + + return lst2 + + +def groupby(func, lst, multi=False): + """Places i and j of 'lst' into the same group if func(i) == func(j). + + func -- is a function of one argument that maps items to group objects + lst -- is a list of items + multi -- if True, func must return a list of keys (key1, ..., keyn) for + item a. groupby will return a nested dict 'dct' such that + dct[key1]...[keyn] == a + + returns: + a dictionary such that the keys are groups and values are items found in + that group + """ + + if not multi: + dct = defaultdict(lambda: []) + for i in lst: + dct[func(i)].append(i) + else: + dct = {} + for i in lst: + keys = func(i) + d = dct + for key in keys[:-1]: + d = d.setdefault(key, {}) + d.setdefault(keys[-1], []).append(i) + + return dct + + +def iter_groups2(items, key): + """ + Iterates over groups of consecutive values x from 'items' that have equal key(x)""" + + def iter_subgroup(): + pass + + + NULL = object() + last_key = NULL + group = [] + + for item in items: + k = key(item) + if k != last_key: + if group: + yield group + + # start new group + group = [] + last_key = k + group.append(item) + + if group: + yield group + + +def iter_groups(items, key): + """ + Iterates over groups of consecutive values x from 'items' that have equal key(x)""" + + + NULL = object() + last_key = NULL + group = [] + + for item in items: + k = key(item) + if k != last_key: + if group: + yield group + + # start new group + group = [] + last_key = k + group.append(item) + + if group: + yield group + + + +def unique(lst): + """ + Returns a copy of 'lst' with only unique entries. + The list is stable (the first occurance is kept). 
+ """ + + found = set() + + lst2 = [] + for i in lst: + if i not in found: + lst2.append(i) + found.add(i) + + return lst2 + + +def mapapply(funcs, lst): + """ + apply each function in 'funcs' to one element in 'lst' + """ + + lst2 = [] + for func, item in izip(funcs, lst): + lst2.append(func(item)) + return lst2 + + +def cumsum(vals): + """Returns a cumalative sum of vals (as a list)""" + + lst = [] + tot = 0 + for v in vals: + tot += v + lst.append(tot) + return lst + +def icumsum(vals): + """Returns a cumalative sum of vals (as an iterator)""" + + lst = [] + tot = 0 + for v in vals: + tot += v + yield tot + + +def frange(start, end, step): + """ + Generates a range of floats + + start -- begining of range + end -- end of range + step -- step size + """ + + i = 0 + val = start + while val < end: + yield val + i += 1 + val = start + i * step + + +def ilen(iterator): + """ + Returns the size of an iterator + """ + return sum(1 for i in iterator) + + +def exc_default(func, val, exc=Exception): + """Specify a default value for when an exception occurs""" + try: + return func() + except exc: + return val + + +#============================================================================= +# simple matrix functions + +def make_matrix(nrows, ncols, val = 0): + + return [[val for i in xrange(ncols)] + for j in xrange(nrows)] + + +def transpose(mat): + """ + Transpose a matrix + + Works better than zip() in that rows are lists not tuples + """ + + assert equal(* map(len, mat)), "rows are not equal length" + + mat2 = [] + + for j in xrange(len(mat[0])): + row2 = [] + mat2.append(row2) + for row in mat: + row2.append(row[j]) + + return mat2 + + +def submatrix(mat, rows=None, cols=None): + """ + Returns a submatrix of 'mat' with only the rows and columns specified + + Rows and columns will appear in the order as indicated in 'rows' and 'cols' + """ + + if rows == None: + rows = xrange(len(mat)) + if cols == None: + cols = xrange(len(mat[0])) + + mat2 = [] + + for i in rows: + newrow = [] + mat2.append(newrow) + for j in cols: + newrow.append(mat[i][j]) + + return mat2 + + +def map2(func, *matrix): + """ + Maps a function onto the elements of a matrix + + Also accepts multiple matrices. 
Thus matrix addition is + + map2(add, matrix1, matrix2) + + """ + + matrix2 = [] + + for i in xrange(len(matrix[0])): + row2 = [] + matrix2.append(row2) + + for j in xrange(len(matrix[0][i])): + args = [x[i][j] for x in matrix] + row2.append(func(* args)) + + return matrix2 + + +def min2(matrix): + """Finds the minimum of a 2D list or matrix + """ + return min(imap(min, matrix)) + + +def max2(matrix): + """Finds the maximum of a 2D list or matrix + """ + return max(imap(max, matrix)) + + +def range2(width, height): + """Iterates over the indices of a matrix + + Thus list(range2(3, 2)) returns + [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] + """ + + for i in xrange(width): + for j in xrange(height): + yield i, j + + +#============================================================================= +# list counting and finding functions + + +def count(func, lst): + """ + Counts the number of times func(x) is True for x in list 'lst' + + See also: + counteq(a, lst) count items equal to a + countneq(a, lst) count items not equal to a + countle(a, lst) count items less than or equal to a + countlt(a, lst) count items less than a + countge(a, lst) count items greater than or equal to a + countgt(a, lst) count items greater than a + """ + n = 0 + for i in lst: + if func(i): + n += 1 + return n + +def counteq(a, lst): return count(eqfunc(a), lst) +def countneq(a, lst): return count(neqfunc(a), lst) +def countle(a, lst): return count(lefunc(a), lst) +def countlt(a, lst): return count(ltfunc(a), lst) +def countge(a, lst): return count(gefunc(a), lst) +def countgt(a, lst): return count(gtfunc(a), lst) + + +def find(func, *lsts): + """ + Returns the indices 'i' of 'lst' where func(lst[i]) == True + + if N lists are passed, N arguments are passed to 'func' at a time. + Thus, find(func, list1, list2) returns the list of indices 'i' where + func(list1[i], list2[i]) == True + + See also: + findeq(a, lst) find items equal to a + findneq(a, lst) find items not equal to a + findle(a, lst) find items less than or equal to a + findlt(a, lst) find items less than a + findge(a, lst) find items greater than or equal to a + findgt(a, lst) find items greater than a + """ + + pos = [] + + if len(lsts) == 1: + # simple case, one list + lst = lsts[0] + for i in xrange(len(lst)): + if func(lst[i]): + pos.append(i) + else: + # multiple lists given + assert equal(* map(len, lsts)), "lists are not same length" + + nvars = len(lsts) + for i in xrange(len(lsts[0])): + if func(* [x[i] for x in lsts]): + pos.append(i) + + return pos + +def findeq(a, lst): return find(eqfunc(a), lst) +def findneq(a, lst): return find(neqfunc(a), lst) +def findle(a, lst): return find(lefunc(a), lst) +def findlt(a, lst): return find(ltfunc(a), lst) +def findge(a, lst): return find(gefunc(a), lst) +def findgt(a, lst): return find(gtfunc(a), lst) + + +def islands(lst): + """Takes a iterable and returns islands of equal consecutive items + + Return value is a dict with the following format + + counts = {elm1: [(start,end), (start,end), ...], + elm2: [(start,end), (start,end), ...] 
+ ...} + + where for each (start,end) in counts[elm1] we have lst[start:end] only + containing elm1 + + """ + + counts = {} + NULL = object() # unique NULL + last = NULL + start = 0 + + for i, x in enumerate(lst): + if x != last and last != NULL: + counts.setdefault(last, []).append((start, i)) + start = i + last = x + if last != NULL: + counts.setdefault(last, []).append((start, i+1)) + + return counts + + + +def binsearch(lst, val, cmp=cmp, order=1, key=None): + """Performs binary search for val in lst + + if val in lst: + Returns (i, i) where lst[i] == val + if val not in lst + Returns index i,j where + lst[i] < val < lst[j] + + runs in O(log n) + + lst -- sorted lst to search + val -- value to find + cmp -- comparison function (default: cmp) + order -- sort order of lst (1=ascending (default), -1=descending) + """ + + #TODO: make a funtion based linear search + + assert order == 1 or order == -1 + + if key is not None: + cmp = lambda a,b: cmp(key(a), key(b)) + + low = 0 + top = len(lst) - 1 + + if len(lst) == 0: + return None, None + + if cmp(lst[-1], val) * order == -1: + return (top, None) + + if cmp(lst[0], val) * order == 1: + return (None, low) + + while top - low > 1: + ptr = (top + low) // 2 + + comp = cmp(lst[ptr], val) * order + + if comp == 0: + # have we found val exactly? + return ptr, ptr + elif comp == -1: + # is val above ptr? + low = ptr + else: + top = ptr + + + # check top and low for exact hits + if cmp(lst[low], val) == 0: + return low, low + elif cmp(lst[top], val) == 0: + return top, top + else: + return low, top + + + + + +#============================================================================= +# max and min functions + +def argmax(lst, key=lambda x: x): + """ + Find the index 'i' in 'lst' with maximum lst[i] + + lst -- list to search + key -- function to apply to each lst[i]. + argmax(lst, key=func) --> argmax(map(key, lst)) + """ + + it = iter(lst) + top = 0 + topval = key(it.next()) + for i, item in enumerate(it, 1): + val = key(item) + if val > topval: + top = i + topval = val + return top + + +def argmin(lst, key=lambda x: x): + """ + Find the index 'i' in 'lst' with minimum lst[i] + + lst -- list to search + key -- function to apply to each lst[i]. + argmin(lst, key=func) --> argmin(map(key, lst)) + """ + + it = iter(lst) + low = 0 + lowval = key(it.next()) + for i, item in enumerate(it, 1): + val = key(item) + if val < lowval: + low = i + lowval = val + return low + + +''' +def argmin_old(lst, key=lambda x: x): + """ + Find the index 'i' in 'lst' with minimum lst[i] + + lst -- list to search + key -- function to apply to each lst[i]. + argmin(lst, key=func) --> argmin(map(key, lst)) + """ + + assert len(lst) > 0 + low = 0 + lowval = key(lst[0]) + for i in xrange(1, len(lst)): + val = key(lst[i]) + if val < lowval: + low = i + lowval = val + return low + + +def argmax_old(lst, key=lambda x: x): + """ + Find the index 'i' in 'lst' with maximum lst[i] + + lst -- list to search + key -- function to apply to each lst[i]. + argmax(lst, key=func) --> argmax(map(key, lst)) + """ + + assert len(lst) > 0 + top = 0 + topval = key(lst[0]) + for i in xrange(1, len(lst)): + val = key(lst[i]) + if val > topval: + top = i + topval = val + return top +''' + + +#============================================================================= +# math functions + +# +# comparison function factories +# +# These functions will return convenient comparison functions. 
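+# withinfunc(a, b) likewise returns a range predicate (inclusive bounds by
+# default, i.e. a <= x <= b).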
+# +# example: +# filter(ltfunc(4), lst) ==> returns all values in lst less than 4 +# count(ltfunc(4), lst) ==> returns the number of values in lst < 4 +# + +def eqfunc(a): return lambda x: x == a +def neqfunc(a): return lambda x: x != a +def ltfunc(a): return lambda x: x < a +def gtfunc(a): return lambda x: x > a +def lefunc(a): return lambda x: x <= a +def gefunc(a): return lambda x: x >= a +def withinfunc(a, b, ainc=True, binc=True): + if ainc: + if binc: + return lambda x: a <= x <= b + else: + return lambda x: a <= x < b + else: + if binc: + return lambda x: a < x <= b + else: + return lambda x: a < x < b + + +def sign(num): + """Returns the sign of a number""" + return cmp(num, 0) + +def lg(num): + """Retruns the log_2 of a number""" + return math.log(num, 2) + +def add(a, b): return a + b +def sub(a, b): return a - b +def mul(a, b): return a * b +def idiv(a, b): return a / b +def div(a, b): return a / float(b) + +def safediv(a, b, default=INF): + try: + return a / float(b) + except ZeroDivisionError: + return default + +def safelog(x, base=math.e, default=-INF): + try: + return math.log(x, base) + except (OverflowError, ValueError): + return default + +def invcmp(a, b): return cmp(b, a) + +def clamp(x, low=None, high=None): + """Clamps a value 'x' between the values 'low' and 'high' + If low == None, then there is no lower bound + If high == None, then there is no upper bound + """ + + if high is not None and x > high: + return high + elif low is not None and x < low: + return low + else: + return x + +def clampfunc(low=None, high=None): + return lambda x: clamp(x, low, high) + + + +def compose2(f, g): + """ + Compose two functions into one + + compose2(f, g)(x) <==> f(g(x)) + """ + return lambda *args, **kargs: f(g(*args, **kargs)) + + +def compose(*funcs): + """Composes two or more functions into one function + + example: + compose(f,g,h,i)(x) <==> f(g(h(i(x)))) + """ + + funcs = reversed(funcs) + f = funcs.next() + for g in funcs: + f = compose2(g, f) + return f + + +def overlap(a, b, x, y, inc=True): + """ + Returns True if range [a,b] overlaps [x,y] + + inc -- if True, treat [a,b] and [x,y] as inclusive + """ + if inc: + return (y >= a) and (x <= b) + else: + return (y > a) and (x < b) + + + +#============================================================================= +# regex +# + +def match(pattern, text): + """ + A quick way to do pattern matching. 
+
+    remember: to name tokens use (?P<name>pattern)
+    """
+
+    m = re.match(pattern, text)
+
+    if m is None:
+        return {}
+    else:
+        return m.groupdict()
+
+
+def evalstr(text):
+    """Replace expressions in a string (aka string interpolation)
+
+    ex:
+    >>> name = 'Matt'
+    >>> evalstr("My name is ${name} and my age is ${12+12}")
+    'My name is Matt and my age is 24'
+
+    "${!expr}" expands to "${expr}"
+
+    """
+
+    # get environment of caller
+    frame = sys._getframe(1)
+    global_dict = frame.f_globals
+    local_dict = frame.f_locals
+
+    # find all expressions to replace
+    m = re.finditer("\$\{(?P<expr>[^\}]*)\}", text)
+
+    # build new string
+    try:
+        strs = []
+        last = 0
+        for x in m:
+            expr = x.groupdict()['expr']
+
+            strs.append(text[last:x.start()])
+
+            if expr.startswith("!"):
+                strs.append("${" + expr[1:] + "}")
+            else:
+                strs.append(str(eval(expr, global_dict, local_dict)))
+            last = x.end()
+        strs.append(text[last:len(text)])
+    except Exception, e:
+        raise Exception("evalstr: " + str(e))
+
+    return "".join(strs)
+
+
+#=============================================================================
+# common Input/Output
+
+def read_ints(filename):
+    """Read a list of integers from a file (one int per line)
+
+    filename may also be a stream
+    """
+
+    infile = open_stream(filename)
+    vec = []
+    for line in infile:
+        vec.append(int(line))
+    return vec
+
+
+def read_floats(filename):
+    """Read a list of floats from a file (one float per line)
+
+    filename may also be a stream
+    """
+    infile = open_stream(filename)
+    vec = []
+    for line in infile:
+        vec.append(float(line))
+    return vec
+
+
+def read_strings(filename):
+    """Read a list of strings from a file (one string per line)
+
+    filename may also be a stream
+    """
+    infile = open_stream(filename)
+    vec = [line.rstrip("\n") for line in infile]
+    return vec
+
+
+def read_dict(filename, delim="\t", keytype=str, valtype=str):
+    """Read a dict from a file
+
+    filename may also be a stream
+    """
+
+    infile = open_stream(filename)
+    dct = {}
+
+    for line in infile:
+        tokens = line.rstrip("\n").split(delim)
+        assert len(tokens) >= 2, line
+        dct[keytype(tokens[0])] = valtype(tokens[1])
+
+    return dct
+
+
+def write_list(filename, lst):
+    """Write a list of anything (ints, floats, strings, etc) to a file.
+
+    filename may also be a stream
+    """
+    out = open_stream(filename, "w")
+    for i in lst:
+        print >>out, i
+
+
+def write_dict(filename, dct, delim="\t"):
+    """Write a dictionary to a file
+
+    filename may also be a stream
+    """
+
+    out = open_stream(filename, "w")
+    for k, v in dct.iteritems():
+        out.write("%s%s%s\n" % (str(k), delim, str(v)))
+
+
+
+class IgnoreCloseFile (object):
+    def __init__(self, stream):
+        self.__stream = stream
+
+    def __iter__(self):
+        return iter(self.__stream)
+
+    def __getattr__(self, name):
+        return getattr(self.__stream, name)
+
+    def close(self):
+        # ignore close call
+        pass
+
+
+
+def open_stream(filename, mode = "r", ignore_close=True):
+    """Returns a file stream depending on the type of 'filename' and 'mode'
+
+    The following types for 'filename' are handled:
+
+       stream        - returns 'filename' unchanged
+       iterator      - returns 'filename' unchanged
+       URL string    - opens http pipe
+       '-'           - opens stdin or stdout, depending on 'mode'
+       other string  - opens file with name 'filename'
+
+    mode is standard mode for file(): r,w,a,b
+
+    ignore_close -- if True and filename is a stream, then close() calls on
+                    the returned stream will be ignored.
+ """ + + is_stream = False + + # if filename has a file interface then return it back unchanged + if hasattr(filename, "read") or hasattr(filename, "write"): + stream = filename + is_stream = True + + # if mode is reading and filename is an iterator + elif "r" in mode and hasattr(filename, "next"): + stream = filename + is_stream = True + + # if filename is a string then open it + elif isinstance(filename, basestring): + # open URLs + if filename.startswith("http://"): + import urllib2 + stream = urllib2.urlopen(filename) + + # open stdin and stdout + elif filename == "-": + if "w" in mode: + stream = sys.stdout + is_stream = True + elif "r" in mode: + stream = sys.stdin + is_stream = True + else: + raise Exception("stream '-' can only be opened with modes r/w") + + # open regular file + else: + stream = open(filename, mode) + + # cannot handle other types for filename + else: + raise Exception("unknown filename type '%s'" % type(filename)) + + if is_stream and ignore_close: + stream = IgnoreCloseFile(stream) + + return stream + + + +#============================================================================= +# Delimited files +# + +class DelimReader: + """Reads delimited files""" + + def __init__(self, filename, delim="\t", types=None, parse=False): + """Constructor for DelimReader + + arguments: + filename -- filename or stream to read from + delim -- delimiting character + types -- types of columns + pars -- if True, fields are automatically parsed + """ + + self.infile = open_stream(filename) + self.delim = delim + self.types = types + self.parse = parse + + def __iter__(self): + return self + + def next(self): + line = self.infile.next() + row = line.rstrip("\n").split(self.delim) + if self.types: + return [func(x) for func, x in izip(self.types, row)] + elif self.parse: + return [autoparse(x) for x in row] + else: + return row + + +def read_delim(filename, delim="\t", types=None, parse=False): + """Read an entire delimited file into memory as a 2D list""" + return list(DelimReader(filename, delim, types, parse)) + + +def iter_delim(filename, delim="\t", types=None, parse=False): + """Iterate through a tab delimited file""" + return DelimReader(filename, delim, types, parse) + +def write_delim(filename, data, delim="\t"): + """Write a 2D list into a file using a delimiter""" + + out = open_stream(filename, "w") + for row in data: + out.write(delim.join(str(x) for x in row)) + out.write("\n") + out.close() + + +def guess_type(text): + """Guesses the type of a value encoded in a string""" + + # int + try: + int(text) + return int + except: + pass + + # float + try: + float(text) + return float + except ValueError: + pass + + # string + return str + + +def autoparse(text): + """Guesses the type of a value encoded in a string and parses""" + + # int + try: + return int(text) + except: + pass + + # float + try: + return float(text) + except ValueError: + pass + + # string + return text + + + +#============================================================================= +# printing functions +# + +def default_justify(val): + if isinstance(val, int) or \ + isinstance(val, float): + return "right" + else: + return "left" + + +def default_format(val): + if isinstance(val, int) and \ + not isinstance(val, bool): + return int2pretty(val) + elif isinstance(val, float): + if abs(val) < 1e-4: + return "%.2e" % val + else: + return "%.4f" % val + else: + return str(val) + + +def printcols(data, width=None, spacing=1, format=default_format, + justify=default_justify, out=sys.stdout, + 
colwidth=INF, overflow="!"): + """Prints a list or matrix in aligned columns + + data - a list or matrix + width - maxium number of characters per line (default: 75 for lists) + spacing - number of spaces between columns (default: 1) + out - stream to print to (default: sys.stdout) + """ + + if len(data) == 0: + return + + if isinstance(data[0], (list, tuple)): + # matrix printing has default width of unlimited + if width is None: + width = 100000 + + mat = data + else: + # list printing has default width 75 + if width is None: + width = 75 + + ncols = int(width / (max(map(lambda x: len(format(x)), data))+spacing)) + mat = list2matrix(data, ncols=ncols, bycols=True) + + + # turn all entries into strings + matstr = map2(format, mat) + + # overflow + for row in matstr: + for j in xrange(len(row)): + if len(row[j]) > colwidth: + row[j] = row[j][:colwidth-len(overflow)] + overflow + + # ensure every row has same number of columns + maxcols = max(map(len, matstr)) + for row in matstr: + if len(row) < maxcols: + row.extend([""] * (maxcols - len(row))) + + + # find the maximum width char in each column + maxwidths = map(max, map2(len, zip(* matstr))) + + + # print out matrix with whitespace padding + for i in xrange(len(mat)): + fields = [] + for j in xrange(len(mat[i])): + just = justify(mat[i][j]) + + if just == "right": + fields.append((" " * (maxwidths[j] - len(matstr[i][j]))) + \ + matstr[i][j] + \ + (" " * spacing)) + else: + # do left by default + fields.append(matstr[i][j] + + (" " * (maxwidths[j] - len(matstr[i][j]) + spacing))) + out.write("".join(fields)[:width] + "\n") + + +def list2matrix(lst, nrows=None, ncols=None, bycols=True): + """Turn a list into a matrix by wrapping its entries""" + + mat = [] + + if nrows == None and ncols == None: + nrows = int(math.sqrt(len(lst))) + ncols = int(math.ceil(len(lst) / float(nrows))) + elif nrows == None: + nrows = int(math.ceil(len(lst) / float(min(ncols, len(lst))))) + else: + ncols = int(math.ceil(len(lst) / float(min(nrows, len(lst))))) + + for i in xrange(nrows): + mat.append([]) + for j in xrange(ncols): + if bycols: + k = i + j*nrows + else: + k = i*ncols + j + if k < len(lst): + mat[-1].append(lst[k]) + + return mat + + +def printwrap(text, width=80, prefix="", out=sys.stdout): + """Prints text with wrapping""" + if width == None: + out.write(text) + out.write("\n") + return + + pos = 0 + while pos < len(text): + out.write(prefix) + out.write(text[pos:pos+width]) + out.write("\n") + pos += width + + + +def int2pretty(num): + """Returns a pretty-printed version of an int""" + + string = str(num) + parts = [] + l = len(string) + for i in xrange(0, l, 3): + t = l - i + s = t - 3 + if s < 0: s = 0 + parts.append(string[s:t]) + parts.reverse() + return ",".join(parts) + + +def pretty2int(string): + """Parses a pretty-printed version of an int into an int""" + return int(string.replace(",", "")) + + +def str2bool(val): + """Correctly converts the strings "True" and "False" to the + booleans True and False + """ + + if val == "True": + return True + elif val == "False": + return False + else: + raise Exception("unknown string for bool '%s'" % val) + + + +def print_dict(dic, key=lambda x: x, val=lambda x: x, + num=None, cmp=cmp, order=None, reverse=False, + spacing=4, out=sys.stdout, + format=default_format, + justify=default_justify): + """Prints a dictionary in two columns""" + + if num == None: + num = len(dic) + + dic = mapdict(dic, key=key, val=val) + items = dic.items() + + if order is not None: + items.sort(key=order, reverse=reverse) + 
else:
+        items.sort(cmp, reverse=reverse)
+
+    printcols(items[:num], spacing=spacing, out=out, format=format,
+              justify=justify)
+
+
+def print_row(*args, **kargs):
+    """
+    Prints a delimited row of values
+
+    out      -- output stream (default: sys.stdout)
+    delim    -- delimiter (default: '\t')
+    newline  -- newline character (default: '\n')
+    format   -- formatting function (default: str)
+    """
+
+    out = kargs.get("out", sys.stdout)
+    delim = kargs.get("delim", "\t")
+    newline = kargs.get("newline", "\n")
+    format = kargs.get("format", str)
+    out.write(delim.join(map(format, args)) + newline)
+
+
+#=============================================================================
+# Parsing
+#
+
+def read_word(infile, delims=[" ", "\t", "\n"]):
+    word = ""
+
+    while True:
+        char = infile.read(1)
+        if char == "":
+            return word
+        if char not in delims:
+            word += char
+            break
+
+    while True:
+        char = infile.read(1)
+        if char == "" or char in delims:
+            return word
+        word += char
+
+def read_until(stream, chars):
+    token = ""
+    while True:
+        char = stream.read(1)
+        if char in chars or char == "":
+            return token, char
+        token += char
+
+def read_while(stream, chars):
+    token = ""
+    while True:
+        char = stream.read(1)
+        if char not in chars or char == "":
+            return token, char
+        token += char
+
+def skip_comments(infile):
+    for line in infile:
+        if line.startswith("#") or line.startswith("\n"):
+            continue
+        yield line
+
+
+
+class IndentStream:
+    """
+    Makes any stream into an indent stream.
+
+    Indent stream auto indents every line written to it
+    """
+
+    def __init__(self, stream):
+        self.stream = open_stream(stream, "w")
+        self.linestart = True
+        self.depth = 0
+
+    def indent(self, num=2):
+        self.depth += num
+
+    def dedent(self, num=2):
+        self.depth -= num
+        if self.depth < 0:
+            self.depth = 0
+
+    def write(self, text):
+        lines = text.split("\n")
+
+        for line in lines[:-1]:
+            if self.linestart:
+                self.stream.write(" "*self.depth)
+            self.linestart = True
+            self.stream.write(line + "\n")
+
+        if len(lines) > 0:
+            if text.endswith("\n"):
+                self.linestart = True
+            else:
+                self.stream.write(" "*self.depth + lines[-1])
+                self.linestart = False
+
+
+
+
+
+#=============================================================================
+# file/directory functions
+
+
+def list_files(path, ext=""):
+    """Returns a list of files in 'path' ending with 'ext'"""
+
+    files = filter(lambda x: x.endswith(ext), os.listdir(path))
+    files.sort()
+    return [os.path.join(path, x) for x in files]
+
+
+def tempfile(path, prefix, ext):
+    """Generates a temp filename 'path/prefix_XXXXXX.ext'
+
+    DEPRECATED: use the standard library instead
+        fd, filename = tempfile.mkstemp(ext, prefix)
+        os.close(fd)
+    """
+
+    import warnings
+    warnings.filterwarnings("ignore", ".*", RuntimeWarning)
+    filename = os.tempnam(path, "____")
+    filename = filename.replace("____", prefix) + ext
+    warnings.filterwarnings("default", ".*", RuntimeWarning)
+
+    return filename
+
+
+def deldir(path):
+    """Recursively remove a directory"""
+
+    # This function is slightly more complicated because of a
+    # strange behavior in AFS, that creates .__afsXXXXX files
+
+    dirs = []
+
+    def cleandir(arg, path, names):
+        for name in names:
+            filename = os.path.join(path, name)
+            if os.path.isfile(filename):
+                os.remove(filename)
+        dirs.append(path)
+
+    # remove files
+    os.path.walk(path, cleandir, "")
+
+    # remove directories, deepest first (dirs[-i-1] walks the list in
+    # reverse; dirs[-0] would incorrectly start at the top-level directory)
+    for i in xrange(len(dirs)):
+        # AFS work around
+        afsFiles = list_files(dirs[-i-1])
+        for f in afsFiles:
+            os.remove(f)
+
+        while True:
+            try:
+                if os.path.exists(dirs[-i-1]):
+                    os.rmdir(dirs[-i-1])
+            except Exception, e:
+                continue
+            break
+
+
+def replace_ext(filename, oldext, newext):
+    """Safely replaces a file extension with a new one"""
+
+    if filename.endswith(oldext):
+        return filename[:-len(oldext)] + newext
+    else:
+        raise Exception("file '%s' does not have extension '%s'" % (filename, oldext))
+
+
+def makedirs(filename):
+    """
+    Makes a path of directories.
+    Does not fail if filename already exists
+    """
+
+    if not os.path.isdir(filename):
+        os.makedirs(filename)
+
+
+#=============================================================================
+# sorting
+#
+
+
+def sortindex(lst, cmp=cmp, key=None, reverse=False):
+    """Returns the sorted indices of items in lst"""
+    ind = range(len(lst))
+
+    if key is None:
+        compare = lambda a, b: cmp(lst[a], lst[b])
+    else:
+        compare = lambda a, b: cmp(key(lst[a]), key(lst[b]))
+
+    ind.sort(compare, reverse=reverse)
+    return ind
+
+
+def sortranks(lst, cmp=cmp, key=None, reverse=False):
+    """Returns the ranks of items in lst"""
+    return invperm(sortindex(lst, cmp, key, reverse))
+
+
+def sort_many(lst, *others, **args):
+    """Sort several lists based on the sorting of 'lst'"""
+
+    args.setdefault("reverse", False)
+
+    if "key" in args:
+        ind = sortindex(lst, key=args["key"], reverse=args["reverse"])
+    elif "cmp" in args:
+        ind = sortindex(lst, cmp=args["cmp"], reverse=args["reverse"])
+    else:
+        ind = sortindex(lst, reverse=args["reverse"])
+
+    lsts = [mget(lst, ind)]
+
+    for other in others:
+        lsts.append(mget(other, ind))
+
+    return lsts
+
+
+def invperm(perm):
+    """Returns the inverse of a permutation 'perm'"""
+    inv = [0] * len(perm)
+    for i in xrange(len(perm)):
+        inv[perm[i]] = i
+    return inv
+
+
+
+#=============================================================================
+# histograms, distributions
+#
+
+def one_norm(vals):
+    """Normalize values so that they sum to 1"""
+    s = float(sum(vals))
+    return [x/s for x in vals]
+
+
+def bucket_size(array, ndivs=None, low=None, width=None):
+    """Determine the bucket size needed to divide the values in array into
+       'ndivs' evenly sized buckets"""
+
+    if low is None:
+        low = min(array)
+
+    if ndivs is None:
+        if width is None:
+            ndivs = 20
+        else:
+            ndivs = int(math.ceil(max((max(array) - low) / float(width), 1)))
+
+    if width is None:
+        width = (max(array) - low) / float(ndivs)
+
+    return ndivs, low, width
+
+
+def bucket_bin(item, ndivs, low, width):
+    """
+    Return the bin for an item
+    """
+
+    assert item >= low, Exception("negative bucket index")
+    return min(int((item - low) / width), ndivs-1)
+
+
+def bucket(array, ndivs=None, low=None, width=None, key=lambda x: x):
+    """Group elements of 'array' into 'ndivs' lists"""
+
+    keys = map(key, array)
+
+    # set bucket sizes
+    ndivs, low, width = bucket_size(keys, ndivs, low, width)
+
+    # init histogram
+    h = [[] for i in xrange(ndivs)]
+    x = []
+
+    # bin items
+    for i in array:
+        if i >= low:
+            h[bucket_bin(key(i), ndivs, low, width)].append(i)
+    for i in xrange(ndivs):
+        x.append(i * width + low)
+    return (x, h)
+
+
+def hist(array, ndivs=None, low=None, width=None):
+    """Create a histogram of 'array' with 'ndivs' buckets"""
+
+    # set bucket sizes
+    ndivs, low, width = bucket_size(array, ndivs, low, width)
+
+    # init histogram
+    h = [0] * ndivs
+    x = []
+
+    # count items
+    for i in array:
+        if i >= low:
+            j = bucket_bin(i, ndivs, low, width)
+            if j < ndivs:
+                h[j] += 1
+    for i in xrange(ndivs):
+        x.append(i * width + low)
+    return (x, h)
+
+
+def hist2(array1, array2,
+          ndivs1=None, ndivs2=None,
+          low1=None, low2=None,
+          width1=None, width2=None):
+    """Perform a 2D histogram"""
+
+    # set bucket sizes
+    ndivs1, low1, width1 = bucket_size(array1, ndivs1, low1, width1)
+    ndivs2, low2, width2 = bucket_size(array2, ndivs2, low2, width2)
+
+    # init histogram
+    h = [[0] * ndivs1 for i in xrange(ndivs2)]
+    labels = []
+
+    for j,i in zip(array1, array2):
+        if j > low1 and i > low2:
+            h[bucket_bin(i, ndivs2, low2, width2)] \
+             [bucket_bin(j, ndivs1, low1, width1)] += 1
+
+    for i in range(ndivs2):
+        labels.append([])
+        for j in range(ndivs1):
+            labels[-1].append([j * width1 + low1,
+                               i * width2 + low2])
+    return labels, h
+
+
+def histbins(bins):
+    """Adjust the bins from starts to centers; this is useful for plotting"""
+
+    bins2 = []
+
+    if len(bins) == 1:
+        bins2 = [bins[0]]
+    else:
+        for i in range(len(bins) - 1):
+            bins2.append((bins[i] + bins[i+1]) / 2.0)
+        bins2.append(bins[-1] + (bins[-1] - bins[-2]) / 2.0)
+
+    return bins2
+
+
+def distrib(array, ndivs=None, low=None, width=None):
+    """Find the distribution of 'array' using 'ndivs' buckets"""
+
+    # set bucket sizes
+    ndivs, low, width = bucket_size(array, ndivs, low, width)
+
+    h = hist(array, ndivs, low, width)
+
+    total = float(sum(h[1]))
+    return (h[0], map(lambda x: (x/total)/width, h[1]))
+
+
+def hist_int(array):
+    """Returns a histogram of integers as a list of counts"""
+
+    hist = [0] * (max(array) + 1)
+    negative = []
+    for i in array:
+        if (i >= 0):
+            hist[i] += 1
+        else:
+            negative.append(i)
+    return hist
+
+
+def hist_dict(array):
+    """Returns a histogram of any items as a dict.
+
+    The keys of the returned dict are elements of 'array' and the values
+    are the counts of each element in 'array'.
+    """
+
+    hist = {}
+    for i in array:
+        if i in hist:
+            hist[i] += 1
+        else:
+            hist[i] = 1
+    return hist
+
+
+def print_hist(array, ndivs=20, low=None, width=None,
+               cols=75, spacing=2, out=sys.stdout):
+    data = list(hist(array, ndivs, low=low, width=width))
+
+    # find max bar
+    maxwidths = map(max, map2(compose(len, str), data))
+    maxbar = cols - sum(maxwidths) - 2 * spacing
+
+    # make bars
+    bars = []
+    maxcount = max(data[1])
+    for count in data[1]:
+        bars.append("*" * int(count * maxbar / float(maxcount)))
+    data.append(bars)
+
+    printcols(zip(* data), spacing=spacing, out=out)
+
+
+
+# import common functions from other files,
+# so that only util needs to be included
+
+try:
+    from rasmus.timer import *
+except ImportError:
+    try:
+        from timer import *
+    except ImportError:
+        pass
+
+try:
+    from rasmus.vector import *
+except ImportError:
+    try:
+        from vector import *
+    except ImportError:
+        pass
+
+try:
+    from rasmus.plotting import *
+except ImportError:
+    try:
+        from plotting import *
+    except ImportError:
+        pass
+
diff --git a/arghmm/deps/rasmus/util.pyc b/arghmm/deps/rasmus/util.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6db66314b4c3c21f8abd4eca3126c0ea5cd12f55
GIT binary patch
literal 60962
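The list helpers above are easiest to see by example. The following sketch is illustrative only (it is not part of the patch) and assumes the vendored copy is importable as rasmus.util, i.e. that arghmm/deps is on sys.path:

    from rasmus import util

    assert util.counteq(2, [1, 2, 2, 3]) == 2            # items equal to 2
    assert util.findgt(2, [1, 3, 0, 5]) == [1, 3]        # indices of items > 2
    assert util.islands([1, 1, 2, 2, 2, 1]) == \
        {1: [(0, 2), (5, 6)], 2: [(2, 5)]}               # runs of equal items
    assert util.binsearch([1, 3, 5, 7], 5) == (2, 2)     # exact hit
    assert util.binsearch([1, 3, 5, 7], 4) == (1, 2)     # lst[1] < 4 < lst[2]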
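A similar hedged sketch for the stream and delimited-file helpers; "example.tsv" is a placeholder path, not a file touched by this patch:

    from rasmus import util

    util.write_delim("example.tsv", [["a", 1], ["b", 2]])  # rows joined by tabs
    rows = util.read_delim("example.tsv", parse=True)      # autoparse() restores ints
    assert rows == [["a", 1], ["b", 2]]

    out = util.open_stream("-", "w")   # '-' maps to stdout; close() is ignored
    util.write_list(out, [1, 2, 3])    # one value per line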
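And one for the column/histogram printing helpers; the exact output formatting follows default_format() above:

    from rasmus import util

    util.printcols([["gene", "score"], ["abc1", 0.25], ["xyz2", 10]])  # aligned columns
    starts, counts = util.hist([1.0, 2.0, 2.5, 9.0], ndivs=4)          # bucket starts, counts
    assert counts == [3, 0, 0, 1]
    util.print_hist([1.0, 2.0, 2.5, 9.0], ndivs=4)                     # same data as ASCII bars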
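Finally, the permutation-based sorting helpers, again as an illustrative sketch rather than anything the patch itself runs:

    from rasmus import util

    assert util.sortindex([30, 10, 20]) == [1, 2, 0]   # indices that would sort the list
    assert util.invperm([1, 2, 0]) == [2, 0, 1]        # inverse permutation
    assert util.sortranks([30, 10, 20]) == [2, 0, 1]   # rank of each item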
zv37utHpd40<-aCiXyl;^7?jbEn-NL=78%j@kVW=}7y-f+p*4n{KYWr41a>T$$0l*? z!zGh8^C?1Q$=LoIph>NVpchds(33sKzXHy#xvcCas<`?!m)tLA6FWh5RE5Tnhc*dh zGYxzu%krj6DLB72f3X1<(1f%aI6?WcBj(lrctS?WZ zOT3ifKYFR5ef+8=&8y6s`)JIIig9r_hV?p@&;dTb&Ew>Qn$4e9PIK*PHdAn0&bfs| z+}BZU1DtR%vN-%Xb^I4J=MEnjR``H|*e0s?^7p1{i2=d_vk5W0Nc1|KOc-FD4$i@B zeHzgKGs+w&5>;5il~pJ)9Y-=6l#xrs2?5fqt$VEm^~{gGk-pVffLmZF31`cVy$~|mj$jk~09(I4re^~t zWvh(;R6U{Y;DeaeOUZgqp4=&7<@TmA8)(=tceH3GC{he6%nRNFgZ~w3!uK*vM4^PS zCwOLdyHBwl(UU!RGT4aCJ{B(6<+A>mhB!0(Kz`6IHR_<}J7ENDb%8C?toNn#w{}<# z3E!=(){)h9ye~)k8@%4Nan7vEfl&LXH!c?ZEo2T6!(%cTaz-n!_7iNlU8yyeIILK^ zEF{EDRr**gBwmoN<$iR2eU-P8FAGuZ7We_>EXoDFnyicrNtSP~9C!Hd;mEGX=tRk`|0{JX((7AHaFqB=$Ubx_ zO(UliJ`<<$$^4YUd>ZeEw5bFZ-B(qN=H||eIHKiE@FdMk39N?uhEA@2hAAa*o!=X$ z)zLz-;>`AY)B07W6_gx~(`tS(%|K3J@%k7&e6QX10%zg&Pi^$z=Uh1gEVEwG$0$6* zo?K6}XISuh(;5Cj&AP;fC)e8?2FQZN4>XSdRm6H77h(7&_9qlmx}Ryip_o>6QHwW1 zMw78*M?S3(CLkw*F48ST%LX+FmPu!tfGvQ8y5ds^o9`@-YMzI+`^+U^&(`QnNI5#N zEo#r|li`zN+Lb1(U_08fiPH7LIc8>G@M#X)MEGpBq8D1LI|T}Gy~P*za=|_@I>D{J zt{m1aUg_(5AI5wNZ21(}GAC~#uepC8b~6MGZc%r*Rbe~2o?A(nnLXwWVw@d?H6dpv zwU59k2tZTYqhz;)g9|QPS>28g_7A}^q75e^j%hZmEI$=Lc3-R4*pOc0Sr3oAUKowm z9=mv0m}0a#O+AruXueFl<~%2Q*=c8ur}3_{)7Es^Xmg^-#B$z56U#X}-D5Y25Y&|| z)z3FL{8`vJKn#`YD+o;gnNajruEN+&m79i+6OptlY{SWT0E?ox{a`x?5-X}dfZ;QJt}#U8TzSX*-PqOpWJvIn(I>xKF?ElN8~Yh zp!^iZ!*ocahzcN=%oFp%zt2-W-r`K|Yq{&$+^ z*K9EM3G;Y7L?O-J!1CvXi~7l~w8Knp9Xd^S_ha|*jV`Wpicz^JxmDWp(QS7*6a8n4 zBAazHSv5PNDWJxD&CM+2tF#?=8_C}5i}QRlwZk+oZiPpz9{aBjyPL_+P}63+XXez`~W%V`_biV5FE?O#_3iTdywmTOR^{MJUKSqk3?#}borf$CyU{L+EspjKKAIw@b_gw#u2*ndU z*NePwITWjM56g`vl`D@c0*ck3t3JQXFH`ATfDo5a?iDu-6;R(7q=v=&G$t{JHd~*T zdED_vA*@~%^>JKprg5`2*Jn%@)-%}ayK#mzSB?7oVxh9j$_hew6C5L2M zm{`IfE^Nk=Qnrm*hf$*>?zCOGFiws`A`# zX=nH8WeK~P*>_qOn#jK%!cw%fq4BVfoy4d-R3~8>_w2t6a`0&oR3snEsZdGn*&o%O zeeJwpd4=to+MiL4E5jP(nVvOm2zYiFZ3uTJ<@Smx0WNu5i6_bymPzlYbno9Ou}HjK zIdeYIWbgP6Z@EaMT1`1{>CL#|kLiuF$+*+`36Zh{8C#Igv#Cr(&foZ<3~y0}DI`C~ z-2vi_m}Cr<7xtVM;_TyAsNO=<%b}qW)?*c>#h%UTlmM0dyvC&xG5K-b6H7?0SMu9R zypH!r${i!|DuoVj>aw#aOSlsf)#UBr>~-N#XUWGbF^`i=2_>6DsLni|2tgC`y4}hl z0Yh?^l82Q%Mq+r$=iNwS{LmA6a#G0|C1;h`zpc2fMBxWXT}i40jLi4UtSR>iC7)9A zT}ob4f|w7<>q<}^Lh?l=Us9sA-Q>$kq$nmoqC}pkMEeKgF89Bz+|MZat4cI;$zNCU zHK+}~63Unu!6mHe8LUsv+?mB=Pe{(+LOEBPl%{u?F#t&*Qt@;8+H zqLN=x@(W6SNl8t~KUeZEl>Cm8Pb>NFl!)*Xxmc3_Ny-1LWSB`x{x{|Rm6Ct0(ONkESN$yp0 zzmnssUYnS_nK(J4oHj!yPb+yw$vGv@DfyU^x{~us;9m$0B`ZoUDfyHV*huioZ(IOD z$qUIBl*p@|e4i3)I5v&GqD!-auPOJ_O8$zHzoz8pl>7pTgS9*LY=@}l~F2_;1Ld3I;-VWFj4!WS2P)JMt9>rG=+a*~r!*OnR&O3}<7 zXEY;xB%=8LKxNm&w&4S{0}mg#V|?rQ*vQcM*tV_XyT`}JC${Yw-??r7_~`zv<2$z9 zG=BZ~VeW4opWy%9b8-+BjeSb+jnlE?H${8kMA5mI=*9k+xWzu-SlPKfn7Tv-CNtU zWzX=oiShm8hsMXox9jbl5AV5d|1L^z;Vs5BUKy&;^ZomGQ-c5Z9@{oXe{ZGMoqM+L s*)qP5_eS>Y9Unimf6oCOtg!R2o^W^9cxC^7oe-rnqLi|$ %s" % (fn1, fn2) + shutil.copy(fn1, fn2) diff --git a/test/test_pylibs.py b/test/test_pylibs.py new file mode 100644 index 00000000..2c312b4b --- /dev/null +++ b/test/test_pylibs.py @@ -0,0 +1,32 @@ + +import os +import sys +import subprocess + +def test_import_arghmm(): + + assert os.system("PYTHONPATH= python -c 'import arghmm'") == 0 + assert os.system("PYTHONPATH= python -c 'import arghmm.popsize'") == 0 + +def test_bin(): + filenames = os.listdir("bin") + errors = [] + for filename in filenames: + filename = os.path.join("bin", filename) + if not os.path.isfile(filename): + continue + + cmd = (("export PYTHONPATH=\n(head -n1 %s | grep python -q -v) || " + "%s --help < /dev/null 2>&1 ") % (filename, filename)) + + pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) + out = 
pipe.stdout.read() + if pipe.wait() != 0: + print "ERROR>", filename + print out + errors.append(filename) + + if len(errors) > 0: + print "scripts with erroneous imports:" + print "\n".join(errors) + raise Exception()
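The shell pipeline built in test_bin() deserves a note: "head -n1 ... | grep python -q -v" succeeds when a script's first line does not mention python, so only python scripts go on to the --help check, and those run with an emptied PYTHONPATH so that imports which silently relied on the developer's environment fail. A hedged, stand-alone sketch of the same check for a single script ("bin/some-script" is a placeholder name, not a file added by this patch):

    import subprocess

    script = "bin/some-script"   # placeholder; any python script under bin/
    cmd = ("export PYTHONPATH=\n"
           "(head -n1 %s | grep python -q -v) || "
           "%s --help < /dev/null 2>&1" % (script, script))
    pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    output = pipe.stdout.read()
    assert pipe.wait() == 0, output   # non-zero exit usually means a broken import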