From c58cb8c935c533567473ff270101420a0032fe53 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 13:43:43 -0800 Subject: [PATCH 01/17] tests and data for parsing jplace --- bp/tests/data/200/placement.jplace | 156 ++++++++++++++++++ bp/tests/data/200/placement.newick | 1 + .../data/300/placement.full_resolve.newick | 1 + bp/tests/data/300/placement_mul.jplace | 1 + bp/tests/data/300/placement_mul.newick | 1 + bp/tests/test_io.py | 54 +++++- 6 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 bp/tests/data/200/placement.jplace create mode 100644 bp/tests/data/200/placement.newick create mode 100644 bp/tests/data/300/placement.full_resolve.newick create mode 100644 bp/tests/data/300/placement_mul.jplace create mode 100644 bp/tests/data/300/placement_mul.newick diff --git a/bp/tests/data/200/placement.jplace b/bp/tests/data/200/placement.jplace new file mode 100644 index 0000000..9fd647d --- /dev/null +++ b/bp/tests/data/200/placement.jplace @@ -0,0 +1,156 @@ +{ + "fields": [ + "edge_num", + "likelihood", + "like_weight_ratio", + "distal_length", + "pendant_length" + ], + "metadata": { + "invocation": "/home/y5jiang/miniconda3/envs/std/bin/run_apples.py -q model.200.10000000.0.000001/01/1/query.fa -s model.200.10000000.0.000001/01/1/backbone.fa -t model.200.10000000.0.000001/01/1/jc_result/run.raxml.bestTree -o model.200.10000000.0.000001/01/1/jc_result/placement.jplace -f 0 -b 5 -D" + }, + "placements": [ + { + "n": [ + "82" + ], + "p": [ + [ + 361, + 0.01013206496780672, + 1, + 0.02652932626620403, + 0.039354548684623215 + ] + ] + }, + { + "n": [ + "99" + ], + "p": [ + [ + 308, + 0.04520741687623886, + 1, + 0.11020044356641526, + 0.06550337922097477 + ] + ] + }, + { + "n": [ + "43" + ], + "p": [ + [ + 309, + 0.04054866161921744, + 1, + 0.010712923050783987, + 0.020946988900520196 + ] + ] + }, + { + "n": [ + "195" + ], + "p": [ + [ + 277, + 0.01918907908397749, + 1, + 0.03065741838803451, + 0.04513513498399864 + ] + ] + }, + { + 
"n": [ + "162" + ], + "p": [ + [ + 55, + 0.01758935282545493, + 1, + 0.0033199487685078776, + 0.05388735804976052 + ] + ] + }, + { + "n": [ + "56" + ], + "p": [ + [ + 81, + 0.2366882303770561, + 1, + 0.04172580852519453, + 0.0007060238727097983 + ] + ] + }, + { + "n": [ + "91" + ], + "p": [ + [ + 105, + 0.0001863393767883581, + 1, + 0.04578898721138839, + 0.08655004339151215 + ] + ] + }, + { + "n": [ + "174" + ], + "p": [ + [ + 89, + 0.01216463967379211, + 1, + 0.04707020642820376, + 0.045206727542450205 + ] + ] + }, + { + "n": [ + "5" + ], + "p": [ + [ + 143, + 0.012162345471765756, + 1, + 0.023797389484252734, + 0.10447375403452556 + ] + ] + }, + { + "n": [ + "55" + ], + "p": [ + [ + 139, + 0.09563944060686769, + 1, + 0.014593217782258146, + 0.04537214236560885 + ] + ] + } + ], + "tree": "(((128:0.091649{0},(((((63:0.046046{1},34:0.026065{2}):0.147471{3},(36:0.056854{4},113:0.119931{5}):0.057087{6}):0.048654{7},(((65:0.013097{8},39:0.023368{9}):0.048249{10},(68:0.038382{11},(166:0.028569{12},(60:0.015049{13},86:0.070973{14}):0.018681{15}):0.065314{16}):0.013338{17}):0.037003{18},(172:0.11128{19},178:0.075705{20}):0.033605{21}):0.095847{22}):0.063586{23},((((127:0.095186{24},(169:0.004654{25},((176:0.080311{26},199:0.025038{27}):0.010857{28},171:0.036119{29}):0.008868{30}):0.039983{31}):0.071999{32},(((((((160:0.030215{33},192:0.023451{34}):0.048363{35},((197:0.1835{36},(141:0.072552{37},200:0.065412{38}):0.038114{39}):0.018307{40},138:0.111608{41}):0.011379{42}):0.064896{43},(20:0.132222{44},94:0.05062{45}):0.029734{46}):0.010164{47},((93:0.034447{48},17:0.16696{49}):0.036792{50},(52:0.073619{51},(108:0.108945{52},54:0.071498{53}):0.020253{54}):0.007886{55}):0.012139{56}):0.020069{57},(((117:0.087632{58},175:0.04907{59}):0.023117{60},105:0.063734{61}):0.034021{62},142:0.159509{63}):0.034053{64}):0.008633{65},180:0.038679{66}):0.011734{67},(32:0.177401{68},(191:0.012004{69},18:0.006781{70}):0.070602{71}):0.073237{72}):0.027244{73}):0.065858{74},(48:0.020422{75},145
:0.041412{76}):0.078334{77}):0.158131{78},((((4:0.077285{79},140:0.059016{80}):0.10287{81},(112:0.054516{82},73:0.098056{83}):0.07508{84}):0.040879{85},(((64:0.034688{86},(((103:0.024899{87},164:0.027051{88}):0.108886{89},(90:0.037625{90},170:0.013174{91}):0.061949{92}):0.04288{93},125:0.03853{94}):0.000689{95}):0.043095{96},((((155:0.007038{97},116:0.010439{98}):0.021865{99},(115:0.04627{100},194:0.050225{101}):0.001882{102}):0.007504{103},35:0.02298{104}):0.057944{105},(((129:0.001131{106},144:0.004662{107}):0.035561{108},(177:0.068837{109},27:0.109227{110}):0.005532{111}):0.025179{112},(97:0.048918{113},133:0.07577{114}):0.049757{115}):0.027304{116}):0.007374{117}):0.072662{118},(147:0.088135{119},11:0.064036{120}):0.146731{121}):0.045209{122}):0.033272{123},(((57:0.038761{124},(25:0.012687{125},139:0.051202{126}):0.011167{127}):0.070301{128},((114:0.04211{129},167:0.018572{130}):0.026461{131},58:0.098008{132}):0.003546{133}):0.046481{134},(((((189:0.004832{135},84:0.00443{136}):0.021258{137},101:0.048949{138}):0.045037{139},(163:0.040324{140},80:0.095645{141}):0.025838{142}):0.028898{143},((21:0.071425{144},190:0.028078{145}):0.038979{146},(1:0.020267{147},33:0.063047{148}):0.117384{149}):0.019667{150}):0.020694{151},2:0.084342{152}):0.023439{153}):0.086726{154}):0.074316{155}):0.103828{156}):0.22174{157},(((((96:0.047644{158},71:0.017724{159}):0.079408{160},(29:0.058493{161},158:0.110122{162}):0.057941{163}):0.077188{164},((28:0.032344{165},179:0.019221{166}):0.129582{167},(((126:0.021211{168},122:0.003436{169}):0.122348{170},((120:0.048273{171},23:0.083327{172}):0.024246{173},59:0.078525{174}):0.022224{175}):0.060385{176},(100:0.092243{177},87:0.079571{178}):0.064832{179}):0.066272{180}):0.091976{181}):1e-06{182},69:0.084749{183}):0.086387{184},((((((46:0.003773{185},85:0.017394{186}):0.030542{187},70:0.090134{188}):0.018446{189},152:0.072279{190}):0.027947{191},(77:0.051355{192},30:0.021462{193}):0.075809{194}):0.005858{195},181:0.099444{196}):0.083413{197},(
((182:0.064543{198},89:0.048253{199}):0.106031{200},((135:0.015705{201},7:0.00821{202}):0.030258{203},161:0.021676{204}):0.047744{205}):0.002861{206},(150:0.031904{207},((37:0.11002{208},61:0.068051{209}):0.018612{210},187:0.205805{211}):0.021969{212}):0.001188{213}):0.062534{214}):0.101311{215}):0.053824{216}):0.044002{217}):0.011366{218},(((((38:0.018712{219},92:0.035841{220}):0.028224{221},((154:0.021713{222},75:0.065821{223}):0.031695{224},132:0.049308{225}):0.049428{226}):0.019141{227},(24:0.084227{228},88:0.025948{229}):0.04393{230}):0.027486{231},((10:0.041678{232},50:0.099926{233}):0.089001{234},8:0.137018{235}):0.01696{236}):0.106719{237},(((((16:0.276403{238},((((95:0.11632{239},121:0.073923{240}):0.047448{241},(9:0.067187{242},136:0.037463{243}):0.124333{244}):0.019898{245},(((67:0.152317{246},41:0.038205{247}):0.00857{248},6:0.039272{249}):0.092741{250},(((130:0.023863{251},45:0.067713{252}):0.057962{253},(198:0.067436{254},(47:0.042684{255},12:0.016951{256}):0.079918{257}):0.019656{258}):0.043012{259},81:0.094409{260}):0.007624{261}):0.006874{262}):0.008771{263},(((76:0.035191{264},((51:0.030081{265},79:0.01744{266}):0.020574{267},124:0.020613{268}):0.034443{269}):0.079633{270},(98:0.074293{271},((123:0.060385{272},((168:0.019534{273},104:0.037426{274}):0.002911{275},131:0.013521{276}):0.03589{277}):0.012241{278},107:0.068656{279}):0.082793{280}):0.031941{281}):0.001462{282},186:0.144723{283}):0.00835{284}):0.029303{285}):0.010882{286},(66:0.173463{287},(3:0.142032{288},(102:0.056606{289},(165:0.041571{290},151:0.055941{291}):0.026282{292}):0.103159{293}):0.056229{294}):0.056911{295}):0.03039{296},((62:0.104589{297},(193:0.11889{298},15:0.057758{299}):1e-06{300}):0.056656{301},(((78:0.040435{302},137:0.055276{303}):0.028649{304},106:0.036861{305}):0.067081{306},26:0.070208{307}):0.123168{308}):0.042645{309}):0.010026{310},(((53:0.112361{311},153:0.068983{312}):0.029664{313},(159:0.092072{314},44:0.043428{315}):0.05657{316}):0.059581{317},(((196:0.024776
{318},22:0.028294{319}):0.132592{320},((((185:0.032274{321},13:0.134446{322}):0.024481{323},(111:0.048261{324},173:0.012298{325}):0.174056{326}):0.006241{327},156:0.077862{328}):0.016951{329},(((110:0.015929{330},14:0.011513{331}):0.033346{332},40:0.066945{333}):0.079179{334},19:0.130813{335}):0.010367{336}):0.030079{337}):1e-06{338},(42:0.021955{339},183:0.040206{340}):0.056799{341}):0.032675{342}):0.104819{343}):0.003319{344},(((118:0.065071{345},109:0.109846{346}):0.002421{347},146:0.205691{348}):0.003295{349},((74:0.030031{350},148:0.023025{351}):0.048024{352},83:0.232573{353}):0.003829{354}):0.100581{355}):0.059103{356}):0.037669{357}):0.017766{358},((157:0.070487{359},134:0.121248{360}):0.046303{361},(((119:0.159111{362},(184:0.022568{363},143:0.016722{364}):0.068286{365}):0.076846{366},(149:0.088897{367},188:0.101306{368}):0.045138{369}):0.011258{370},(72:0.153764{371},49:0.087393{372}):0.038648{373}):0.010676{374}):0.009971{375},31:0.124516{376});", + "version": 3 +} diff --git a/bp/tests/data/200/placement.newick b/bp/tests/data/200/placement.newick new file mode 100644 index 0000000..d2e19a3 --- /dev/null +++ b/bp/tests/data/200/placement.newick @@ -0,0 +1 @@ 
+(((128:0.091649,(((((63:0.046046,34:0.026065):0.147471,(36:0.056854,113:0.119931):0.057087):0.048654,(((65:0.013097,39:0.023368):0.048249,(68:0.038382,(166:0.028569,(60:0.015049,86:0.070973):0.018681):0.065314):0.013338):0.037003,(172:0.11128,178:0.075705):0.033605):0.095847):0.063586,((((127:0.095186,(169:0.004654,((176:0.080311,199:0.025038):0.010857,171:0.036119):0.008868):0.039983):0.071999,(((((((160:0.030215,192:0.023451):0.048363,((197:0.1835,(141:0.072552,200:0.065412):0.038114):0.018307,138:0.111608):0.011379):0.064896,(20:0.132222,94:0.05062):0.029734):0.010164,((93:0.034447,17:0.16696):0.036792,((52:0.073619,(108:0.108945,54:0.071498):0.020253):0.00332,162:0.053887):0.004566):0.012139):0.020069,(((117:0.087632,175:0.04907):0.023117,105:0.063734):0.034021,142:0.159509):0.034053):0.008633,180:0.038679):0.011734,(32:0.177401,(191:0.012004,18:0.006781):0.070602):0.073237):0.027244):0.065858,(48:0.020422,145:0.041412):0.078334):0.158131,(((((4:0.077285,140:0.059016):0.041726,56:0.000706):0.061144,(112:0.054516,73:0.098056):0.07508):0.040879,(((64:0.034688,((((103:0.024899,164:0.027051):0.04707,174:0.045207):0.061816,(90:0.037625,170:0.013174):0.061949):0.04288,125:0.03853):0.000689):0.043095,(((((155:0.007038,116:0.010439):0.021865,(115:0.04627,194:0.050225):0.001882):0.007504,35:0.02298):0.045789,91:0.08655):0.012155,(((129:0.001131,144:0.004662):0.035561,(177:0.068837,27:0.109227):0.005532):0.025179,(97:0.048918,133:0.07577):0.049757):0.027304):0.007374):0.072662,(147:0.088135,11:0.064036):0.146731):0.045209):0.033272,(((57:0.038761,(25:0.012687,139:0.051202):0.011167):0.070301,((114:0.04211,167:0.018572):0.026461,58:0.098008):0.003546):0.046481,(((((((189:0.004832,84:0.00443):0.021258,101:0.048949):0.014593,55:0.045372):0.030444,(163:0.040324,80:0.095645):0.025838):0.023797,5:0.104474):0.005101,((21:0.071425,190:0.028078):0.038979,(1:0.020267,33:0.063047):0.117384):0.019667):0.020694,2:0.084342):0.023439):0.086726):0.074316):0.103828):0.22174,(((((96:0.047
644,71:0.017724):0.079408,(29:0.058493,158:0.110122):0.057941):0.077188,((28:0.032344,179:0.019221):0.129582,(((126:0.021211,122:0.003436):0.122348,((120:0.048273,23:0.083327):0.024246,59:0.078525):0.022224):0.060385,(100:0.092243,87:0.079571):0.064832):0.066272):0.091976):0.000001,69:0.084749):0.086387,((((((46:0.003773,85:0.017394):0.030542,70:0.090134):0.018446,152:0.072279):0.027947,(77:0.051355,30:0.021462):0.075809):0.005858,181:0.099444):0.083413,(((182:0.064543,89:0.048253):0.106031,((135:0.015705,7:0.00821):0.030258,161:0.021676):0.047744):0.002861,(150:0.031904,((37:0.11002,61:0.068051):0.018612,187:0.205805):0.021969):0.001188):0.062534):0.101311):0.053824):0.044002):0.011366,(((((38:0.018712,92:0.035841):0.028224,((154:0.021713,75:0.065821):0.031695,132:0.049308):0.049428):0.019141,(24:0.084227,88:0.025948):0.04393):0.027486,((10:0.041678,50:0.099926):0.089001,8:0.137018):0.01696):0.106719,(((((16:0.276403,((((95:0.11632,121:0.073923):0.047448,(9:0.067187,136:0.037463):0.124333):0.019898,(((67:0.152317,41:0.038205):0.00857,6:0.039272):0.092741,(((130:0.023863,45:0.067713):0.057962,(198:0.067436,(47:0.042684,12:0.016951):0.079918):0.019656):0.043012,81:0.094409):0.007624):0.006874):0.008771,(((76:0.035191,((51:0.030081,79:0.01744):0.020574,124:0.020613):0.034443):0.079633,(98:0.074293,((123:0.060385,(((168:0.019534,104:0.037426):0.002911,131:0.013521):0.030657,195:0.045135):0.005233):0.012241,107:0.068656):0.082793):0.031941):0.001462,186:0.144723):0.00835):0.029303):0.010882,(66:0.173463,(3:0.142032,(102:0.056606,(165:0.041571,151:0.055941):0.026282):0.103159):0.056229):0.056911):0.03039,(((62:0.104589,(193:0.11889,15:0.057758):0.000001):0.056656,((((78:0.040435,137:0.055276):0.028649,106:0.036861):0.067081,26:0.070208):0.1102,99:0.065503):0.012968):0.010713,43:0.020947):0.031932):0.010026,(((53:0.112361,153:0.068983):0.029664,(159:0.092072,44:0.043428):0.05657):0.059581,(((196:0.024776,22:0.028294):0.132592,((((185:0.032274,13:0.134446):0.024481,(111:0.
048261,173:0.012298):0.174056):0.006241,156:0.077862):0.016951,(((110:0.015929,14:0.011513):0.033346,40:0.066945):0.079179,19:0.130813):0.010367):0.030079):0.000001,(42:0.021955,183:0.040206):0.056799):0.032675):0.104819):0.003319,(((118:0.065071,109:0.109846):0.002421,146:0.205691):0.003295,((74:0.030031,148:0.023025):0.048024,83:0.232573):0.003829):0.100581):0.059103):0.037669):0.017766,(((157:0.070487,134:0.121248):0.026529,82:0.039355):0.019774,(((119:0.159111,(184:0.022568,143:0.016722):0.068286):0.076846,(149:0.088897,188:0.101306):0.045138):0.011258,(72:0.153764,49:0.087393):0.038648):0.010676):0.009971,31:0.124516); diff --git a/bp/tests/data/300/placement.full_resolve.newick b/bp/tests/data/300/placement.full_resolve.newick new file mode 100644 index 0000000..c4e31f3 --- /dev/null +++ b/bp/tests/data/300/placement.full_resolve.newick @@ -0,0 +1 @@ +(((128:0.091649,(((((63:0.046046,34:0.026065):0.147471,(36:0.056854,((((((((((113:0.004063,290:0.005988):0.000434,284:0.072903):0.003443,287:0.056094):0.001762,282:0.098423):0.003904,286:0.060559):0.002115,288:0.006143):0.002067,289:0.088623):0.000588,283:0.085678):0.001465,281:0.003223):0.002072,285:0.091293):0.098017):0.057087):0.048654,(((65:0.013097,39:0.023368):0.048249,(68:0.038382,(166:0.028569,(60:0.015049,86:0.070973):0.018681):0.065314):0.013338):0.037003,(172:0.11128,178:0.075705):0.033605):0.095847):0.063586,((((127:0.095186,(169:0.004654,((176:0.080311,199:0.025038):0.010857,171:0.036119):0.008868):0.039983):0.071999,(((((((((((((((((160:0.030215,192:0.023451):0.048363,((197:0.1835,(141:0.072552,200:0.065412):0.038114):0.018307,138:0.111608):0.011379):0.001504,223:0.090953):0.000226,230:0.047803):0.000344,229:0.068799):0.000164,226:0.00335):0.00045,222:0.031651):0.000887,228:0.038382):0.002752,221:0.091118):0.000559,227:0.084958):0.001811,225:0.074026):0.000567,224:0.003594):0.055633,(20:0.132222,94:0.05062):0.029734):0.010164,((((((((((((93:0.034447,17:0.16696):0.036792,((((((((((((52:0.073619,(108:
0.108945,54:0.071498):0.020253):0.000176,294:0.099238):0.002077,296:0.035863):0.000773,300:0.071974):0.000295,162:0.053887):0.002136,297:0.079837):0.003267,291:0.094715):0.002088,292:0.028878):0.001585,299:0.024189):0.000163,293:0.070704):0.000281,298:0.058305):0.001655,295:0.06148):-0.006608):0.000093,252:0.027034):0.000133,255:0.002332):0.004035,258:0.038656):0.003066,260:0.037665):0.003569,256:0.020097):0.009588,257:0.040595):0.017057,251:0.080285):0.000343,259:0.078047):0.000655,253:0.032226):0.003105,254:0.010247):-0.029506):0.020069,(((117:0.087632,175:0.04907):0.023117,105:0.063734):0.034021,142:0.159509):0.034053):0.008633,180:0.038679):0.011734,(32:0.177401,(191:0.012004,18:0.006781):0.070602):0.073237):0.027244):0.065858,(48:0.020422,145:0.041412):0.078334):0.158131,(((((4:0.077285,140:0.059016):0.041726,56:0.000706):0.061144,(((((((((((112:0.000831,208:0.008299):0.0014,204:0.066574):0.001608,201:0.097385):0.000593,207:0.042287):0.000956,202:0.032033):0.000414,203:0.075777):0.001457,205:0.079511):0.00288,209:0.089969):0.007424,206:0.006239):0.002295,210:0.097627):0.034659,73:0.098056):0.07508):0.040879,(((64:0.034688,((((103:0.024899,164:0.027051):0.04707,174:0.045207):0.061816,(90:0.037625,((((((((((170:0.003516,268:0.044987):0.00756,264:0.022814):0.002436,269:0.063646):0.00001,263:0.037064):0.001021,270:0.020117):0.006095,267:0.004791):0.0058,265:0.012625):0.000153,266:0.018241):0.005366,262:0.091953):0.012666,261:0.075964):-0.031448):0.061949):0.04288,125:0.03853):0.000689):0.043095,(((((((((((((((155:0.007038,116:0.010439):0.016547,214:0.016392):0.013741,213:0.053041):0.002082,219:0.006335):0.013601,218:0.086967):0.001359,220:0.002828):0.001732,217:0.088396):0.009109,211:0.062074):0.005639,212:0.032374):0.01498,215:0.070369):0.02325,216:0.058501):-0.080176,(115:0.04627,194:0.050225):0.001882):0.007504,35:0.02298):0.045789,91:0.08655):0.012155,(((129:0.001131,144:0.004662):0.035561,(177:0.068837,27:0.109227):0.005532):0.025179,(97:0.048918,133:0.07577):
0.049757):0.027304):0.007374):0.072662,(147:0.088135,11:0.064036):0.146731):0.045209):0.033272,(((57:0.038761,(25:0.012687,139:0.051202):0.011167):0.070301,((114:0.04211,167:0.018572):0.026461,58:0.098008):0.003546):0.046481,(((((((189:0.004832,84:0.00443):0.021258,101:0.048949):0.014593,55:0.045372):0.030444,(163:0.040324,80:0.095645):0.025838):0.023797,5:0.104474):0.005101,((21:0.071425,190:0.028078):0.038979,(1:0.020267,33:0.063047):0.117384):0.019667):0.020694,2:0.084342):0.023439):0.086726):0.074316):0.103828):0.22174,(((((96:0.047644,71:0.017724):0.079408,(29:0.058493,((((((((((158:0.000007,246:0.010132):0.000122,243:0.084106):0.00078,244:0.019057):0.000628,245:0.029957):0.000097,247:0.096535):0.000033,249:0.056484):0.000418,242:0.058161):0.000264,250:0.098013):0.000066,241:0.067649):0.000025,248:0.057714):0.107683):0.057941):0.077188,((28:0.032344,179:0.019221):0.129582,(((126:0.021211,122:0.003436):0.122348,((120:0.048273,23:0.083327):0.024246,((((((((((59:0.006102,278:0.073546):0.000279,272:0.086692):0.002661,275:0.084918):0.009397,277:0.038376):0.002339,271:0.000845):0.010633,273:0.068913):0.001691,274:0.088526):0.008005,276:0.064526):0.000855,279:0.034788):0.001741,280:0.056721):0.034822):0.022224):0.060385,(100:0.092243,87:0.079571):0.064832):0.066272):0.091976):0.000001,69:0.084749):0.086387,((((((((((((((((46:0.003773,85:0.017394):0.030542,70:0.090134):0.018446,152:0.072279):0.027947,(77:0.051355,30:0.021462):0.075809):0.000203,233:0.045389):0.00465,235:0.05425):0.003291,234:0.014328):0.000973,232:0.077092):0.004218,236:0.013624):0.005206,231:0.057416):0.0003,237:0.018566):0.003878,240:0.032864):0.004376,239:0.014682):0.001328,238:0.034672):-0.022566,181:0.099444):0.083413,(((182:0.064543,89:0.048253):0.106031,((135:0.015705,7:0.00821):0.030258,161:0.021676):0.047744):0.002861,(150:0.031904,((37:0.11002,61:0.068051):0.018612,187:0.205805):0.021969):0.001188):0.062534):0.101311):0.053824):0.044002):0.011366,(((((38:0.018712,92:0.035841):0.028224,((154:0
.021713,75:0.065821):0.031695,132:0.049308):0.049428):0.019141,(24:0.084227,88:0.025948):0.04393):0.027486,((10:0.041678,50:0.099926):0.089001,8:0.137018):0.01696):0.106719,(((((16:0.276403,((((95:0.11632,121:0.073923):0.047448,(9:0.067187,136:0.037463):0.124333):0.019898,(((67:0.152317,41:0.038205):0.00857,6:0.039272):0.092741,(((130:0.023863,45:0.067713):0.057962,(198:0.067436,(47:0.042684,12:0.016951):0.079918):0.019656):0.043012,81:0.094409):0.007624):0.006874):0.008771,(((76:0.035191,((51:0.030081,79:0.01744):0.020574,124:0.020613):0.034443):0.079633,(98:0.074293,((123:0.060385,(((168:0.019534,104:0.037426):0.002911,131:0.013521):0.030657,195:0.045135):0.005233):0.012241,107:0.068656):0.082793):0.031941):0.001462,186:0.144723):0.00835):0.029303):0.010882,(66:0.173463,(3:0.142032,(102:0.056606,(165:0.041571,151:0.055941):0.026282):0.103159):0.056229):0.056911):0.03039,(((62:0.104589,(193:0.11889,15:0.057758):0.000001):0.056656,((((78:0.040435,137:0.055276):0.028649,106:0.036861):0.067081,26:0.070208):0.1102,99:0.065503):0.012968):0.010713,43:0.020947):0.031932):0.010026,(((53:0.112361,153:0.068983):0.029664,(159:0.092072,44:0.043428):0.05657):0.059581,(((196:0.024776,22:0.028294):0.132592,((((185:0.032274,13:0.134446):0.024481,(111:0.048261,173:0.012298):0.174056):0.006241,156:0.077862):0.016951,(((110:0.015929,14:0.011513):0.033346,40:0.066945):0.079179,19:0.130813):0.010367):0.030079):0.000001,(42:0.021955,183:0.040206):0.056799):0.032675):0.104819):0.003319,(((118:0.065071,109:0.109846):0.002421,146:0.205691):0.003295,((74:0.030031,148:0.023025):0.048024,83:0.232573):0.003829):0.100581):0.059103):0.037669):0.017766,(((157:0.070487,134:0.121248):0.026529,82:0.039355):0.019774,(((119:0.159111,(184:0.022568,143:0.016722):0.068286):0.076846,(149:0.088897,188:0.101306):0.045138):0.011258,(72:0.153764,49:0.087393):0.038648):0.010676):0.009971,31:0.124516); diff --git a/bp/tests/data/300/placement_mul.jplace b/bp/tests/data/300/placement_mul.jplace new file mode 
100644 index 0000000..b6d55cc --- /dev/null +++ b/bp/tests/data/300/placement_mul.jplace @@ -0,0 +1 @@ +{"fields": ["edge_num", "likelihood", "like_weight_ratio", "distal_length", "pendant_length"], "metadata": {"invocation": "/home/y5jiang/miniconda3/envs/std/bin/run_apples.py -q model.200.10000000.0.000001/01/1/query.fa -s model.200.10000000.0.000001/01/1/backbone.fa -t model.200.10000000.0.000001/01/1/jc_result/run.raxml.bestTree -o model.200.10000000.0.000001/01/1/jc_result/placement.jplace -f 0 -b 5 -D"}, "placements": [{"n": ["82"], "p": [[361, 0.01013206496780672, 1, 0.02652932626620403, 0.039354548684623215]]}, {"n": ["99"], "p": [[308, 0.04520741687623886, 1, 0.11020044356641526, 0.06550337922097477]]}, {"n": ["43"], "p": [[309, 0.04054866161921744, 1, 0.010712923050783987, 0.020946988900520196]]}, {"n": ["195"], "p": [[277, 0.01918907908397749, 1, 0.03065741838803451, 0.04513513498399864]]}, {"n": ["162"], "p": [[55, 0.01758935282545493, 1, 0.0033199487685078776, 0.05388735804976052]]}, {"n": ["56"], "p": [[81, 0.2366882303770561, 1, 0.04172580852519453, 0.0007060238727097983]]}, {"n": ["91"], "p": [[105, 0.0001863393767883581, 1, 0.04578898721138839, 0.08655004339151215]]}, {"n": ["174"], "p": [[89, 0.01216463967379211, 1, 0.04707020642820376, 0.045206727542450205]]}, {"n": ["5"], "p": [[143, 0.012162345471765756, 1, 0.023797389484252734, 0.10447375403452556]]}, {"n": ["55"], "p": [[139, 0.09563944060686769, 1, 0.014593217782258146, 0.04537214236560885]]}, {"n": ["201"], "p": [[82, 0, 1, 0.0038392824534644932, 0.09738497526912704]]}, {"n": ["202"], "p": [[82, 0, 1, 0.00538773823683071, 0.032032960914394386]]}, {"n": ["203"], "p": [[82, 0, 1, 0.005801486748959656, 0.07577745216073528]]}, {"n": ["204"], "p": [[82, 0, 1, 0.0022313670167670714, 0.06657407149107182]]}, {"n": ["205"], "p": [[82, 0, 1, 0.007258285742496784, 0.0795108053167541]]}, {"n": ["206"], "p": [[82, 0, 1, 0.01756211733321826, 0.006238859758160742]]}, {"n": ["207"], "p": [[82, 0, 1, 
0.004431883939348495, 0.04228685810244977]]}, {"n": ["208"], "p": [[82, 0, 1, 0.000831186124187672, 0.008298893486876858]]}, {"n": ["209"], "p": [[82, 0, 1, 0.010138002398385186, 0.08996899063567976]]}, {"n": ["210"], "p": [[82, 0, 1, 0.019857347084761147, 0.09762673781554322]]}, {"n": ["211"], "p": [[99, 0, 1, 0.05817121463528927, 0.062073731536758714]]}, {"n": ["212"], "p": [[99, 0, 1, 0.06381062694080353, 0.032373568085737825]]}, {"n": ["213"], "p": [[99, 0, 1, 0.03028823953685711, 0.05304055142721412]]}, {"n": ["214"], "p": [[99, 0, 1, 0.016547219749226325, 0.016391605372241335]]}, {"n": ["215"], "p": [[99, 0, 1, 0.07879101455738889, 0.07036932424282365]]}, {"n": ["216"], "p": [[99, 0, 1, 0.10204066488361377, 0.058500618162122354]]}, {"n": ["217"], "p": [[99, 0, 1, 0.04906249141328772, 0.08839613241770768]]}, {"n": ["218"], "p": [[99, 0, 1, 0.045971427260312794, 0.08696673554654553]]}, {"n": ["219"], "p": [[99, 0, 1, 0.03237070165118943, 0.006334874014950454]]}, {"n": ["220"], "p": [[99, 0, 1, 0.04733071059127464, 0.0028279960074323544]]}, {"n": ["221"], "p": [[43, 0, 1, 0.006325862907639057, 0.09111791720504678]]}, {"n": ["222"], "p": [[43, 0, 1, 0.002686703042868586, 0.0316512148732311]]}, {"n": ["223"], "p": [[43, 0, 1, 0.0015035609528728996, 0.09095341462064187]]}, {"n": ["224"], "p": [[43, 0, 1, 0.009262887050070149, 0.0035942670895906196]]}, {"n": ["225"], "p": [[43, 0, 1, 0.008695754915554806, 0.07402648602287575]]}, {"n": ["226"], "p": [[43, 0, 1, 0.0022365238321496814, 0.0033497094894365587]]}, {"n": ["227"], "p": [[43, 0, 1, 0.006885235457101727, 0.08495752397682416]]}, {"n": ["228"], "p": [[43, 0, 1, 0.0035739191459535813, 0.03838151801335741]]}, {"n": ["229"], "p": [[43, 0, 1, 0.0020728349983266912, 0.06879871242504297]]}, {"n": ["230"], "p": [[43, 0, 1, 0.0017292248833447473, 0.0478032903627909]]}, {"n": ["231"], "p": [[195, 0, 1, 0.018542079279493546, 0.05741622518262618]]}, {"n": ["232"], "p": [[195, 0, 1, 0.00911759221261531, 
0.07709240371928998]]}, {"n": ["233"], "p": [[195, 0, 1, 0.00020319242574295114, 0.04538866538714619]]}, {"n": ["234"], "p": [[195, 0, 1, 0.008144144843706217, 0.014327999762752498]]}, {"n": ["235"], "p": [[195, 0, 1, 0.004853394537979706, 0.05424997642712658]]}, {"n": ["236"], "p": [[195, 0, 1, 0.013335581527542776, 0.01362360117744147]]}, {"n": ["237"], "p": [[195, 0, 1, 0.01884185541465883, 0.01856597288915145]]}, {"n": ["238"], "p": [[195, 0, 1, 0.028424432342402117, 0.0346715341119091]]}, {"n": ["239"], "p": [[195, 0, 1, 0.02709635449716477, 0.014682050864100994]]}, {"n": ["240"], "p": [[195, 0, 1, 0.022719999261047792, 0.03286365871050772]]}, {"n": ["241"], "p": [[162, 0, 1, 0.002413623158154767, 0.06764881473116402]]}, {"n": ["242"], "p": [[162, 0, 1, 0.002084003310834421, 0.05816110057007791]]}, {"n": ["243"], "p": [[162, 0, 1, 0.00012841791875353866, 0.08410564388060832]]}, {"n": ["244"], "p": [[162, 0, 1, 0.0009081699513484975, 0.019057370501793805]]}, {"n": ["245"], "p": [[162, 0, 1, 0.0015362448061163048, 0.0299567478307208]]}, {"n": ["246"], "p": [[162, 0, 1, 6.502620439317826e-06, 0.010131616101684948]]}, {"n": ["247"], "p": [[162, 0, 1, 0.0016329995755277153, 0.09653453597841082]]}, {"n": ["248"], "p": [[162, 0, 1, 0.002438739063810635, 0.05771424955563628]]}, {"n": ["249"], "p": [[162, 0, 1, 0.00166571868259112, 0.05648419244303224]]}, {"n": ["250"], "p": [[162, 0, 1, 0.002347510056324297, 0.0980128375802925]]}, {"n": ["251"], "p": [[56, 0, 1, 0.03754183067497629, 0.08028486400656981]]}, {"n": ["252"], "p": [[56, 0, 1, 9.308132946008312e-05, 0.027033850979417996]]}, {"n": ["253"], "p": [[56, 0, 1, 0.03853972935513422, 0.03222639346515282]]}, {"n": ["254"], "p": [[56, 0, 1, 0.0416447839933881, 0.010247295724155604]]}, {"n": ["255"], "p": [[56, 0, 1, 0.00022650593572792356, 0.0023322118147899975]]}, {"n": ["256"], "p": [[56, 0, 1, 0.010896419479392783, 0.02009719174718031]]}, {"n": ["257"], "p": [[56, 0, 1, 0.020484866308019878, 
0.040594671641619784]]}, {"n": ["258"], "p": [[56, 0, 1, 0.004261847471710495, 0.038655734404798414]]}, {"n": ["259"], "p": [[56, 0, 1, 0.037885033227844846, 0.0780474705476729]]}, {"n": ["260"], "p": [[56, 0, 1, 0.007327447653239102, 0.03766518019461845]]}, {"n": ["261"], "p": [[91, 0, 1, 0.044622174119930795, 0.07596413427814225]]}, {"n": ["262"], "p": [[91, 0, 1, 0.03195580867179439, 0.09195266235774964]]}, {"n": ["263"], "p": [[91, 0, 1, 0.013521585106735944, 0.03706402606132619]]}, {"n": ["264"], "p": [[91, 0, 1, 0.011075687820536058, 0.022814405720244624]]}, {"n": ["265"], "p": [[91, 0, 1, 0.026436963025463164, 0.012625199583600345]]}, {"n": ["266"], "p": [[91, 0, 1, 0.026589636186294962, 0.01824087755866659]]}, {"n": ["267"], "p": [[91, 0, 1, 0.02063734341795329, 0.004791104073642561]]}, {"n": ["268"], "p": [[91, 0, 1, 0.0035160423741338264, 0.04498656379340781]]}, {"n": ["269"], "p": [[91, 0, 1, 0.013511901464408727, 0.06364639184383179]]}, {"n": ["270"], "p": [[91, 0, 1, 0.014542448105622207, 0.02011739269298679]]}, {"n": ["271"], "p": [[174, 0, 1, 0.020778314066793896, 0.0008447184600889446]]}, {"n": ["272"], "p": [[174, 0, 1, 0.006381238233165224, 0.08669217824012865]]}, {"n": ["273"], "p": [[174, 0, 1, 0.03141180769387497, 0.0689128265938673]]}, {"n": ["274"], "p": [[174, 0, 1, 0.03310323147018247, 0.08852618741739243]]}, {"n": ["275"], "p": [[174, 0, 1, 0.009042278367296576, 0.08491831866457393]]}, {"n": ["276"], "p": [[174, 0, 1, 0.041107791928253444, 0.06452589289586465]]}, {"n": ["277"], "p": [[174, 0, 1, 0.018439103882379904, 0.038376083655843496]]}, {"n": ["278"], "p": [[174, 0, 1, 0.006102071391935765, 0.07354615008868613]]}, {"n": ["279"], "p": [[174, 0, 1, 0.04196240401900247, 0.034788431851590494]]}, {"n": ["280"], "p": [[174, 0, 1, 0.04370311067003953, 0.05672056605900292]]}, {"n": ["281"], "p": [[5, 0, 1, 0.019841704630175813, 0.003223061374573555]]}, {"n": ["282"], "p": [[5, 0, 1, 0.009702313799660963, 0.09842319880826393]]}, {"n": ["283"], 
"p": [[5, 0, 1, 0.018376981392323598, 0.0856782610322325]]}, {"n": ["284"], "p": [[5, 0, 1, 0.00449752634898426, 0.07290332316282015]]}, {"n": ["285"], "p": [[5, 0, 1, 0.02191391517652802, 0.09129293782283024]]}, {"n": ["286"], "p": [[5, 0, 1, 0.013606608435547207, 0.06055919854081219]]}, {"n": ["287"], "p": [[5, 0, 1, 0.007940484155814099, 0.05609399449434607]]}, {"n": ["288"], "p": [[5, 0, 1, 0.01572196902031946, 0.006143145957555097]]}, {"n": ["289"], "p": [[5, 0, 1, 0.017789442953763043, 0.08862323433217094]]}, {"n": ["290"], "p": [[5, 0, 1, 0.004063376214766736, 0.005988317791778242]]}, {"n": ["291"], "p": [[55, 0, 1, 0.008722795973856707, 0.09471491815328043]]}, {"n": ["292"], "p": [[55, 0, 1, 0.010810845046100902, 0.02887811138505705]]}, {"n": ["293"], "p": [[55, 0, 1, 0.012558260925821522, 0.07070398547982587]]}, {"n": ["294"], "p": [[55, 0, 1, 0.00017579504469218633, 0.09923788030546805]]}, {"n": ["295"], "p": [[55, 0, 1, 0.01449369616781433, 0.06148017400252222]]}, {"n": ["296"], "p": [[55, 0, 1, 0.0022525211215865875, 0.03586335857639652]]}, {"n": ["297"], "p": [[55, 0, 1, 0.005455581874504406, 0.07983681101178416]]}, {"n": ["298"], "p": [[55, 0, 1, 0.012838861087384591, 0.05830458097797218]]}, {"n": ["299"], "p": [[55, 0, 1, 0.012395505224489706, 0.0241892221887115]]}, {"n": ["300"], "p": [[55, 0, 1, 0.0030252844954950517, 0.0719738912287745]]}], "tree": 
"(((128:0.091649{0},(((((63:0.046046{1},34:0.026065{2}):0.147471{3},(36:0.056854{4},113:0.119931{5}):0.057087{6}):0.048654{7},(((65:0.013097{8},39:0.023368{9}):0.048249{10},(68:0.038382{11},(166:0.028569{12},(60:0.015049{13},86:0.070973{14}):0.018681{15}):0.065314{16}):0.013338{17}):0.037003{18},(172:0.11128{19},178:0.075705{20}):0.033605{21}):0.095847{22}):0.063586{23},((((127:0.095186{24},(169:0.004654{25},((176:0.080311{26},199:0.025038{27}):0.010857{28},171:0.036119{29}):0.008868{30}):0.039983{31}):0.071999{32},(((((((160:0.030215{33},192:0.023451{34}):0.048363{35},((197:0.1835{36},(141:0.072552{37},200:0.065412{38}):0.038114{39}):0.018307{40},138:0.111608{41}):0.011379{42}):0.064896{43},(20:0.132222{44},94:0.05062{45}):0.029734{46}):0.010164{47},((93:0.034447{48},17:0.16696{49}):0.036792{50},(52:0.073619{51},(108:0.108945{52},54:0.071498{53}):0.020253{54}):0.007886{55}):0.012139{56}):0.020069{57},(((117:0.087632{58},175:0.04907{59}):0.023117{60},105:0.063734{61}):0.034021{62},142:0.159509{63}):0.034053{64}):0.008633{65},180:0.038679{66}):0.011734{67},(32:0.177401{68},(191:0.012004{69},18:0.006781{70}):0.070602{71}):0.073237{72}):0.027244{73}):0.065858{74},(48:0.020422{75},145:0.041412{76}):0.078334{77}):0.158131{78},((((4:0.077285{79},140:0.059016{80}):0.10287{81},(112:0.054516{82},73:0.098056{83}):0.07508{84}):0.040879{85},(((64:0.034688{86},(((103:0.024899{87},164:0.027051{88}):0.108886{89},(90:0.037625{90},170:0.013174{91}):0.061949{92}):0.04288{93},125:0.03853{94}):0.000689{95}):0.043095{96},((((155:0.007038{97},116:0.010439{98}):0.021865{99},(115:0.04627{100},194:0.050225{101}):0.001882{102}):0.007504{103},35:0.02298{104}):0.057944{105},(((129:0.001131{106},144:0.004662{107}):0.035561{108},(177:0.068837{109},27:0.109227{110}):0.005532{111}):0.025179{112},(97:0.048918{113},133:0.07577{114}):0.049757{115}):0.027304{116}):0.007374{117}):0.072662{118},(147:0.088135{119},11:0.064036{120}):0.146731{121}):0.045209{122}):0.033272{123},(((57:0.038761{124},(25:0.012
687{125},139:0.051202{126}):0.011167{127}):0.070301{128},((114:0.04211{129},167:0.018572{130}):0.026461{131},58:0.098008{132}):0.003546{133}):0.046481{134},(((((189:0.004832{135},84:0.00443{136}):0.021258{137},101:0.048949{138}):0.045037{139},(163:0.040324{140},80:0.095645{141}):0.025838{142}):0.028898{143},((21:0.071425{144},190:0.028078{145}):0.038979{146},(1:0.020267{147},33:0.063047{148}):0.117384{149}):0.019667{150}):0.020694{151},2:0.084342{152}):0.023439{153}):0.086726{154}):0.074316{155}):0.103828{156}):0.22174{157},(((((96:0.047644{158},71:0.017724{159}):0.079408{160},(29:0.058493{161},158:0.110122{162}):0.057941{163}):0.077188{164},((28:0.032344{165},179:0.019221{166}):0.129582{167},(((126:0.021211{168},122:0.003436{169}):0.122348{170},((120:0.048273{171},23:0.083327{172}):0.024246{173},59:0.078525{174}):0.022224{175}):0.060385{176},(100:0.092243{177},87:0.079571{178}):0.064832{179}):0.066272{180}):0.091976{181}):1e-06{182},69:0.084749{183}):0.086387{184},((((((46:0.003773{185},85:0.017394{186}):0.030542{187},70:0.090134{188}):0.018446{189},152:0.072279{190}):0.027947{191},(77:0.051355{192},30:0.021462{193}):0.075809{194}):0.005858{195},181:0.099444{196}):0.083413{197},(((182:0.064543{198},89:0.048253{199}):0.106031{200},((135:0.015705{201},7:0.00821{202}):0.030258{203},161:0.021676{204}):0.047744{205}):0.002861{206},(150:0.031904{207},((37:0.11002{208},61:0.068051{209}):0.018612{210},187:0.205805{211}):0.021969{212}):0.001188{213}):0.062534{214}):0.101311{215}):0.053824{216}):0.044002{217}):0.011366{218},(((((38:0.018712{219},92:0.035841{220}):0.028224{221},((154:0.021713{222},75:0.065821{223}):0.031695{224},132:0.049308{225}):0.049428{226}):0.019141{227},(24:0.084227{228},88:0.025948{229}):0.04393{230}):0.027486{231},((10:0.041678{232},50:0.099926{233}):0.089001{234},8:0.137018{235}):0.01696{236}):0.106719{237},(((((16:0.276403{238},((((95:0.11632{239},121:0.073923{240}):0.047448{241},(9:0.067187{242},136:0.037463{243}):0.124333{244}):0.019898{245},(((67
:0.152317{246},41:0.038205{247}):0.00857{248},6:0.039272{249}):0.092741{250},(((130:0.023863{251},45:0.067713{252}):0.057962{253},(198:0.067436{254},(47:0.042684{255},12:0.016951{256}):0.079918{257}):0.019656{258}):0.043012{259},81:0.094409{260}):0.007624{261}):0.006874{262}):0.008771{263},(((76:0.035191{264},((51:0.030081{265},79:0.01744{266}):0.020574{267},124:0.020613{268}):0.034443{269}):0.079633{270},(98:0.074293{271},((123:0.060385{272},((168:0.019534{273},104:0.037426{274}):0.002911{275},131:0.013521{276}):0.03589{277}):0.012241{278},107:0.068656{279}):0.082793{280}):0.031941{281}):0.001462{282},186:0.144723{283}):0.00835{284}):0.029303{285}):0.010882{286},(66:0.173463{287},(3:0.142032{288},(102:0.056606{289},(165:0.041571{290},151:0.055941{291}):0.026282{292}):0.103159{293}):0.056229{294}):0.056911{295}):0.03039{296},((62:0.104589{297},(193:0.11889{298},15:0.057758{299}):1e-06{300}):0.056656{301},(((78:0.040435{302},137:0.055276{303}):0.028649{304},106:0.036861{305}):0.067081{306},26:0.070208{307}):0.123168{308}):0.042645{309}):0.010026{310},(((53:0.112361{311},153:0.068983{312}):0.029664{313},(159:0.092072{314},44:0.043428{315}):0.05657{316}):0.059581{317},(((196:0.024776{318},22:0.028294{319}):0.132592{320},((((185:0.032274{321},13:0.134446{322}):0.024481{323},(111:0.048261{324},173:0.012298{325}):0.174056{326}):0.006241{327},156:0.077862{328}):0.016951{329},(((110:0.015929{330},14:0.011513{331}):0.033346{332},40:0.066945{333}):0.079179{334},19:0.130813{335}):0.010367{336}):0.030079{337}):1e-06{338},(42:0.021955{339},183:0.040206{340}):0.056799{341}):0.032675{342}):0.104819{343}):0.003319{344},(((118:0.065071{345},109:0.109846{346}):0.002421{347},146:0.205691{348}):0.003295{349},((74:0.030031{350},148:0.023025{351}):0.048024{352},83:0.232573{353}):0.003829{354}):0.100581{355}):0.059103{356}):0.037669{357}):0.017766{358},((157:0.070487{359},134:0.121248{360}):0.046303{361},(((119:0.159111{362},(184:0.022568{363},143:0.016722{364}):0.068286{365}):0.076846{36
6},(149:0.088897{367},188:0.101306{368}):0.045138{369}):0.011258{370},(72:0.153764{371},49:0.087393{372}):0.038648{373}):0.010676{374}):0.009971{375},31:0.124516{376});", "version": 3} \ No newline at end of file diff --git a/bp/tests/data/300/placement_mul.newick b/bp/tests/data/300/placement_mul.newick new file mode 100644 index 0000000..325aaf2 --- /dev/null +++ b/bp/tests/data/300/placement_mul.newick @@ -0,0 +1 @@ +(((128:0.091649,(((((63:0.046046,34:0.026065):0.147471,(36:0.056854,(113:0.013345,(281:0,290:0.002765,288:0.00292,287:0.052871,286:0.057336,284:0.06968,283:0.082455,289:0.0854,285:0.08807,282:0.0952):0.003223):0.106586):0.057087):0.048654,(((65:0.013097,39:0.023368):0.048249,(68:0.038382,(166:0.028569,(60:0.015049,86:0.070973):0.018681):0.065314):0.013338):0.037003,(172:0.11128,178:0.075705):0.033605):0.095847):0.063586,((((127:0.095186,(169:0.004654,((176:0.080311,199:0.025038):0.010857,171:0.036119):0.008868):0.039983):0.071999,((((((((160:0.030215,192:0.023451):0.048363,((197:0.1835,(141:0.072552,200:0.065412):0.038114):0.018307,138:0.111608):0.011379):0.004497,(226:0,224:0.000245,222:0.028302,228:0.035032,230:0.044454,229:0.065449,225:0.070677,227:0.081608,223:0.087604,221:0.087768):0.00335):0.060399,(20:0.132222,94:0.05062):0.029734):0.010164,(((93:0.034447,17:0.16696):0.036792,((52:0.073619,(108:0.108945,54:0.071498):0.020253):0.007823,(299:0,292:0.004689,296:0.011674,162:0.029698,298:0.034115,295:0.037291,293:0.046515,300:0.047785,297:0.055648,291:0.070526,294:0.075049):0.024189):0.000063):0.01989,(255:0,254:0.007915,256:0.017765,252:0.024702,253:0.029894,260:0.035333,258:0.036324,257:0.038262,259:0.075715,251:0.077953):0.002332):0):0.020069,(((117:0.087632,175:0.04907):0.023117,105:0.063734):0.034021,142:0.159509):0.034053):0.008633,180:0.038679):0.011734,(32:0.177401,(191:0.012004,18:0.006781):0.070602):0.073237):0.027244):0.065858,(48:0.020422,145:0.041412):0.078334):0.158131,(((((4:0.077285,140:0.059016):0.041726,56:0.000706):0.061144,((11
2:0.007734,(206:0,208:0.00206,202:0.025794,207:0.036048,204:0.060335,203:0.069539,205:0.073272,209:0.08373,201:0.091146,210:0.091388):0.006239):0.046782,73:0.098056):0.07508):0.040879,(((64:0.034688,((((103:0.024899,164:0.027051):0.04707,174:0.045207):0.061816,(90:0.037625,(170:0.020641,(267:0,265:0.007834,266:0.01345,270:0.015326,264:0.018023,263:0.032273,268:0.040195,269:0.058855,261:0.071173,262:0.087162):0.004791):0):0.061949):0.04288,125:0.03853):0.000689):0.043095,((((((155:0.007038,116:0.010439):0.052438,(220:0,219:0.003507,214:0.013564,212:0.029546,213:0.050213,216:0.055673,211:0.059246,215:0.067541,218:0.084139,217:0.085568):0.002828):0,(115:0.04627,194:0.050225):0.001882):0.007504,35:0.02298):0.045789,91:0.08655):0.012155,(((129:0.001131,144:0.004662):0.035561,(177:0.068837,27:0.109227):0.005532):0.025179,(97:0.048918,133:0.07577):0.049757):0.027304):0.007374):0.072662,(147:0.088135,11:0.064036):0.146731):0.045209):0.033272,(((57:0.038761,(25:0.012687,139:0.051202):0.011167):0.070301,((114:0.04211,167:0.018572):0.026461,58:0.098008):0.003546):0.046481,(((((((189:0.004832,84:0.00443):0.021258,101:0.048949):0.014593,55:0.045372):0.030444,(163:0.040324,80:0.095645):0.025838):0.023797,5:0.104474):0.005101,((21:0.071425,190:0.028078):0.038979,(1:0.020267,33:0.063047):0.117384):0.019667):0.020694,2:0.084342):0.023439):0.086726):0.074316):0.103828):0.22174,(((((96:0.047644,71:0.017724):0.079408,(29:0.058493,(158:0.001516,(246:0,244:0.008926,245:0.019825,249:0.046353,248:0.047583,242:0.048029,241:0.057517,243:0.073974,247:0.086403,250:0.087881):0.010132):0.108606):0.057941):0.077188,((28:0.032344,179:0.019221):0.129582,(((126:0.021211,122:0.003436):0.122348,((120:0.048273,23:0.083327):0.024246,(59:0.025203,(271:0,279:0.033944,277:0.037531,280:0.055876,276:0.063681,273:0.068068,278:0.072701,275:0.084074,272:0.085847,274:0.087681):0.000845):0.053322):0.022224):0.060385,(100:0.092243,87:0.079571):0.064832):0.066272):0.091976):0.000001,69:0.084749):0.086387,(((((((46:
0.003773,85:0.017394):0.030542,70:0.090134):0.018446,152:0.072279):0.027947,(77:0.051355,30:0.021462):0.075809):0.015128,(236:0,234:0.000704,239:0.001058,237:0.004942,240:0.01924,238:0.021048,233:0.031765,235:0.040626,231:0.043793,232:0.063469):0.013624):0,181:0.099444):0.083413,(((182:0.064543,89:0.048253):0.106031,((135:0.015705,7:0.00821):0.030258,161:0.021676):0.047744):0.002861,(150:0.031904,((37:0.11002,61:0.068051):0.018612,187:0.205805):0.021969):0.001188):0.062534):0.101311):0.053824):0.044002):0.011366,(((((38:0.018712,92:0.035841):0.028224,((154:0.021713,75:0.065821):0.031695,132:0.049308):0.049428):0.019141,(24:0.084227,88:0.025948):0.04393):0.027486,((10:0.041678,50:0.099926):0.089001,8:0.137018):0.01696):0.106719,(((((16:0.276403,((((95:0.11632,121:0.073923):0.047448,(9:0.067187,136:0.037463):0.124333):0.019898,(((67:0.152317,41:0.038205):0.00857,6:0.039272):0.092741,(((130:0.023863,45:0.067713):0.057962,(198:0.067436,(47:0.042684,12:0.016951):0.079918):0.019656):0.043012,81:0.094409):0.007624):0.006874):0.008771,(((76:0.035191,((51:0.030081,79:0.01744):0.020574,124:0.020613):0.034443):0.079633,(98:0.074293,((123:0.060385,(((168:0.019534,104:0.037426):0.002911,131:0.013521):0.030657,195:0.045135):0.005233):0.012241,107:0.068656):0.082793):0.031941):0.001462,186:0.144723):0.00835):0.029303):0.010882,(66:0.173463,(3:0.142032,(102:0.056606,(165:0.041571,151:0.055941):0.026282):0.103159):0.056229):0.056911):0.03039,(((62:0.104589,(193:0.11889,15:0.057758):0.000001):0.056656,((((78:0.040435,137:0.055276):0.028649,106:0.036861):0.067081,26:0.070208):0.1102,99:0.065503):0.012968):0.010713,43:0.020947):0.031932):0.010026,(((53:0.112361,153:0.068983):0.029664,(159:0.092072,44:0.043428):0.05657):0.059581,(((196:0.024776,22:0.028294):0.132592,((((185:0.032274,13:0.134446):0.024481,(111:0.048261,173:0.012298):0.174056):0.006241,156:0.077862):0.016951,(((110:0.015929,14:0.011513):0.033346,40:0.066945):0.079179,19:0.130813):0.010367):0.030079):0.000001,(42:0.021955,
183:0.040206):0.056799):0.032675):0.104819):0.003319,(((118:0.065071,109:0.109846):0.002421,146:0.205691):0.003295,((74:0.030031,148:0.023025):0.048024,83:0.232573):0.003829):0.100581):0.059103):0.037669):0.017766,(((157:0.070487,134:0.121248):0.026529,82:0.039355):0.019774,(((119:0.159111,(184:0.022568,143:0.016722):0.068286):0.076846,(149:0.088897,188:0.101306):0.045138):0.011258,(72:0.153764,49:0.087393):0.038648):0.010676):0.009971,31:0.124516); diff --git a/bp/tests/test_io.py b/bp/tests/test_io.py index d74de10..d191c96 100644 --- a/bp/tests/test_io.py +++ b/bp/tests/test_io.py @@ -1,6 +1,10 @@ from unittest import TestCase, main -from bp import parse_newick, to_skbio_treenode, write_newick - +from bp import parse_newick, to_skbio_treenode, write_newick, parse_jplace +import re +import json +import pandas as pd +import pandas.testing as pdt +import pkg_resources import skbio import io import numpy as np @@ -152,5 +156,51 @@ def test_parse_newick_name_with_semicolon(self): self.assertEqual(obs_bp.length(i), e_l) +class JPlaceParseTests(TestCase): + package = 'bp.tests' + + def setUp(self): + self.jplacedata = open(self.get_data_path('200/placement.jplace')) + self.jplacedata = self.jplacedata.read() + no_edge_numbers = re.sub(r"{\d+}", '', + json.loads(self.jplacedata)['tree']) + self.tree = skbio.TreeNode.read([no_edge_numbers]) + + def get_data_path(self, filename): + # adapted from qiime2.plugin.testing.TestPluginBase + return pkg_resources.resource_filename(self.package, + 'data/%s' % filename) + + def test_parse_jplace_simple(self): + columns = ['fragment', 'edge_num', 'likelihood', 'like_weight_ratio', + 'distal_length', 'pendant_length'] + exp_df = [["82", 361, 0.01013206496780672, 1, 0.02652932626620403, + 0.039354548684623215], + ["99", 308, 0.04520741687623886, 1, 0.11020044356641526, + 0.06550337922097477], + ["43", 309, 0.04054866161921744, 1, 0.010712923050783987, + 0.020946988900520196], + ["195", 277, 0.01918907908397749, 1, 0.03065741838803451, 
+ 0.04513513498399864], + ["162", 55, 0.01758935282545493, 1, 0.0033199487685078776, + 0.05388735804976052], + ["56", 81, 0.2366882303770561, 1, 0.04172580852519453, + 0.0007060238727097983], + ["91", 105, 0.0001863393767883581, 1, 0.04578898721138839, + 0.08655004339151215], + ["174", 89, 0.01216463967379211, 1, 0.04707020642820376, + 0.045206727542450205], + ["5", 143, 0.012162345471765756, 1, 0.023797389484252734, + 0.10447375403452556], + ["55", 139, 0.09563944060686769, 1, 0.014593217782258146, + 0.04537214236560885]] + exp_df = pd.DataFrame(exp_df, columns=columns) + exp_tree = self.tree + obs_df, obs_tree = parse_jplace(self.jplacedata) + obs_tree = to_skbio_treenode(obs_tree) + pdt.assert_frame_equal(obs_df, exp_df) + self.assertEqual(obs_tree.compare_rfd(exp_tree), 0) + + if __name__ == '__main__': main() From 7c745a3c77b34b336704e0d9aaafa51ca5ca7e04 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 13:44:04 -0800 Subject: [PATCH 02/17] pandas dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 971e463..49bf5ad 100644 --- a/setup.py +++ b/setup.py @@ -119,6 +119,7 @@ def run(self): 'numpy >= 1.9.2', 'nose >= 1.3.7', 'cython >= 0.24.1', + 'pandas', 'scikit-bio >= 0.5.0, < 0.6.0'], long_description=long_description, cmdclass={'build_py': BitArrayBuild, From eceaf7b315eb6ba14c4ee8b4ffdeda7d23d7a1ad Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 13:44:18 -0800 Subject: [PATCH 03/17] jplace parser --- bp/__init__.py | 4 ++-- bp/_io.pyx | 32 +++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/bp/__init__.py b/bp/__init__.py index f64a4d7..431fccc 100644 --- a/bp/__init__.py +++ b/bp/__init__.py @@ -6,8 +6,8 @@ # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- from ._bp import BP -from ._io import parse_newick, write_newick +from ._io import parse_newick, write_newick, parse_jplace from ._conv import to_skbio_treenode, from_skbio_treenode, to_skbio_treearray __all__ = ['BP', 'parse_newick', 'to_skbio_treenode', 'from_skbio_treenode', - 'to_skbio_treearray', 'write_newick'] + 'to_skbio_treearray', 'write_newick', 'parse_jplace'] diff --git a/bp/_io.pyx b/bp/_io.pyx index 3022e2d..abe0edb 100644 --- a/bp/_io.pyx +++ b/bp/_io.pyx @@ -4,6 +4,8 @@ from ._bp cimport BP import time import numpy as np +import pandas as pd +import json cimport numpy as np cimport cython np.import_array() @@ -24,13 +26,15 @@ cdef inline np.double_t length_from_edge(unicode token): cdef inline np.int32_t number_from_edge(unicode token): cdef: Py_ssize_t split_idx + Py_ssize_t end # 0.12345{0123} -> 0123 split_idx = token.find('{') if split_idx == -1: return 0 else: - return np.int32(token[split_idx + 1:-1]) + end = len(token) + return np.int32(token[split_idx + 1:end - 1]) cdef void _set_node_metadata(np.uint32_t ptr, unicode token, @@ -41,7 +45,7 @@ cdef void _set_node_metadata(np.uint32_t ptr, unicode token, cdef: np.double_t length np.int32_t edge - Py_ssize_t split_idx, i + Py_ssize_t split_idx, i, end unicode name, token_parsed name = None @@ -65,7 +69,9 @@ cdef void _set_node_metadata(np.uint32_t ptr, unicode token, name = name.strip("'").strip() elif u'{' in token: # strip as " {123}" is valid? 
- edge = np.int32(token.strip()[1:-1]) + token = token.strip() + end = len(token) + edge = np.int32(token.strip()[1:end - 1]) else: name = token.replace("'", "").replace('"', "").strip() @@ -311,3 +317,23 @@ cdef inline Py_ssize_t _ctoken(unicode data, Py_ssize_t datalen, Py_ssize_t star return idx return idx + 1 + + +def parse_jplace(object data): + """Takes a jplace string, returns a DataFrame of placements and the tree""" + as_json = json.loads(data) + + fields = ['fragment'] + as_json['fields'] + + placements = [] + for placement in as_json['placements']: + fragments = placement['n'] + for p in placement['p']: + for frag in fragments: + placements.append([frag] + p) + + tree = parse_newick(as_json['tree']) + + return pd.DataFrame(placements, columns=fields), tree + + From 52904b58b137d51a0e035cba5969cee8e7504f79 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 13:58:16 -0800 Subject: [PATCH 04/17] type the parsing logic --- bp/_io.pyx | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/bp/_io.pyx b/bp/_io.pyx index abe0edb..ace4a40 100644 --- a/bp/_io.pyx +++ b/bp/_io.pyx @@ -321,18 +321,39 @@ cdef inline Py_ssize_t _ctoken(unicode data, Py_ssize_t datalen, Py_ssize_t star def parse_jplace(object data): """Takes a jplace string, returns a DataFrame of placements and the tree""" + cdef: + dict as_json + list fields, placements, fragments, p, placement_data, + list placement_inner_data, pquery, entry + unicode frag, newick + Py_ssize_t placement_idx, placement_inner_idx, fragment_idx, + Py_ssize_t n_fragments + BP tree + as_json = json.loads(data) + newick = as_json['tree'] + placement_data = as_json['placements'] - fields = ['fragment'] + as_json['fields'] + fields = as_json['fields'] + fields = ['fragment', ] + fields placements = [] - for placement in as_json['placements']: + for placement_idx in range(len(placement_data)): + placement = placement_data[placement_idx] + + placement_inner_data 
= placement['p'] fragments = placement['n'] - for p in placement['p']: - for frag in fragments: - placements.append([frag] + p) + n_fragments = len(fragments) + + for placement_inner_idx in range(len(placement_inner_data)): + pquery = placement_inner_data[placement_inner_idx] + + for fragment_idx in range(n_fragments): + frag = fragments[fragment_idx] + entry = [frag, ] + pquery + placements.append(entry) - tree = parse_newick(as_json['tree']) + tree = parse_newick(newick) return pd.DataFrame(placements, columns=fields), tree From 07490e836fdb83ba27327382ad8cfc1f81a8b56a Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 15:28:49 -0800 Subject: [PATCH 05/17] insertion tests --- bp/GPL/tests/test_insert.py | 33 +++++++++++++++++++++++++++++++++ bp/tests/test_insert.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 bp/GPL/tests/test_insert.py create mode 100644 bp/tests/test_insert.py diff --git a/bp/GPL/tests/test_insert.py b/bp/GPL/tests/test_insert.py new file mode 100644 index 0000000..a3ba85a --- /dev/null +++ b/bp/GPL/tests/test_insert.py @@ -0,0 +1,33 @@ +import unittest +import pkg_resources +from bp import parse_jplace +from bp.GPL import insert_multifurcating + + +class InsertTests(unittest.TestCase): + package = 'bp.GPL.tests' + + def setUp(self): + self.jplacedata_multiple = \ + open(self.get_data_path('300/placement_mul.jplace')).read() + self.final_multiple_multifurcating = \ + skbio.TreeNode.read(self.get_data_path('300/placement_mul.newick')) + + def get_data_path(self, filename): + # adapted from qiime2.plugin.testing.TestPluginBase + return pkg_resources.resource_filename(self.package, + '../tests/data/%s' % filename) + + def test_insert_multifurcating(self): + exp = self.final_multiple_multifurcating + placements, backbone = parse_jplace(self.jplacedata_multiple) + obs = insert_multifurcating(placements, backbone) + self.assertEqual({n.name for n in obs.tips()}, + {n.name for n in 
exp.tips()}) + self.assertEqual(obs.compare_rfd(exp), 0) + self.assertAlmostEqual(obs.compare_tip_distances(exp), 0) + + +if __name__ == '__main__': + unittest.main() + diff --git a/bp/tests/test_insert.py b/bp/tests/test_insert.py new file mode 100644 index 0000000..a6e8290 --- /dev/null +++ b/bp/tests/test_insert.py @@ -0,0 +1,30 @@ +import unittest +import pkg_resources +from bp import parse_jplace, insert_fully_resolved + + +class InsertTests(unittest.TestCase): + package = 'bp.tests' + def setUp(self): + self.jplacedata_multiple = \ + open('jplace_data/300/placement_mul.jplace').read() + self.final_multiple_fully_resolved = \ + skbio.TreeNode.read('jplace_data/300/placement.full_resolve.newick') + + def get_data_path(self, filename): + # adapted from qiime2.plugin.testing.TestPluginBase + return pkg_resources.resource_filename(self.package, + 'data/%s' % filename) + + def test_insert_fully_resolved(self): + exp = self.final_multiple_fully_resolved + placements, backbone = parse_jplace(self.jplacedata_multiple) + obs = insert_fully_resolved(placements, backbone) + self.assertEqual({n.name for n in obs.tips()}, + {n.name for n in exp.tips()}) + self.assertEqual(obs.compare_rfd(exp), 0) + self.assertAlmostEqual(obs.compare_tip_distances(exp), 0) + + +if __name__ == '__main__': + unittest.main() From 351a0f9f3db88aa17234a21be99bc4545f6a7425 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 15:38:08 -0800 Subject: [PATCH 06/17] vanilla fully resolved support --- bp/__init__.py | 5 +- bp/_insert.pyx | 110 ++++++++++++++++++++++++++++++++++++++++ bp/tests/test_insert.py | 5 +- setup.py | 2 + 4 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 bp/_insert.pyx diff --git a/bp/__init__.py b/bp/__init__.py index 431fccc..2c45f89 100644 --- a/bp/__init__.py +++ b/bp/__init__.py @@ -8,6 +8,9 @@ from ._bp import BP from ._io import parse_newick, write_newick, parse_jplace from ._conv import to_skbio_treenode, from_skbio_treenode, 
to_skbio_treearray +from ._insert import insert_fully_resolved + __all__ = ['BP', 'parse_newick', 'to_skbio_treenode', 'from_skbio_treenode', - 'to_skbio_treearray', 'write_newick', 'parse_jplace'] + 'to_skbio_treearray', 'write_newick', 'parse_jplace', + 'insert_fully_resolved'] diff --git a/bp/_insert.pyx b/bp/_insert.pyx new file mode 100644 index 0000000..0eda8fe --- /dev/null +++ b/bp/_insert.pyx @@ -0,0 +1,110 @@ +# encoding: utf-8 +# cython: profile=False, boundscheck=False, wraparound=False + +from ._bp cimport BP +from . import to_skbio_treenode +import pandas as pd +import json +import skbio +cimport cython + + +# see the comment in _insert_setup. Avoid the use of invalidate_caches as it +# is very expensive for tree mutation operations +class TreeNodeNoValidate(skbio.TreeNode): + def invalidate_caches(self): + pass + + +# our noop used when monkey patching invalidate_caches +def noop(*arg, **kwargs): + pass + + +# pandas apply functions for preallocation of objects in bulk +def _preallocate_fragment(r): + return TreeNodeNoValidate(name=r['fragment'], length=r['pendant_length']) + + +def _preallocate_empty(r): + return TreeNodeNoValidate() + + +def _insert_setup(placements, bptree, insert_type): + # insertion setup addresses: + # * treenode caching + # * placement ordering + # * preallocation of objects where "easy" + + sktree = to_skbio_treenode(bptree) + node_lookup = {n.edge_num: n for n in sktree.traverse(include_self=True)} + + # mutation operations against TreeNode is expensive as every append or + # remove triggers a call to invalidate caches, which requires a traversal + # to find the root (and do other stuff). 
so let's monkey patch the method + # to force a noop + for node in sktree.traverse(include_self=True): + node.invalidate_caches = noop + + if insert_type == 'multifurcating': + placements = placements.sort_values(['edge_num', 'pendant_length']) + elif insert_type == 'fully_resolved': + placements = placements.sort_values(['edge_num', 'distal_length'], + ascending=[True, False]) + else: + raise ValueError() + + placements['node'] = placements.apply(_preallocate_fragment, axis=1) + + if insert_type == 'fully_resolved': + placements['parent'] = placements.apply(_preallocate_empty, axis=1) + + return placements, sktree, node_lookup + + +# pd.DataFrame is not a resolved type so we cannot use it here for cython +def insert_fully_resolved(object placements, BP bptree): + """Update the backbone, fully resolving edges with multiple queries + + Parameters + ---------- + placements : pd.DataFrame + jplace data represented as a DataFrame + bptree : bp.BP + An instance of a BP tree, this is expected to contain edge numbers + and correspond to the backbone for the jplace data + + Returns + ------- + skbio.TreeNode + A tree with the fragments placed + """ + placements, sktree, node_lookup = \ + _insert_setup(placements, bptree, 'fully_resolved') + + for edge, edge_grp in placements.groupby('edge_num'): + existing_node = node_lookup[edge] + current_parent = existing_node.parent + + for _, fragment in edge_grp.iterrows(): + distal_length = fragment['distal_length'] + fragment_node = fragment['node'] + fragment_parent = fragment['parent'] + + # break the edge + current_parent.remove(existing_node) + existing_node.parent = None + + # update branch lengths + fragment_parent.length = existing_node.length - distal_length + existing_node.length = distal_length + + # attach the nodes + fragment_parent.append(fragment_node) + fragment_parent.append(existing_node) + current_parent.append(fragment_parent) + + # update + current_parent = fragment_parent + + return sktree diff --git 
a/bp/tests/test_insert.py b/bp/tests/test_insert.py index a6e8290..849d2e1 100644 --- a/bp/tests/test_insert.py +++ b/bp/tests/test_insert.py @@ -1,15 +1,16 @@ import unittest import pkg_resources from bp import parse_jplace, insert_fully_resolved +import skbio class InsertTests(unittest.TestCase): package = 'bp.tests' def setUp(self): self.jplacedata_multiple = \ - open('jplace_data/300/placement_mul.jplace').read() + open(self.get_data_path('300/placement_mul.jplace')).read() self.final_multiple_fully_resolved = \ - skbio.TreeNode.read('jplace_data/300/placement.full_resolve.newick') + skbio.TreeNode.read(self.get_data_path('300/placement.full_resolve.newick')) def get_data_path(self, filename): # adapted from qiime2.plugin.testing.TestPluginBase diff --git a/setup.py b/setup.py index 49bf5ad..f61e82f 100644 --- a/setup.py +++ b/setup.py @@ -80,6 +80,8 @@ def run(self): ["bp/_conv" + ext], ), Extension("bp._binary_tree", ["bp/_binary_tree" + ext], ), + Extension("bp._insert", + ["bp/_insert" + ext], ), Extension("bp.tests.test_bp_cy", ["bp/tests/test_bp_cy" + ext], include_dirs=['bp/BitArray/'], From 42282dd79769f5e0320e673ba14a8baec125d7ae Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:05:57 -0800 Subject: [PATCH 07/17] multifurcation insertion --- bp/GPL/Genesis_LICENSE.txt | 674 ++++++++++++++++++++++++++++++++++++ bp/GPL/_insert.pyx | 127 +++++++ bp/GPL/tests/test_insert.py | 5 +- setup.py | 2 + 4 files changed, 806 insertions(+), 2 deletions(-) create mode 100644 bp/GPL/Genesis_LICENSE.txt create mode 100644 bp/GPL/_insert.pyx diff --git a/bp/GPL/Genesis_LICENSE.txt b/bp/GPL/Genesis_LICENSE.txt new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/bp/GPL/Genesis_LICENSE.txt @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/bp/GPL/_insert.pyx b/bp/GPL/_insert.pyx new file mode 100644 index 0000000..9ead234 --- /dev/null +++ b/bp/GPL/_insert.pyx @@ -0,0 +1,127 @@ +# encoding: utf-8 +# cython: profile=False, boundscheck=False, wraparound=False + +from .._bp cimport BP +from bp._insert import _insert_setup, TreeNodeNoValidate +from .. import to_skbio_treenode +import pandas as pd +import json +import skbio +cimport cython + + +# this method described here was derived from Genesis +# https://github.com/lczech/genesis +# The license for Genesis is included herein under +# Genesis_LICENSE.txt +def insert_multifurcating(object placements, BP bptree): + """Update the backbone, with multifurcation for edges with multiple queries + + Parameters + ---------- + placements : pd.DataFrame + jplace data represented as a DataFrame + bptree : bp.BP + An instance of a BP tree, this is expected to contain edge numbers + and correspond to the backbone for the jplace data + + Note + ---- + This method was derived directly from the Genesis codebase, and is + therefore GPL. 
+ + Returns + ------- + skbio.TreeNode + A tree with the fragments placed + """ + placements, sktree, node_lookup = \ + _insert_setup(placements, bptree, 'multifurcating') + + # it is much faster to bulk allocate than construct on the fly, so let's + # do that + new_parents = [TreeNodeNoValidate() + for _ in range(len(placements['edge_num'].unique()))] + parent_idx = 0 + + new_bridges = [TreeNodeNoValidate() + for _ in range(placements['edge_num'].duplicated().sum())] + bridge_idx = 0 + + for edge, edge_grp in placements.groupby('edge_num'): + # update topology + existing_node = node_lookup[edge] + current_parent = existing_node.parent + current_parent.remove(existing_node) + + # get a pre-allocated node + new_parent = new_parents[parent_idx] + parent_idx += 1 # make sure we update our offset + + # gather detail on our minimal placement + min_frag = edge_grp.iloc[0] + min_pendant = min_frag['pendant_length'] + min_distal = min_frag['distal_length'] + new_node = min_frag['node'] + + if len(edge_grp) > 1: + # if we have multiple placements, we construct a node that contains + # all of the placements, and place this under a "bridge" such that + # it is a sister to the existing node. + + # derived from + # https://github.com/lczech/genesis/blob/98c064d8e3e2efaa97da33c9263f6fef3724f0a5/lib/genesis/placement/function/tree.cpp#L295 + + # update topology + bridge = new_bridges[bridge_idx] + bridge_idx += 1 + bridge.append(existing_node) + bridge.append(new_parent) + current_parent.append(bridge) + new_parent.append(new_node) + + # Gappa uses the average distal for joining the node encompassing + # placements and existing back to the tree. As we are subtracting + # against the existing node, it is possible to introduce a negative + # branch length, so we enforce a minimum of 0.0 + avg_prox_len = edge_grp['distal_length'].mean() + bridge.length = max(0.0, existing_node.length - avg_prox_len) + + # update edges. 
the first node we place has a length of zero as + # its parent accounts for its pendant + existing_node.length = avg_prox_len + new_parent.length = min_pendant + new_node.length = 0.0 + + for i in range(1, len(edge_grp)): + # gather placement detail + frag_row = edge_grp.iloc[i] + frag_node = frag_row['node'] + + # update topology + new_parent.append(frag_node) + + # update the branch length. Note that "frag_node" has its + # length first set to its pendant during the preallocation + # step. We subtract the parent pendant to avoid counting + # extra edge length. This should never be < 0 as the data + # are presorted by pendant length, so the fragment evaluated + # here is assured to have a length >= the pendant used + # with the parent. + frag_node.length = frag_node.length - new_parent.length + else: + # if we only have a single placement, we place the fragment as a + # sister to the existing node. + + # update topology + current_parent.append(new_parent) + new_parent.append(existing_node) + new_parent.append(new_node) + + # update branch lengths + new_node.length = min_pendant + new_parent.length = existing_node.length - min_distal + existing_node.length = min_distal + + + return sktree diff --git a/bp/GPL/tests/test_insert.py b/bp/GPL/tests/test_insert.py index a3ba85a..b4080aa 100644 --- a/bp/GPL/tests/test_insert.py +++ b/bp/GPL/tests/test_insert.py @@ -2,10 +2,11 @@ import pkg_resources from bp import parse_jplace from bp.GPL import insert_multifurcating +import skbio class InsertTests(unittest.TestCase): - package = 'bp.GPL.tests' + package = 'bp.tests' def setUp(self): self.jplacedata_multiple = \ @@ -16,7 +17,7 @@ def setUp(self): def get_data_path(self, filename): # adapted from qiime2.plugin.testing.TestPluginBase return pkg_resources.resource_filename(self.package, - '../tests/data/%s' % filename) + '/data/%s' % filename) def test_insert_multifurcating(self): exp = self.final_multiple_multifurcating diff --git a/setup.py b/setup.py index
f61e82f..5a11fc7 100644 --- a/setup.py +++ b/setup.py @@ -82,6 +82,8 @@ def run(self): ["bp/_binary_tree" + ext], ), Extension("bp._insert", ["bp/_insert" + ext], ), + Extension("bp.GPL._insert", + ["bp/GPL/_insert" + ext], ), Extension("bp.tests.test_bp_cy", ["bp/tests/test_bp_cy" + ext], include_dirs=['bp/BitArray/'], From 6d6c20ed5e234087483ccb5b5daeb6892a11dfd1 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:13:45 -0800 Subject: [PATCH 08/17] add commandline interface --- bp/_cli.py | 33 +++++++++++++++++++++++++++++++++ setup.py | 6 +++++- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 bp/_cli.py diff --git a/bp/_cli.py b/bp/_cli.py new file mode 100644 index 0000000..69b3243 --- /dev/null +++ b/bp/_cli.py @@ -0,0 +1,33 @@ +import click +from bp import parse_jplace, insert_fully_resolved +from bp.GPL import insert_multifurcating + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option('--placements', type=click.Path(exists=True), + required=True, help='jplace formatted data') +@click.option('--output', type=click.Path(exists=False), + required=True, help='Where to write the resulting newick') +@click.option('--method', + type=click.Choice(['fully-resolved', 'multifurcating']), + required=True, help='Whether to fully resolve or multifurcate') +def placement(placements, output, method): + if method == 'fully-resolved': + f = insert_fully_resolved + elif method == 'multifurcating': + f = insert_multifurcating + else: + raise ValueError("Unknown method: %s" % method) + + placement_df, tree = parse_jplace(open(placements).read()) + sktree = f(placement_df, tree) + sktree.write(output) + + +if __name__ == '__main__': + cli() diff --git a/setup.py b/setup.py index 5a11fc7..a2e1688 100644 --- a/setup.py +++ b/setup.py @@ -130,4 +130,8 @@ def run(self): 'install': BitArrayInstall, 'develop': BitArrayDevelop, 'egg_info': BitArrayEggInfo - }) + }, + entry_points=''' + [console_scripts] + bp=bp._cli:cli + ''') 
From fbc5e008ca979d2a7db43a2e3c14e7d0a2d897c7 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:17:52 -0800 Subject: [PATCH 09/17] missed init --- bp/GPL/__init__.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 bp/GPL/__init__.py diff --git a/bp/GPL/__init__.py b/bp/GPL/__init__.py new file mode 100644 index 0000000..f95d761 --- /dev/null +++ b/bp/GPL/__init__.py @@ -0,0 +1,3 @@ +from ._insert import insert_multifurcating + +__all__ = ['insert_multifurcating'] From 1b6b8fb4939062067e914d5ab94a743d5bc1268f Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:18:07 -0800 Subject: [PATCH 10/17] add click dep --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index a2e1688..f7f783d 100644 --- a/setup.py +++ b/setup.py @@ -124,6 +124,7 @@ def run(self): 'nose >= 1.3.7', 'cython >= 0.24.1', 'pandas', + 'click', 'scikit-bio >= 0.5.0, < 0.6.0'], long_description=long_description, cmdclass={'build_py': BitArrayBuild, From 575c2da3eb741ae288b330e82cbf9425831695e1 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:36:35 -0800 Subject: [PATCH 11/17] doc for doing insertion --- README.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0004a51..787fe33 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Improved Octo Waddle -------------------- An implementation of the balanced parentheses tree structure as described by -Cordova and Navarro (http://www.dcc.uchile.cl/~gnavarro/ps/tcs16.2.pdf). +[Cordova and Navarro](http://www.dcc.uchile.cl/~gnavarro/ps/tcs16.2.pdf). Install notes ------------- @@ -12,6 +12,29 @@ problem of requiring numpy and cython for setup.py to execute. The package is named iow in pypi as "bp" was taken at time of registration. 
``` -conda install numpy cython -pip install iow +$ conda install numpy cython +$ pip install iow +``` + +Fragment insertion +------------------ + +BP supports the [jplace format](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009). Fragments can be inserted in either fully-resolved or multifurcating mode; the two modes differ in how multiple placements on the same edge are resolved. In fully-resolved mode, the edge placed against is broken N times, where N is the number of fragments on the edge. In multifurcating mode, the edge is broken once at the average distal length of the N fragments, and a single multifurcating node that encompasses all of the placed fragments is attached at that point. + +Insertions can be performed from the command line after installation: + +``` +$ bp placement --help +Usage: bp placement [OPTIONS] + +Options: + --placements PATH jplace formatted data [required] + --output PATH Where to write the resulting newick + [required] + + --method [fully-resolved|multifurcating] + Whether to fully resolve or multifurcate + [required] + + --help Show this message and exit.
``` From 697fb541a27fe2cfd266bbfb879236c04e76a9c8 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:38:12 -0800 Subject: [PATCH 12/17] Minor comment --- bp/GPL/_insert.pyx | 1 + bp/_insert.pyx | 1 + 2 files changed, 2 insertions(+) diff --git a/bp/GPL/_insert.pyx b/bp/GPL/_insert.pyx index 9ead234..7651ae9 100644 --- a/bp/GPL/_insert.pyx +++ b/bp/GPL/_insert.pyx @@ -35,6 +35,7 @@ def insert_multifurcating(object placements, BP bptree): skbio.TreeNode A tree with the fragments placed """ + # TODO: profile, type and re-profile placements, sktree, node_lookup = \ _insert_setup(placements, bptree, 'multifurcating') diff --git a/bp/_insert.pyx b/bp/_insert.pyx index 0eda8fe..047bd85 100644 --- a/bp/_insert.pyx +++ b/bp/_insert.pyx @@ -79,6 +79,7 @@ def insert_fully_resolved(object placements, BP bptree): skbio.TreeNode A tree with the fragments placed """ + # TODO: profile, type and re-profile placements, sktree, node_lookup = \ _insert_setup(placements, bptree, 'fully_resolved') From 7e6e8797eeeb8af4243336f0284fbf98b7ace2e1 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 11 Jan 2022 17:39:44 -0800 Subject: [PATCH 13/17] extra whitespace --- bp/GPL/_insert.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/bp/GPL/_insert.pyx b/bp/GPL/_insert.pyx index 7651ae9..005c795 100644 --- a/bp/GPL/_insert.pyx +++ b/bp/GPL/_insert.pyx @@ -124,5 +124,4 @@ def insert_multifurcating(object placements, BP bptree): new_parent.length = existing_node.length - min_distal existing_node.length = min_distal - return sktree From a648c91ba7edea7705d17d04c2b0d83d99f72562 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Wed, 12 Jan 2022 12:27:24 -0800 Subject: [PATCH 14/17] minor tweak to inner loop --- bp/_insert.pyx | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/bp/_insert.pyx b/bp/_insert.pyx index 047bd85..39e4f7a 100644 --- a/bp/_insert.pyx +++ b/bp/_insert.pyx @@ -86,26 +86,30 @@ def insert_fully_resolved(object 
placements, BP bptree): for edge, edge_grp in placements.groupby('edge_num'): existing_node = node_lookup[edge] current_parent = existing_node.parent + + # break the edge + current_parent.remove(existing_node) + existing_node.parent = None + existing_length = existing_node.length for _, fragment in edge_grp.iterrows(): distal_length = fragment['distal_length'] fragment_node = fragment['node'] fragment_parent = fragment['parent'] - # break the edge - current_parent.remove(existing_node) - existing_node.parent = None - # update branch lengths - fragment_parent.length = existing_node.length - distal_length - existing_node.length = distal_length + fragment_parent.length = existing_length - distal_length + existing_length = distal_length # attach the nodes fragment_parent.append(fragment_node) - fragment_parent.append(existing_node) current_parent.append(fragment_parent) # update current_parent = fragment_parent + + existing_node.length = existing_length + current_parent.append(existing_node) + existing_node.length = distal_length return sktree From 09196f287ce9730e703818b9877ce3ae123a4e6f Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 20 Jan 2022 08:45:58 -0800 Subject: [PATCH 15/17] address @antgonza's comments --- bp/_io.pyx | 16 +++++++++++++--- bp/tests/test_io.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/bp/_io.pyx b/bp/_io.pyx index ace4a40..4e3412c 100644 --- a/bp/_io.pyx +++ b/bp/_io.pyx @@ -16,7 +16,10 @@ cdef inline np.double_t length_from_edge(unicode token): Py_ssize_t split_idx # 0.12345{0123} -> 0.12345 - split_idx = token.find('{') + # OR 0.12345[0123] -> 0.12345 + split_idx_curly = token.find('{') + split_idx_square = token.find('[') + split_idx = max(split_idx_curly, split_idx_square) if split_idx == -1: return np.double(token) else: @@ -29,7 +32,10 @@ cdef inline np.int32_t number_from_edge(unicode token): Py_ssize_t end # 0.12345{0123} -> 0123 - split_idx = token.find('{') + # OR 0.12345[0123] -> 
0123 + split_idx_curly = token.find('{') + split_idx_square = token.find('[') + split_idx = max(split_idx_curly, split_idx_square) if split_idx == -1: return 0 else: @@ -67,7 +73,7 @@ cdef void _set_node_metadata(np.uint32_t ptr, unicode token, length = length_from_edge(token_parsed) edge = number_from_edge(token_parsed) name = name.strip("'").strip() - elif u'{' in token: + elif u'{' in token or u'[' in token: # strip as " {123}" is valid? token = token.strip() end = len(token) @@ -342,6 +348,10 @@ def parse_jplace(object data): placement = placement_data[placement_idx] placement_inner_data = placement['p'] + + if 'n' not in placement: + raise KeyError("jplace parsing limited to entries with 'n' keys") + fragments = placement['n'] n_fragments = len(fragments) diff --git a/bp/tests/test_io.py b/bp/tests/test_io.py index d191c96..880bb9a 100644 --- a/bp/tests/test_io.py +++ b/bp/tests/test_io.py @@ -171,6 +171,20 @@ def get_data_path(self, filename): return pkg_resources.resource_filename(self.package, 'data/%s' % filename) + def test_place_jplace_square_braces(self): + self.jplacedata = json.loads(self.jplacedata) + treestr = self.jplacedata['tree'] + treestr = re.sub(r"{(\d+)}", r"[\1]", treestr) + self.jplacedata['tree'] = treestr + self.jplacedata = json.dumps(self.jplacedata) + + exp_tree = self.tree + obs_df, obs_tree = parse_jplace(self.jplacedata) + obs_tree = to_skbio_treenode(obs_tree) + self.assertEqual(obs_tree.compare_rfd(exp_tree), 0) + for n in obs_tree.traverse(include_self=False): + self.assertTrue(n.edge_num >= 0) + def test_parse_jplace_simple(self): columns = ['fragment', 'edge_num', 'likelihood', 'like_weight_ratio', 'distal_length', 'pendant_length'] From c53b2cc66b624b64fa6c26cd71659d10e0b36d1f Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 20 Jan 2022 08:52:42 -0800 Subject: [PATCH 16/17] comment on caveats --- bp/_io.pyx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/bp/_io.pyx b/bp/_io.pyx
index 4e3412c..c8c506f 100644 --- a/bp/_io.pyx +++ b/bp/_io.pyx @@ -326,7 +326,19 @@ cdef inline Py_ssize_t _ctoken(unicode data, Py_ssize_t datalen, Py_ssize_t star def parse_jplace(object data): - """Takes a jplace string, returns a DataFrame of placements and the tree""" + """Takes a jplace string, returns a DataFrame of placements and the tree + + Implementation-specific caveats: + + 1) We do not support multiplicities. Placements are required to have an "n" + entry, and we ignore "nm". + 2) Matsen et al (https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009) + define [] for denoting edge labels and {} for denoting edge numbers. We + currently support either [] OR {}; we do not support edges with both. + In addition, we REQUIRE edge labels, if specified, to be integers. + + If either of these caveats is a problem, then we need to modify the code. + """ cdef: dict as_json list fields, placements, fragments, p, placement_data, From 9a4089f5cac3cde2d16f952380d3b33df78f64c5 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 21 Jan 2022 16:02:26 -0800 Subject: [PATCH 17/17] Addressing @antgonza's comments --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 787fe33..6f65eb6 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,22 @@ problem of requiring numpy and cython for setup.py to execute. The package is named iow in pypi as "bp" was taken at time of registration. ``` +$ conda create --name bp python=3.8 +$ conda activate bp $ conda install numpy cython $ pip install iow ``` +Developer notes +--------------- + +If pulling the source, please note that we use a submodule, which GitHub does +not fetch by default. After cloning, please run: + +``` +$ git submodule update --init --recursive +``` + Fragment insertion ------------------ @@ -38,3 +50,5 @@ Options: --help Show this message and exit.
``` + +Note that the multifurcating support relies on GPL code derived from the Genesis project. That code and its license can be found under `bp/GPL`.