Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better cluster naming #62

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions ariba/cdhit.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(
length_diff_cutoff=0.9,
verbose=False,
cd_hit_est='cd-hit-est',
rename_suffix='x',
):

if not os.path.exists(infile):
Expand All @@ -28,23 +29,29 @@ def __init__(
self.length_diff_cutoff = length_diff_cutoff
self.verbose = verbose
self.cd_hit_est = cd_hit_est
self.rename_suffix = rename_suffix


def fake_run(self):
    '''Doesn't actually run cd-hit. Instead, puts each input sequence into
    its own cluster. So it's as if cdhit was run, but didn't cluster anything.

    The sequences are first copied to a temporary FASTA file, which is then
    passed through _rename_clusters() to write self.outfile with the
    renamed cluster representatives.

    Returns a dict of (renamed) cluster name -> set containing the single
    original sequence name in that cluster.

    Raises Error if a sequence name appears more than once in self.infile.'''
    tmpdir = tempfile.mkdtemp(prefix='tmp.run_cd-hit.', dir=os.getcwd())

    try:
        tmp_fa = os.path.join(tmpdir, 'cdhit.fa')
        clusters = {}
        seq_reader = pyfastaq.sequences.file_reader(self.infile)
        # Only the temporary file is opened here: self.outfile gets written
        # by _rename_clusters(), so it must not be opened (and leaked) too.
        f = pyfastaq.utils.open_file_write(tmp_fa)

        try:
            for seq in seq_reader:
                if seq.id in clusters:
                    raise Error('Sequence name "' + seq.id + '" not unique. Cannot continue')

                clusters[seq.id] = {seq.id}
                print(seq, file=f)
        finally:
            pyfastaq.utils.close(f)

        # Forward the configured suffix so that fake_run() names clusters
        # the same way as run() does.
        return self._rename_clusters(clusters, tmp_fa, self.outfile, rename_suffix=self.rename_suffix)
    finally:
        # Always remove the temporary directory, on success or failure
        shutil.rmtree(tmpdir)


Expand Down Expand Up @@ -87,6 +94,32 @@ def _parse_cluster_info_file(infile, cluster_representatives):
return clusters


@staticmethod
def _rename_clusters(clusters_dict, infile, outfile, rename_suffix='x'):
    '''Renames the cluster representatives and writes them to a new file.

    clusters_dict: dict of cluster representative name -> cluster members.
    infile: sequence file of the cluster representatives.
    outfile: name of file to write the renamed sequences to.
    rename_suffix: string appended (after a dot) to make each new name.

    Each new name is everything before the first "." of the original name,
    followed by "." and rename_suffix. If that name has already been used,
    then ".2", ".3", ... is appended until the name is unique.

    Returns a new dict with the same values as clusters_dict, but keyed by
    the new names. Only sequences that are in infile get an entry.

    Raises Error if a sequence in infile is not a key of clusters_dict.'''
    new_clusters_dict = {}
    freader = pyfastaq.sequences.file_reader(infile)
    f_out = pyfastaq.utils.open_file_write(outfile)

    try:
        for seq in freader:
            original_name = seq.id

            # Explicit check instead of assert, which is removed under -O
            if original_name not in clusters_dict:
                raise Error('Sequence "' + original_name + '" from file ' + infile + ' not found in clusters. Cannot continue')

            new_name = original_name.split('.')[0] + '.' + rename_suffix

            if new_name in new_clusters_dict:
                # Name clash: use first free numbered name, starting at 2
                suffix = 2
                while new_name + '.' + str(suffix) in new_clusters_dict:
                    suffix += 1
                new_name += '.' + str(suffix)

            new_clusters_dict[new_name] = clusters_dict[original_name]
            seq.id = new_name
            print(seq, file=f_out)
    finally:
        # Close the output file even if something went wrong part way through
        pyfastaq.utils.close(f_out)

    return new_clusters_dict


def run(self):
tmpdir = tempfile.mkdtemp(prefix='tmp.run_cd-hit.', dir=os.getcwd())
cdhit_fasta = os.path.join(tmpdir, 'cdhit')
Expand All @@ -106,11 +139,7 @@ def run(self):
common.syscall(cmd, verbose=self.verbose)
cluster_representatives = self._get_ids(cdhit_fasta)
clusters = self._parse_cluster_info_file(cluster_info_outfile, cluster_representatives)

try:
os.rename(cdhit_fasta, self.outfile)
except:
raise Error('Error rname ' + cdhit_fasta + ' ' + self.outfile + '. Cannot continue')
clusters = self._rename_clusters(clusters, cdhit_fasta, self.outfile, rename_suffix=self.rename_suffix)

shutil.rmtree(tmpdir)
return clusters
Expand Down
3 changes: 2 additions & 1 deletion ariba/reference_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ def cluster_with_cdhit(self, inprefix, outprefix, seq_identity_threshold=0.9, th
threads=threads,
length_diff_cutoff=length_diff_cutoff,
verbose=verbose,
cd_hit_est=cd_hit_est
cd_hit_est=cd_hit_est,
rename_suffix = seqs_type[0]
)

if nocluster:
Expand Down
2 changes: 1 addition & 1 deletion ariba/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
'ref_type', # 1 type of reference sequence (presence/absence, variants only, noncoding)
'flag', # 2 cluster flag
'reads', # 3 number of reads in this cluster
'cluster_rep', # 4 name of cluster representative from cd hit
'cluster', # 4 name of cluster
'ref_len', # 5 length of reference sequence
'ref_base_assembled', # 6 number of reference nucleotides assembled by this contig
'pc_ident', # 7 %identity between ref sequence and contig
Expand Down
36 changes: 29 additions & 7 deletions ariba/tests/cdhit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,29 @@ def test_parse_cluster_info_file(self):
self.assertEqual(expected_clusters, got_clusters)


def test_rename_clusters(self):
    '''test _rename_clusters, including a name clash needing a numbered suffix'''
    infile = os.path.join(data_dir, 'cdhit_test_rename_clusters.in.fa')
    tmpfile = 'tmp.test_rename_clusters.out.fa'
    expected_file = os.path.join(data_dir, 'cdhit_test_rename_clusters.expected.fa')

    clusters_in = {
        'seq.foo': {'seq.foo', 'seq'},
        'seq.bar': {'seq.bar', 'seq3.spam'},
        'seq4.eggs': {'seq4.eggs'}
    }
    # seq.foo and seq.bar both become seq.x, so the second one gets ".2"
    expected_clusters = {
        'seq.x': {'seq.foo', 'seq'},
        'seq.x.2': {'seq.bar', 'seq3.spam'},
        'seq4.x': {'seq4.eggs'}
    }
    got = cdhit.Runner._rename_clusters(clusters_in, infile, tmpfile)
    self.assertEqual(expected_clusters, got)
    self.assertTrue(filecmp.cmp(expected_file, tmpfile, shallow=False))
    os.unlink(tmpfile)


def test_run(self):
'''test run'''
infile = os.path.join(data_dir, 'cdhit_test_run.in.fa')
Expand All @@ -44,8 +67,8 @@ def test_run(self):
r = cdhit.Runner(infile, tmpfile, cd_hit_est=extern_progs.exe('cdhit'))
clusters = r.run()
expected_clusters = {
'seq1': {'seq1', 'seq2', 'seq3'},
'seq4': {'seq4'},
'seq1.x': {'seq1', 'seq2', 'seq3'},
'seq4.x': {'seq4'},
}
self.assertEqual(clusters, expected_clusters)
self.assertTrue(filecmp.cmp(tmpfile, expected_outfile, shallow=False))
Expand All @@ -60,10 +83,10 @@ def test_fake_run(self):
r = cdhit.Runner(infile, tmpfile, cd_hit_est=extern_progs.exe('cdhit'))
clusters = r.fake_run()
expected_clusters = {
'seq1': {'seq1'},
'seq2': {'seq2'},
'seq3': {'seq3'},
'seq4': {'seq4'},
'seq1.x': {'seq1'},
'seq2.x': {'seq2'},
'seq3.x': {'seq3'},
'seq4.x': {'seq4'},
}
self.assertEqual(clusters, expected_clusters)
self.assertTrue(filecmp.cmp(tmpfile, expected_outfile, shallow=False))
Expand All @@ -77,5 +100,4 @@ def test_fake_run_fail(self):
r = cdhit.Runner(infile, tmpfile, cd_hit_est=extern_progs.exe('cdhit'))
with self.assertRaises(cdhit.Error):
clusters = r.fake_run()
os.unlink(tmpfile)

8 changes: 4 additions & 4 deletions ariba/tests/data/cdhit_test_fake_run.out.fa
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>seq1
>seq1.x
TGGGGAATATAGTGGGTACTGTGTGTTGAGCGATTCCCGAGCCCTATGCAGGCTTGTGAA
GGAGGTCGTGGGATGCTCGTGTCTTCACGAACTTAAAGCCCCTCTTTGGCTTAGGGCCGG
AGATCGCGTCATAAGTGTAATCTAGCGTTGCAGGTATGGGTAAGGCCATACACTTAGCTC
Expand All @@ -8,7 +8,7 @@ ACGGTGAATAGAATCTTGGCATACGGTTAATCAGTGCTCTGCTAGTCCTGCTTTCTCTAA
GCTTATAGAATTCCTGATATATTAAGTAACTTTTCCATTCCATAGACGCGACGAACTGGA
TACACTCACGTAGGCTCGATCATGAAAGTGAAAGGCGCTCCAAGTTGCGAATTGAAACAA
AACTCTATCGTAGGGTCGCA
>seq2
>seq2.x
TGGGGAATATAGTGGGTACTGTGTGTTGAGCGATTCCCGAGCCCTATGCAGGCTTGTGAA
GGAGTTCGTGGGATGCTCGTGTCTTCACGAACTTAAAGCCCCTCTTTGGCTTAGGGCCGG
AGATCGCGTCATAAGTGTAATCTAGCGTTGCAGGTATGGGTAAGGCCATACACTTAGCTC
Expand All @@ -18,7 +18,7 @@ ACGGTGAATGGAATCTTGGCATACGGTTAATCAGTGCTCTGCTAGTCCTGCTTTCTCTAA
GCTTATAGAATTCCTGATATATTAAGTAACTTTTCCATTCCATAGACGCGACGAACTGGA
TACACTCACGTATGCTCGATCATGAAAGTGAAAGGCGCTCCAAGTTGCGAATTGAAACAA
AACTCTATGTAGGGTCGCA
>seq3
>seq3.x
TGGGGAATATAGTGGGTACTGTGTGTTGAGCGATTCCCGAGTTCTATGCAGGCTTGTGAA
GGAGTTCGTGGGATGCTCGTGTCTTCACGAACTTAAAGCCCCTCTTTGGCTTAGGGCCGG
AGATCGCGTCATAAGTGTAATCTAGCGTTGCAGGTATGGGTAGGCCATACACTTAGCTCA
Expand All @@ -28,7 +28,7 @@ ACGGTGAATGGAATCTTGGCATACGGTTAATCAGTGCTCTGCTAGTCCTGCTTTCTCTAA
GCTTATAGAATTCCTGATATATTAAGTAACTTTTCCATTCCATAGACGCGACGAACTGGA
TACACTCACGTATGCTCGATCATGAAAGTGAAAGGCGCTCCAAGATGCGAATTGAAACAA
AACTCTATGTAGGGTCGCA
>seq4
>seq4.x
CAAGGGCGGATTCGAACGGGTAACAGGGATCTGATTGGCTCCGGCCAGCTGGTGGATATC
TGCATCCGTTGACCCACCAACTTTAGCAGTATAGACCCTAAACTGGCATGGTGCCCTTTT
TATATCCCGATGCATCTGGAGAAACCGTCAGGACCTCTTAAGCCCCGTGGAGAGCCAAAC
Expand Down
6 changes: 6 additions & 0 deletions ariba/tests/data/cdhit_test_rename_clusters.expected.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>seq.x
AGCT
>seq.x.2
CCC
>seq4.x
AAA
6 changes: 6 additions & 0 deletions ariba/tests/data/cdhit_test_rename_clusters.in.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>seq.foo
AGCT
>seq.bar
CCC
>seq4.eggs
AAA
4 changes: 2 additions & 2 deletions ariba/tests/data/cdhit_test_run.out.fa
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
>seq1
>seq1.x
TGGGGAATATAGTGGGTACTGTGTGTTGAGCGATTCCCGAGCCCTATGCAGGCTTGTGAA
GGAGGTCGTGGGATGCTCGTGTCTTCACGAACTTAAAGCCCCTCTTTGGCTTAGGGCCGG
AGATCGCGTCATAAGTGTAATCTAGCGTTGCAGGTATGGGTAAGGCCATACACTTAGCTC
Expand All @@ -8,7 +8,7 @@ ACGGTGAATAGAATCTTGGCATACGGTTAATCAGTGCTCTGCTAGTCCTGCTTTCTCTAA
GCTTATAGAATTCCTGATATATTAAGTAACTTTTCCATTCCATAGACGCGACGAACTGGA
TACACTCACGTAGGCTCGATCATGAAAGTGAAAGGCGCTCCAAGTTGCGAATTGAAACAA
AACTCTATCGTAGGGTCGCA
>seq4
>seq4.x
CAAGGGCGGATTCGAACGGGTAACAGGGATCTGATTGGCTCCGGCCAGCTGGTGGATATC
TGCATCCGTTGACCCACCAACTTTAGCAGTATAGACCCTAAACTGGCATGGTGCCCTTTT
TATATCCCGATGCATCTGGAGAAACCGTCAGGACCTCTTAAGCCCCGTGGAGAGCCAAAC
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/data/clusters_test_write_report.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
gene1 line1
gene2 line2
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
presence_absence1 presence_absence2
presence_absence3 presence_absence4
noncoding1
presence_absence1.p presence_absence1 presence_absence2
presence_absence3.p presence_absence3 presence_absence4
noncoding1.n noncoding1
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
>presence_absence1
>presence_absence1.p
ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCGCGAGCACCAACATTAGCCAT
ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
>presence_absence3
>presence_absence3.p
ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTAA
>noncoding1
>noncoding1.n
GTACATTACTGGCGACCCAAGGAAGGGAAATCTGTTAAACATGATCTCGGTAGTCTATAG
AACAGATTTAACATTACCTGGTGTTTGCTCCTTGCATCATCCTCTGACTATTCTAGACCA
GGGAGGACTTTGGTCACGCGCGACCTTGCACTGTGGCGACGCCATAGAACCGTCTACCTG
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_init_bad.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ef_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ef_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 Description_of_variant.C42T free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 Description_of_variant.I42L free_text3
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_init_good.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 10.5 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 Description_of_variant.C42T free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 10.5 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.2 1300 12.4 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text3
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_load_report_bad.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ef_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ef_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 Description_of_variant.C42T free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 Description_of_variant.I42L free_text3
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_load_report_good.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.2 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 Description_of_variant.C42T free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.2 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.2 1300 22.2 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text3
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_run.expected.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n A51G 1 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster2 variants_only 179 20000 cluster2 1042 1042 99.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 Description_of_variant.I42L free_text3
cluster4 variants_only 179 20000 cluster4 1042 1042 99.0 cluster4.scaffold.1 1442 14.6 . . . . . . . . . . . . . . . . . free_text3
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_run.in.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 0 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 Description_of_variant.C42T free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n A51G 1 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster2 variants_only 179 20000 cluster2 1042 1042 99.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 Description_of_variant.I42L free_text3
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/data/report_filter_test_write_report.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 42.4 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 Description_of_variant.C42T free_text
cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 42.4 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 Description_of_variant.A51G free_text2
cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 42.4 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 Description_of_variant.I42L free_text3
2 changes: 1 addition & 1 deletion ariba/tests/data/summary_test_gather_output_rows.in.1.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#ref_name ref_type flag reads cluster_rep ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
noncoding1 non_coding 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1_n_A14T_N_ref has wild type, reads have variant so should report generic description of noncoding1
presence_absence1 presence_absence 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1_p_A10V_N_Ref has wild, reads have variant so report Generic description of presence_absence1
Loading