Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Task expandflag #181

Merged
merged 3 commits into from
Jun 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ariba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
'ref_seq_chooser',
'report',
'report_filter',
'report_flag_expander',
'scaffold_graph',
'samtools_variants',
'sequence_metadata',
Expand Down
3 changes: 3 additions & 0 deletions ariba/flag.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,6 @@ def to_long_string(self):
def has(self, s):
    '''Returns the value stored in self.flags for the flag called s'''
    is_set = self.flags[s]
    return is_set


def to_comma_separated_string(self):
    '''Returns the names of all flags that are set, joined by commas.

    Names appear in the same order as in flags_in_order.'''
    set_names = []
    for name in flags_in_order:
        if self.flags[name]:
            set_names.append(name)
    return ','.join(set_names)
37 changes: 37 additions & 0 deletions ariba/report_flag_expander.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import copy
import sys

import pyfastaq

from ariba import flag

class Error(Exception):
    '''Exception type raised by this module'''

class ReportFlagExpander:
    '''Rewrites a report file, replacing each numeric flag with flag names.

    infile: name of the report file to read. Its first line must be a
            header line that contains a column called "flag".
    outfile: name of the file to write.
    '''
    def __init__(self, infile, outfile):
        self.infile = infile
        self.outfile = outfile


    def run(self):
        '''Copies infile to outfile, expanding the flag column of each data line.

        The first line is treated as the header: it is only used to find
        the position of the "flag" column and is written out unchanged
        (apart from the separator). On every later line, the integer in
        that column is replaced with the comma-separated names of the flags
        that are set. Every line is split on whitespace and written back
        tab-separated.

        Raises Error if the first line has no "flag" column.
        '''
        f_in = pyfastaq.utils.open_file_read(self.infile)
        try:
            f_out = pyfastaq.utils.open_file_write(self.outfile)
            try:
                flag_index = None

                for line in f_in:
                    fields = line.rstrip().split()

                    if flag_index is None:
                        try:
                            flag_index = fields.index('flag')
                        except ValueError:
                            # list.index raises ValueError when 'flag' is absent.
                            # Catch only that, so unrelated exceptions (for
                            # example KeyboardInterrupt) are not masked.
                            raise Error('"flag" column not found in first line of file ' + self.infile +'. Cannot continue')
                    else:
                        f = flag.Flag(int(fields[flag_index]))
                        fields[flag_index] = f.to_comma_separated_string()

                    print(*fields, sep='\t', file=f_out)
            finally:
                # Close the output even when a line could not be processed
                f_out.close()
        finally:
            f_in.close()

1 change: 1 addition & 0 deletions ariba/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__all__ = [
'aln2meta',
'expandflag',
'flag',
'getref',
'micplot',
Expand Down
8 changes: 8 additions & 0 deletions ariba/tasks/expandflag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import argparse
import sys
import ariba

def run(options):
    '''Runs the expandflag task.

    options: object with attributes infile and outfile, which are passed
             to ReportFlagExpander as the input and output file names.
    '''
    ariba.report_flag_expander.ReportFlagExpander(
        options.infile,
        options.outfile,
    ).run()

3 changes: 3 additions & 0 deletions ariba/tests/data/report_flag_expander.run.in.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#ariba column1 flag foo
name 1 1 foo
name 2 27 bar
3 changes: 3 additions & 0 deletions ariba/tests/data/report_flag_expander.run.out.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#ariba column1 flag foo
name 1 assembled foo
name 2 assembled,assembled_into_one_contig,complete_gene,unique_contig bar
8 changes: 8 additions & 0 deletions ariba/tests/flag_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,11 @@ def test_has(self):
self.assertFalse(f.has(x))
f.add(x)
self.assertTrue(f.has(x))


def test_to_comma_separated_string(self):
    '''to_comma_separated_string of flag 27 gives the expected flag names'''
    expected = 'assembled,assembled_into_one_contig,complete_gene,unique_contig'
    got = flag.Flag(27).to_comma_separated_string()
    self.assertEqual(expected, got)

20 changes: 20 additions & 0 deletions ariba/tests/report_flag_expander_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import unittest
import os
import filecmp
from ariba import report_flag_expander

# Test data files are looked up in the tests/data directory that sits next to
# the report_flag_expander module.
modules_dir = os.path.dirname(os.path.abspath(report_flag_expander.__file__))
data_dir = os.path.join(modules_dir, 'tests', 'data')


class TestReportFlagExpander(unittest.TestCase):
    def test_run(self):
        '''run writes a file identical to the expected expanded report'''
        tmp_out = 'tmp.report_flag_expander.out.tsv'
        infile = os.path.join(data_dir, 'report_flag_expander.run.in.tsv')
        expected = os.path.join(data_dir, 'report_flag_expander.run.out.tsv')
        report_flag_expander.ReportFlagExpander(infile, tmp_out).run()
        # shallow=False compares the file contents, not just os.stat() signatures
        files_match = filecmp.cmp(expected, tmp_out, shallow=False)
        self.assertTrue(files_match)
        os.unlink(tmp_out)

12 changes: 12 additions & 0 deletions scripts/ariba
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ subparser_aln2meta.add_argument('outprefix', help='Prefix of output filenames')
subparser_aln2meta.set_defaults(func=ariba.tasks.aln2meta.run)


#---------------------------- expandflag ------------------------------
# Registers the "expandflag" subcommand: two positional file names, dispatched
# to ariba.tasks.expandflag.run.
subparser_expandflag = subparsers.add_parser(
    'expandflag',
    help='Expands flag column of report file',
    usage='ariba expandflag <in.report.tsv> <out.tsv>',
    description='Expands the flag column in a report file from number to comma-separated list of flag bits',
)

subparser_expandflag.add_argument('infile', help='Name of input report TSV file')
subparser_expandflag.add_argument('outfile', help='Name of output report TSV file')
subparser_expandflag.set_defaults(func=ariba.tasks.expandflag.run)


#---------------------------- flag ------------------------------------
subparser_flag = subparsers.add_parser(
Expand Down