-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* removing contribution comment, since GH tracks such things * tidying * cli test module, and test data * avoid problematic gt_types attribute in cyvcf2 * more cli tests * lint * update badges * Update snps.missing_gts.vcf Add a test for a double missing variant in a sample. * Update test_cli.py Updating the truth based on the new line in the test vcf for a double missing variant in a sample. * move `iterate_with_ambiguity_warning` inside spectra function for safety Co-authored-by: Mitchell Robert Vollger <[email protected]>
- Loading branch information
Showing
8 changed files
with
226 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#! /usr/bin/env python | ||
|
||
import pytest | ||
from mutyper import cli | ||
import argparse | ||
import pandas as pd | ||
import io | ||
|
||
|
||
def test_spectra(capsys):
    """Run ``cli.spectra`` on ``tests/test_data/snps.vcf`` and check its output.

    The tab-separated table written to stdout is parsed back into a
    DataFrame and compared with the expected per-sample counts.
    """
    namespace = argparse.Namespace(
        vcf="tests/test_data/snps.vcf", population=False, randomize=False
    )
    cli.spectra(namespace)

    # The command prints its table to stdout; read it back as TSV.
    stdout_text = capsys.readouterr().out
    observed = pd.read_csv(io.StringIO(stdout_text), sep="\t", index_col=0)

    sample_index = pd.Index(["sample1", "sample2"], name="sample")
    expected_counts = {
        "ACA>ATA": [0, 1],
        "ACC>ATC": [1, 0],
        "ACG>ATG": [1, 1],
        "ACT>ATT": [2, 0],
    }
    expected = pd.DataFrame(expected_counts, index=sample_index)

    pd.testing.assert_frame_equal(observed, expected)
|
||
|
||
def test_spectra_randomize(capsys):
    """Run ``cli.spectra`` with ``randomize=True`` on ``tests/test_data/snps.vcf``.

    Haplotype randomization makes two outcomes possible, so the parsed
    stdout table must equal one of two expected frames.
    """
    namespace = argparse.Namespace(
        vcf="tests/test_data/snps.vcf", population=False, randomize=True
    )
    cli.spectra(namespace)

    stdout_text = capsys.readouterr().out
    observed = pd.read_csv(io.StringIO(stdout_text), sep="\t", index_col=0)

    # there are two possibilities due to haplotype randomization; they
    # differ only in the "ACG>ATG" column.
    first_option, second_option = (
        pd.DataFrame(
            {
                "ACA>ATA": [0, 1],
                "ACC>ATC": [1, 0],
                "ACG>ATG": acg_counts,
                "ACT>ATT": [1, 0],
            },
            index=pd.Index(["sample1", "sample2"], name="sample"),
        )
        for acg_counts in ([1, 0], [0, 1])
    )

    # Accept the first outcome; if it does not match, require the second.
    try:
        pd.testing.assert_frame_equal(observed, first_option)
    except AssertionError:
        pd.testing.assert_frame_equal(observed, second_option)
|
||
|
||
def test_spectra_haploid(capsys):
    """Run ``cli.spectra`` on ``tests/test_data/snps.haploid.vcf`` and check its output.

    The tab-separated table written to stdout is parsed back into a
    DataFrame and compared with the expected per-sample counts.
    """
    namespace = argparse.Namespace(
        vcf="tests/test_data/snps.haploid.vcf", population=False, randomize=False
    )
    cli.spectra(namespace)

    stdout_text = capsys.readouterr().out
    observed = pd.read_csv(io.StringIO(stdout_text), sep="\t", index_col=0)

    sample_index = pd.Index(["sample1", "sample2"], name="sample")
    expected_counts = {
        "ACA>ATA": [0, 1],
        "ACC>ATC": [0, 0],
        "ACG>ATG": [1, 1],
        "ACT>ATT": [1, 0],
    }
    expected = pd.DataFrame(expected_counts, index=sample_index)

    pd.testing.assert_frame_equal(observed, expected)
|
||
|
||
def test_spectra_missing_gts(capsys, caplog):
    """Run ``cli.spectra`` on ``tests/test_data/snps.missing_gts.vcf``.

    Checks both the table printed to stdout and that the captured log
    text contains the "Ambiguous genotypes found" message.
    """
    namespace = argparse.Namespace(
        vcf="tests/test_data/snps.missing_gts.vcf", population=False, randomize=False
    )
    cli.spectra(namespace)

    stdout_text = capsys.readouterr().out
    observed = pd.read_csv(io.StringIO(stdout_text), sep="\t", index_col=0)

    sample_index = pd.Index(["sample1", "sample2"], name="sample")
    expected_counts = {
        "ACA>ATA": [0, 1],
        "ACC>ATC": [1, 0],
        "ACG>ATG": [1, 1],
        "ACT>ATT": [1, 1],
    }
    expected = pd.DataFrame(expected_counts, index=sample_index)

    pd.testing.assert_frame_equal(observed, expected)
    # The run must also have logged the ambiguity message.
    assert "Ambiguous genotypes found" in caplog.text
|
||
|
||
def test_ksfs(capsys):
    """Run ``cli.ksfs`` with ``k=3`` on ``tests/test_data/snps.vcf`` and check its output.

    The tab-separated table written to stdout is parsed back into a
    DataFrame indexed by ``sample_frequency`` and compared with the
    expected counts.
    """
    namespace = argparse.Namespace(vcf="tests/test_data/snps.vcf", k=3)
    cli.ksfs(namespace)

    stdout_text = capsys.readouterr().out
    observed = pd.read_csv(io.StringIO(stdout_text), sep="\t", index_col=0)

    frequency_index = pd.Index([1, 2, 3], name="sample_frequency")
    expected_counts = {
        "ACA>ATA": [1, 0, 0],
        "ACC>ATC": [1, 0, 0],
        "ACG>ATG": [0, 1, 0],
        "ACT>ATT": [0, 1, 0],
    }
    expected = pd.DataFrame(expected_counts, index=frequency_index)

    pd.testing.assert_frame_equal(observed, expected)
|
||
|
||
def test_ksfs_missing_gts():
    """Expect ``cli.ksfs`` to raise ``ValueError`` on ``tests/test_data/snps.missing_gts.vcf``.

    The error message must match the pattern about differing AN values
    indicating missing genotypes.
    """
    namespace = argparse.Namespace(vcf="tests/test_data/snps.missing_gts.vcf", k=3)
    expected_message = r"different AN [0-9]* and [0-9]* indicates missing genotypes"
    with pytest.raises(ValueError, match=expected_message):
        cli.ksfs(namespace)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
##fileformat=VCFv4.2 | ||
##FILTER=<ID=PASS,Description="All filters passed"> | ||
##contig=<ID=chr1,length=248387328> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> | ||
##INFO=<ID=mutation_type,Number=1,Type=Character,Description="ancestral 3-mer mutation type"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 sample2 | ||
chr1 10 . C T 30 PASS AN=4;AC=1;mutation_type=ACA>ATA GT 0 1 | ||
chr1 20 . C T 30 PASS AN=4;AC=1;mutation_type=ACC>ATC GT 0 0 | ||
chr1 30 . C T 30 PASS AN=4;AC=1;mutation_type=ACG>ATG GT 1 1 | ||
chr1 40 . C T 30 PASS AN=4;AC=2;mutation_type=ACT>ATT GT 1 0 |
Oops, something went wrong.