Skip to content

Commit

Permalink
reHC*, refactored old code in order to allow multi-allelic genotypes …
Browse files Browse the repository at this point in the history
…and haplotypes.
  • Loading branch information
yp committed Jun 1, 2011
1 parent e223c11 commit 23fdd18
Show file tree
Hide file tree
Showing 7 changed files with 371 additions and 318 deletions.
320 changes: 118 additions & 202 deletions include/haplotypes_genotypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "assertion.hpp"
#include "utility.hpp"


/**
*
*
Expand All @@ -56,126 +57,89 @@
*
**/

template <int T_HOMO1= 0, int T_HOMO2= 1, int T_HETER= 2, int T_MISS= 5>
class single_biallelic_genotype_t
:public enum_like_t<single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>, 4, 3> {
private:
typedef unsigned int allele_t;

BOOST_STATIC_ASSERT( (0 <= T_HOMO1) && (T_HOMO1 < 10));
BOOST_STATIC_ASSERT( (0 <= T_HOMO2) && (T_HOMO2 < 10));
BOOST_STATIC_ASSERT( (0 <= T_HETER) && (T_HETER < 10));
BOOST_STATIC_ASSERT( (0 <= T_MISS) && (T_MISS < 10));
BOOST_STATIC_ASSERT( (T_HOMO1 != T_HOMO2) && (T_HOMO1 != T_HETER) &&
(T_HOMO1 != T_MISS) && (T_HOMO2 != T_HETER) &&
(T_HOMO2 != T_MISS) && (T_HETER != T_MISS) );

typedef enum_like_t<single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>, 4, 3> base;

enum _values {
_HOMO1,
_HOMO2,
_HETER,
_MISS
};

single_biallelic_genotype_t(const int g)
: base(g)
{
}
class single_multiallelic_genotype_t {

public:
private:
allele_t _allele1;
allele_t _allele2;

single_biallelic_genotype_t()
: base(_MISS)
{}
public:

static const single_biallelic_genotype_t HOMO1;
static const single_biallelic_genotype_t HOMO2;
static const single_biallelic_genotype_t HETER;
static const single_biallelic_genotype_t MISS;
static const single_multiallelic_genotype_t MISS;
static const single_multiallelic_genotype_t HOMO1;
static const single_multiallelic_genotype_t HOMO2;
static const single_multiallelic_genotype_t HETER12;

static const int int_values[];
static const std::string str_values[];
static const single_biallelic_genotype_t enum_values[];
};
single_multiallelic_genotype_t()
:_allele1(0), _allele2(0)
{};

explicit single_multiallelic_genotype_t(const allele_t allele1,
const allele_t allele2)
:_allele1(std::min(allele1, allele2)),
_allele2(std::max(allele1, allele2))
{};

typedef single_biallelic_genotype_t<> std_single_biallelic_genotype_t;
single_multiallelic_genotype_t(const single_multiallelic_genotype_t& g)
:_allele1(g._allele1), _allele2(g._allele2)
{};

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HOMO1(_HOMO1);
allele_t allele1() const {
return _allele1;
};

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HOMO2(_HOMO2);
allele_t allele2() const {
return _allele2;
};

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HETER(_HETER);
void set_alleles(const allele_t allele1,
const allele_t allele2) {
if (((allele1 == 0)&&(allele2 != 0))||
((allele1 != 0)&&(allele2 == 0))) {
MY_FAIL;
}
_allele1= std::min(allele1, allele2);
_allele2= std::max(allele1, allele2);
};

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::MISS(_MISS);
};

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const int single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::int_values[]= {T_HOMO1, T_HOMO2, T_HETER, T_MISS};
bool
operator==(const single_multiallelic_genotype_t& g1,
const single_multiallelic_genotype_t& g2);

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const std::string single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::str_values[]= {
std::string(1, '0'+T_HOMO1),
std::string(1, '0'+T_HOMO2),
std::string(1, '0'+T_HETER),
std::string(1, '0'+T_MISS)
};
bool
operator!=(const single_multiallelic_genotype_t& g1,
const single_multiallelic_genotype_t& g2);

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::enum_values[]= {
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HOMO1,
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HOMO2,
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HETER,
single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::MISS
};
bool
operator<=(const single_multiallelic_genotype_t& g1,
const single_multiallelic_genotype_t& g2);

bool
operator<(const single_multiallelic_genotype_t& g1,
const single_multiallelic_genotype_t& g2);

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
bool is_genotyped(const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>& g) {
return g != single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::MISS;
}
std::ostream&
operator<<(std::ostream& out,
const single_multiallelic_genotype_t& g);

template <typename S_GEN_T>
bool is_genotyped(const S_GEN_T& g) {
return g != S_GEN_T::MISS;
};
std::istream&
operator>>(std::istream& in,
single_multiallelic_genotype_t& g);

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
bool is_homozigous(const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>& g) {
return
is_genotyped(g) &&
(g != single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HETER);
}
bool
is_genotyped(const single_multiallelic_genotype_t& g);

template <typename S_GEN_T>
bool is_homozygous(const S_GEN_T& g) {
return
is_genotyped(g) &&
(g != S_GEN_T::HETER);
};
bool
is_homozygous(const single_multiallelic_genotype_t& g);

template <int T_HOMO1, int T_HOMO2, int T_HETER, int T_MISS>
bool is_heterozygous(const single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>& g) {
return
is_genotyped(g) &&
(g == single_biallelic_genotype_t<T_HOMO1, T_HOMO2, T_HETER, T_MISS>::HETER);
}
bool
is_heterozygous(const single_multiallelic_genotype_t& g);

template <typename S_GEN_T>
bool is_heterozygous(const S_GEN_T& g) {
return
is_genotyped(g) &&
(g == S_GEN_T::HETER);
};

/**
*
Expand All @@ -188,127 +152,78 @@ bool is_heterozygous(const S_GEN_T& g) {
**/


template <int T_ALLELE1= 1, int T_ALLELE2= 2, int T_MISS= 0>
class single_biallelic_haplotype_t
:public enum_like_t<single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>, 3, 2> {
private:

BOOST_STATIC_ASSERT( (0 <= T_ALLELE1) && (T_ALLELE1 < 10));
BOOST_STATIC_ASSERT( (0 <= T_ALLELE2) && (T_ALLELE2 < 10));
BOOST_STATIC_ASSERT( (0 <= T_MISS) && (T_MISS < 10));
BOOST_STATIC_ASSERT( (T_ALLELE1 != T_ALLELE2) && (T_ALLELE1 != T_MISS) && (T_ALLELE2 != T_MISS) );

typedef enum_like_t<single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>, 3, 2> base;

enum _values {
_ALLELE1,
_ALLELE2,
_MISS
};
class single_multiallelic_haplotype_t {
private:
allele_t _allele;
public:

single_biallelic_haplotype_t(const int h)
: base(h)
{
}
static const single_multiallelic_haplotype_t MISS;
static const single_multiallelic_haplotype_t ALLELE1;
static const single_multiallelic_haplotype_t ALLELE2;
static const single_multiallelic_haplotype_t ALLELE3;
static const single_multiallelic_haplotype_t ALLELE4;
static const single_multiallelic_haplotype_t ALLELE5;

public:
single_multiallelic_haplotype_t()
:_allele(0)
{};

single_biallelic_haplotype_t()
: base(_MISS)
{}
explicit single_multiallelic_haplotype_t(const allele_t allele)
:_allele(allele)
{};

static const single_biallelic_haplotype_t ALLELE1;
static const single_biallelic_haplotype_t ALLELE2;
static const single_biallelic_haplotype_t MISS;
single_multiallelic_haplotype_t(const single_multiallelic_haplotype_t& h)
:_allele(h._allele)
{};

static const int int_values[];
static const std::string str_values[];
static const single_biallelic_haplotype_t enum_values[];
allele_t allele() const {
return _allele;
};

};

template <typename h_t, typename g_t>
const h_t& homozygous_to_haplotype(const g_t& g) {
MY_ASSERT_DBG( (g == g_t::HOMO1) || (g == g_t::HOMO2) );
if ( g == g_t::HOMO1 ) {
return h_t::ALLELE1;
} else if ( g == g_t::HOMO2 ) {
return h_t::ALLELE2;
} else {
MY_FAIL;
return h_t::MISS;
}
};
bool
operator==(const single_multiallelic_haplotype_t& h1,
const single_multiallelic_haplotype_t& h2);

bool
operator!=(const single_multiallelic_haplotype_t& h1,
const single_multiallelic_haplotype_t& h2);

template <typename h_t, typename g_t>
bool
haplotype_genotype_consistent(const h_t& h1, const h_t& h2,
const g_t& g) {
if ((h1 == h_t::MISS) && (h2 == h_t::MISS))
return true;
if (! is_genotyped(g))
return true;
if ((h1 == h_t::MISS) && (h2 != h_t::MISS))
return haplotype_genotype_consistent(h2, h1, g);

MY_ASSERT( h1 != h_t::MISS );
MY_ASSERT( is_genotyped(g) );

if (is_homozigous(g)) {
return
((h2 == h_t::MISS) || (h1 == h2)) &&
(h1 == homozygous_to_haplotype<h_t, g_t>(g));
} else {
return (h2 == h_t::MISS) || (h1 != h2);
}
};
operator<=(const single_multiallelic_haplotype_t& h1,
const single_multiallelic_haplotype_t& h2);

template <typename h_t, typename g_t>
bool
strict_haplotype_genotype_consistent(const h_t& h1, const h_t& h2,
const g_t& g) {
if ((h1 == h_t::MISS) || (h2 == h_t::MISS))
return false;
else
return haplotype_genotype_consistent(h1, h2, g);
}
operator<(const single_multiallelic_haplotype_t& h1,
const single_multiallelic_haplotype_t& h2);

typedef single_biallelic_haplotype_t<> std_single_biallelic_haplotype_t;
std::ostream&
operator<<(std::ostream& out,
const single_multiallelic_haplotype_t& h);

template <int T_ALLELE1, int T_ALLELE2, int T_MISS>
const single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::ALLELE1(_ALLELE1);
std::istream&
operator>>(std::istream& in,
single_multiallelic_haplotype_t& h);

template <int T_ALLELE1, int T_ALLELE2, int T_MISS>
const single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::ALLELE2(_ALLELE2);
bool
is_missing(const single_multiallelic_haplotype_t& h);

template <int T_ALLELE1, int T_ALLELE2, int T_MISS>
const single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::MISS(_MISS);

template <int T_ALLELE1, int T_ALLELE2, int T_MISS>
const int single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::int_values[]= {
T_ALLELE1,
T_ALLELE2,
T_MISS
};
const single_multiallelic_haplotype_t
homozygous_to_haplotype(const single_multiallelic_genotype_t& g);

template <int T_ALLELE1, int T_ALLELE2, int T_MISS>
const std::string single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::str_values[]= {
std::string(1, '0'+T_ALLELE1),
std::string(1, '0'+T_ALLELE2),
std::string(1, '0'+T_MISS),
};
bool
haplotype_genotype_consistent(const single_multiallelic_haplotype_t& h1,
const single_multiallelic_haplotype_t& h2,
const single_multiallelic_genotype_t& g);

bool
strict_haplotype_genotype_consistent(const single_multiallelic_haplotype_t& h1,
const single_multiallelic_haplotype_t& h2,
const single_multiallelic_genotype_t& g);

template <int T_ALLELE1, int T_ALLELE2, int T_MISS>
const single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::enum_values[]= {
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::ALLELE1,
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::ALLELE2,
single_biallelic_haplotype_t<T_ALLELE1, T_ALLELE2, T_MISS>::MISS
};


/**
Expand Down Expand Up @@ -431,8 +346,8 @@ class generic_fixlen_vector_t {
const typename v_t::base* v1it= begin();
const typename v_t::base* v2it= v.begin();
for (; v1it != end(); ++v1it, ++v2it) {
if ( (*v1it != v_t::base::MISS) &&
(*v2it != v_t::base::MISS) &&
if ( (is_genotyped(*v1it)) &&
(is_genotyped(*v2it)) &&
(*v1it != *v2it) ) {
return false;
}
Expand Down Expand Up @@ -513,8 +428,9 @@ strict_multilocus_mendelian_consistent(const h_t& hpf,
return std::min(recomb1, recomb2);
}

typedef generic_fixlen_vector_t<std_single_biallelic_genotype_t> genotype_t;
typedef generic_fixlen_vector_t<std_single_biallelic_haplotype_t> haplotype_t;

typedef generic_fixlen_vector_t<single_multiallelic_genotype_t> genotype_t;
typedef generic_fixlen_vector_t<single_multiallelic_haplotype_t> haplotype_t;

template <class _base_t,
class _reader,
Expand Down
Loading

0 comments on commit 23fdd18

Please sign in to comment.