Skip to content

Commit

Permalink
Remove different citation lists.
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Mar 10, 2021
1 parent f295c1b commit 5a0048a
Show file tree
Hide file tree
Showing 13 changed files with 96 additions and 108 deletions.
22 changes: 8 additions & 14 deletions stdpopsim/catalog/AraTha/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,25 @@
chromosomes=_chromosomes,
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
mutation_rate_citations=[
citations=[
stdpopsim.Citation(
author="Ossowski et al.",
year=2010,
doi="https://doi.org/10.1126/science.1180677",
reasons={stdpopsim.CiteReason.MUT_RATE},
)
],
recombination_rate_citations=[
),
stdpopsim.Citation(
author="Huber et al.",
year=2014,
doi="https://doi.org/10.1093/molbev/msu247",
reasons={stdpopsim.CiteReason.REC_RATE},
)
],
assembly_citations=[
),
stdpopsim.Citation(
doi="https://doi.org/10.1093/nar/gkm965",
year=2007,
author="Swarbreck et al.",
reasons={stdpopsim.CiteReason.ASSEMBLY},
)
),
],
)

Expand All @@ -61,22 +57,20 @@
common_name="A. thaliana",
genome=_genome,
generation_time=1.0,
generation_time_citations=[
population_size=10 ** 4,
citations=[
stdpopsim.Citation(
doi="https://doi.org/10.1890/0012-9658(2002)083[1006:GTINSO]2.0.CO;2",
year=2002,
author="Donohue",
reasons={stdpopsim.CiteReason.GEN_TIME},
)
],
population_size=10 ** 4,
population_size_citations=[
),
stdpopsim.Citation(
doi="https://doi.org/10.1016/j.cell.2016.05.063",
year=2016,
author="1001GenomesConsortium",
reasons={stdpopsim.CiteReason.POP_SIZE},
)
),
],
)

Expand Down
10 changes: 5 additions & 5 deletions stdpopsim/catalog/BosTau/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
doi="https://doi.org/10.1093/molbev/mst125",
year="2013",
author="MacLeod et al.",
reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.POP_SIZE},
)

# Recombination rate has been derived from dairy cattle crossovers
Expand Down Expand Up @@ -66,11 +67,11 @@

_genome = stdpopsim.Genome(
chromosomes=_chromosomes,
mutation_rate_citations=[
citations=[
_HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
_MaEtAl.because(stdpopsim.CiteReason.REC_RATE),
_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
],
recombination_rate_citations=[_MaEtAl.because(stdpopsim.CiteReason.REC_RATE)],
assembly_citations=[_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

_species = stdpopsim.Species(
Expand All @@ -80,9 +81,8 @@
common_name="Cattle",
genome=_genome,
generation_time=5,
generation_time_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.GEN_TIME)],
population_size=62000,
population_size_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.POP_SIZE)],
citations=[_MacLeodEtAl],
)

stdpopsim.register_species(_species)
14 changes: 7 additions & 7 deletions stdpopsim/catalog/CanFam/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@
chromosomes=_chromosomes,
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
mutation_rate_citations=[
citations=[
_SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE),
_FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE),
_CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE),
_LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
],
recombination_rate_citations=[_CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE)],
assembly_citations=[_LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

_species = stdpopsim.Species(
Expand All @@ -109,9 +109,10 @@
name="Canis familiaris",
common_name="Dog",
genome=_genome,
population_size=13000, # ancestral dog size
generation_time=3,
generation_time_citations=[
# Everyone uses 3 years because everyone else uses it.
citations=[
# Everyone uses 3 years for generation time because everyone else uses it.
# It's likely higher, at least in wolves:
# https://academic.oup.com/mbe/article/35/6/1366/4990884
# Reasoning behind a generation time of 3 years:
Expand All @@ -122,9 +123,8 @@
# intervention in breeding. In case (2), you might want to match what other
# studies have done (thus using 3 year generations), or you might want to
# consider what is known about modern wolves.
_LindbladTohEtAl.because(stdpopsim.CiteReason.POP_SIZE)
],
population_size=13000, # ancestral dog size
population_size_citations=[_LindbladTohEtAl.because(stdpopsim.CiteReason.POP_SIZE)],
)

stdpopsim.register_species(_species)
2 changes: 0 additions & 2 deletions stdpopsim/catalog/DroMel/demographic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def _afr_3epoch():
)
]
generation_time = _species.generation_time
citations.extend(_species.generation_time_citations)

# Parameter values from "Simulating Data" section
# these are assumptions, not estimates
Expand Down Expand Up @@ -95,7 +94,6 @@ def _ooa_2():
populations = [_afr_population, _eur_population]
citations = [_LiAndStephan.because(stdpopsim.CiteReason.DEM_MODEL)]
generation_time = _species.generation_time
citations.extend(_species.generation_time_citations)

# African Parameter values from "Demographic History of the African
# Population" section
Expand Down
11 changes: 6 additions & 5 deletions stdpopsim/catalog/DroMel/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@


_LiAndStephan = stdpopsim.Citation(
author="Li et al.", year=2006, doi="https://doi.org/10.1371/journal.pgen.0020166"
author="Li et al.",
year=2006,
doi="https://doi.org/10.1371/journal.pgen.0020166",
reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.POP_SIZE},
)

_SchriderEtAl = stdpopsim.Citation(
Expand Down Expand Up @@ -45,8 +48,7 @@
chromosomes=_chromosomes,
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
mutation_rate_citations=[_SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE)],
assembly_citations=[_DosSantosEtAl],
citations=[_SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE), _DosSantosEtAl],
)

_species = stdpopsim.Species(
Expand All @@ -56,9 +58,8 @@
common_name="D. melanogaster",
genome=_genome,
generation_time=0.1,
generation_time_citations=[_LiAndStephan.because(stdpopsim.CiteReason.GEN_TIME)],
population_size=1720600,
population_size_citations=[_LiAndStephan.because(stdpopsim.CiteReason.POP_SIZE)],
citations=[_LiAndStephan],
)

stdpopsim.register_species(_species)
10 changes: 6 additions & 4 deletions stdpopsim/catalog/EscCol/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@
chromosomes=_chromosomes,
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
mutation_rate_citations=[
citations=[
_wielgoss_et_al.because(stdpopsim.CiteReason.MUT_RATE),
_blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY),
],
assembly_citations=[_blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY)],
)


Expand All @@ -61,11 +61,13 @@
# E. coli K-12 strain MG1655 "doubling time during steady-state growth in
# Luria-Bertani broth was 20 min".
generation_time=0.00003805175, # 1.0 / (525600 min/year / 20 min/gen)
generation_time_citations=[_sezonov_et_al.because(stdpopsim.CiteReason.GEN_TIME)],
# Hartl et al. calculated Ne for "natural isolates of E. coli",
# assuming mu=5e-10 (from Drake 1991).
population_size=1.8e8,
population_size_citations=[_hartl_et_al.because(stdpopsim.CiteReason.POP_SIZE)],
citations=[
_sezonov_et_al.because(stdpopsim.CiteReason.GEN_TIME),
_hartl_et_al.because(stdpopsim.CiteReason.POP_SIZE),
],
)


Expand Down
14 changes: 9 additions & 5 deletions stdpopsim/catalog/HomSap/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@

_genome = stdpopsim.Genome(
chromosomes=_chromosomes,
mutation_rate_citations=[_tian2019.because(stdpopsim.CiteReason.MUT_RATE)],
recombination_rate_citations=[_hapmap2007.because(stdpopsim.CiteReason.REC_RATE)],
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
assembly_citations=[_genome2001],
citations=[
_genome2001,
_tian2019.because(stdpopsim.CiteReason.MUT_RATE),
_hapmap2007.because(stdpopsim.CiteReason.REC_RATE),
],
)

_species = stdpopsim.Species(
Expand All @@ -93,9 +95,11 @@
common_name="Human",
genome=_genome,
generation_time=30,
generation_time_citations=[_tremblay2000.because(stdpopsim.CiteReason.GEN_TIME)],
population_size=10 ** 4,
population_size_citations=[_takahata1993.because(stdpopsim.CiteReason.POP_SIZE)],
citations=[
_tremblay2000.because(stdpopsim.CiteReason.GEN_TIME),
_takahata1993.because(stdpopsim.CiteReason.POP_SIZE),
],
)

stdpopsim.register_species(_species)
15 changes: 10 additions & 5 deletions stdpopsim/catalog/PonAbe/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,17 @@
}

_locke2011 = stdpopsim.Citation(
author="Locke et al.", year=2011, doi="http://doi.org/10.1038/nature09687"
author="Locke et al.",
year=2011,
doi="http://doi.org/10.1038/nature09687",
reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.POP_SIZE},
)

_nater2017 = stdpopsim.Citation(
author="Nater et al.", year=2017, doi="https://doi.org/10.1016/j.cub.2017.09.047"
author="Nater et al.",
year=2017,
doi="https://doi.org/10.1016/j.cub.2017.09.047",
reasons={stdpopsim.CiteReason.MUT_RATE, stdpopsim.CiteReason.REC_RATE},
)

_chromosomes = []
Expand All @@ -59,7 +65,7 @@
chromosomes=_chromosomes,
assembly_name=genome_data.data["assembly_name"],
assembly_accession=genome_data.data["assembly_accession"],
mutation_rate_citations=[_nater2017.because(stdpopsim.CiteReason.MUT_RATE)],
citations=[_nater2017],
)

_species = stdpopsim.Species(
Expand All @@ -70,10 +76,9 @@
genome=_genome,
# generation time used by Locke et al. without further citation
generation_time=20,
generation_time_citations=[_locke2011.because(stdpopsim.CiteReason.GEN_TIME)],
# Locke et al. inferred ancestral Ne
population_size=1.79e4,
population_size_citations=[_locke2011.because(stdpopsim.CiteReason.POP_SIZE)],
citations=[_locke2011],
)

stdpopsim.register_species(_species)
11 changes: 6 additions & 5 deletions stdpopsim/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,8 @@ def get_citations(engine, model, contig, species):
"""
citations = [stdpopsim.citations._stdpopsim_citation]
citations.extend(engine.citations)
citations.extend(species.genome.assembly_citations)
citations.extend(species.genome.mutation_rate_citations)
citations.extend(species.genome.recombination_rate_citations)
citations.extend(species.citations)
citations.extend(species.genome.citations)
if contig.genetic_map is not None:
citations.extend(contig.genetic_map.citations)
citations.extend(model.citations)
Expand Down Expand Up @@ -498,8 +497,10 @@ def run_simulation(args):
if args.demographic_model is None:
model = stdpopsim.PiecewiseConstantSize(species.population_size)
model.generation_time = species.generation_time
model.citations.extend(species.population_size_citations)
model.citations.extend(species.generation_time_citations)
for citation in species.citations:
reasons = {stdpopsim.CiteReason.POP_SIZE, stdpopsim.CiteReason.GEN_TIME}
if len(citation.reasons & reasons) > 0:
model.citations.append(citation)
qc_complete = True
else:
model = get_model_wrapper(species, args.demographic_model)
Expand Down
38 changes: 16 additions & 22 deletions stdpopsim/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,23 @@ class Genome:
:ivar chromosomes: A list of :class:`.Chromosome` objects.
:vartype chromosomes: list
:ivar mutation_rate_citations: A list of :class:`.Citation` objects
providing justification for the mutation rate estimate.
:vartype mutation_rate_citations: list
:ivar recombination_rate_citations: A list of :class:`.Citation` objects
providing justification for the recombination rate estimate.
:vartype recombination_rate_citations: list
:ivar assembly_citations: A list of :class:`.Citation` objects
providing reference to the source of the genome assembly.
:vartype assembly_citations: list
:ivar citations: A list of :class:`.Citation` objects
providing the source for the genome assembly,
mutation rate and recombination rate estimates.
:vartype citations: list
:ivar length: The total length of the genome.
:vartype length: int
"""

# TODO document the assembly_name and accession

chromosomes = attr.ib(factory=list)
assembly_name = attr.ib(default=None, kw_only=True)
assembly_accession = attr.ib(default=None, kw_only=True)
length = attr.ib(default=0, init=False)

# TODO these should all be combined into a single "citations" attr,
# since we already have a "reason" attribute in Citation.
mutation_rate_citations = attr.ib(factory=list, kw_only=True)
recombination_rate_citations = attr.ib(factory=list, kw_only=True)
assembly_citations = attr.ib(factory=list, kw_only=True)
citations = attr.ib(factory=list, kw_only=True)

@staticmethod
def from_data(genome_data, *, recombination_rate, mutation_rate):
def from_data(genome_data, *, recombination_rate, mutation_rate, citations):
"""
Construct a Genome object from the specified dictionary of
genome information from Ensembl, recombination_rate and
Expand All @@ -62,11 +53,12 @@ def from_data(genome_data, *, recombination_rate, mutation_rate):
chromosomes=chromosomes,
assembly_name=genome_data["assembly_name"],
assembly_accession=genome_data["assembly_accession"],
citations=citations,
)

def __attrs_post_init__(self):
for chromosome in self.chromosomes:
self.length += chromosome.length
@property
def length(self):
return sum(chrom.length for chrom in self.chromosomes)

def __str__(self):
s = "Chromosomes:\n"
Expand Down Expand Up @@ -96,9 +88,10 @@ def mean_recombination_rate(self):
"""
The length-weighted mean recombination rate across all chromosomes.
"""
length = self.length
mean_recombination_rate = 0
for chrom in self.chromosomes:
normalized_weight = chrom.length / self.length
normalized_weight = chrom.length / length
cont = chrom.recombination_rate * normalized_weight
mean_recombination_rate += cont
return mean_recombination_rate
Expand All @@ -108,9 +101,10 @@ def mean_mutation_rate(self):
"""
The length-weighted mean mutation rate across all chromosomes.
"""
length = self.length
mean_mutation_rate = 0
for chrom in self.chromosomes:
normalized_weight = chrom.length / self.length
normalized_weight = chrom.length / length
cont = chrom.mutation_rate * normalized_weight
mean_mutation_rate += cont
return mean_mutation_rate
Expand Down
Loading

0 comments on commit 5a0048a

Please sign in to comment.