Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset "schema" v0.3 #276

Merged
merged 10 commits into from
Oct 2, 2020
42 changes: 42 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,48 @@ Methods
Tajimas_D
pc_relate

Variables
=========

.. autosummary::
:toctree: generated/

variables.base_prediction_spec
variables.call_allele_count_spec
variables.call_dosage_spec
variables.call_dosage_mask_spec
variables.call_genotype_spec
variables.call_genotype_mask_spec
variables.call_genotype_phased_spec
variables.call_genotype_probability_spec
variables.call_genotype_probability_mask_spec
variables.covariates_spec
variables.dosage_spec
variables.genotype_counts_spec
variables.loco_prediction_spec
variables.meta_prediction_spec
variables.pc_relate_phi_spec
variables.sample_id_spec
variables.sample_pcs_spec
variables.traits_spec
variables.variant_allele_spec
variables.variant_allele_count_spec
variables.variant_allele_frequency_spec
variables.variant_allele_total_spec
variables.variant_beta_spec
variables.variant_call_rate_spec
variables.variant_contig_spec
variables.variant_hwe_p_value_spec
variables.variant_id_spec
variables.variant_n_called_spec
variables.variant_n_het_spec
variables.variant_n_hom_alt_spec
variables.variant_n_hom_ref_spec
variables.variant_n_non_ref_spec
variables.variant_p_value_spec
variables.variant_position_spec
variables.variant_t_value_spec

Utilities
=========

Expand Down
23 changes: 23 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

# -- Path setup --------------------------------------------------------------

import logging as pylogging

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
Expand All @@ -15,6 +17,7 @@
from pathlib import Path

import xarray
from sphinx.util import logging

sys.path.insert(0, os.path.abspath(".."))

Expand Down Expand Up @@ -52,6 +55,26 @@
*[p.stem for p in (HERE / "extensions").glob("*.py")],
]


# Workaround for https://github.com/agronholm/sphinx-autodoc-typehints/issues/123.
# Remove this once https://github.com/agronholm/sphinx-autodoc-typehints/pull/153
# is merged.
class FilterForIssue123(pylogging.Filter):
    """Drop one specific warning about sgkit variable specs from the log.

    A record is suppressed only when its rendered message both starts with
    ``"Cannot treat a function"`` and mentions ``sgkit.variables.Spec`` or
    ``sgkit.variables.ArrayLikeSpec``. Every other record passes through
    unchanged.
    """

    # Prefix of the warning message that this filter silences.
    _PREFIX = "Cannot treat a function"
    # Qualified names that identify the warning as the known false positive.
    _TARGETS = ("sgkit.variables.Spec", "sgkit.variables.ArrayLikeSpec")

    def filter(self, record: pylogging.LogRecord) -> bool:
        """Return ``False`` to drop *record*, ``True`` to let it through."""
        # getMessage() applies %-style args, so matching is done on the final
        # text rather than on the raw format string.
        msg = record.getMessage()
        return not (
            msg.startswith(self._PREFIX)
            and any(target in msg for target in self._TARGETS)
        )


# NOTE: `logging` here is `sphinx.util.logging` (imported above), not the
# stdlib module, which this file imports as `pylogging`. The filter is attached
# to the object exposed by the `.logger` attribute of the
# "sphinx_autodoc_typehints" logger.
logging.getLogger("sphinx_autodoc_typehints").logger.addFilter(FilterForIssue123())
# End of workaround


# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

Expand Down
4 changes: 2 additions & 2 deletions docs/extensions/typed_returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import re
from typing import Iterator, List

from sphinx.application import Sphinx # type: ignore
from sphinx.ext.napoleon import NumpyDocstring # type: ignore
from sphinx.application import Sphinx
from sphinx.ext.napoleon import NumpyDocstring


def process_return(lines: List[str]) -> Iterator[str]:
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ ignore_missing_imports = True
ignore_missing_imports = True
[mypy-bed_reader.*]
ignore_missing_imports = True
[mypy-sphinx.*]
ignore_missing_imports = True
[mypy-sgkit.*]
allow_redefinition = True
[mypy-sgkit.*.tests.*]
Expand Down
1 change: 1 addition & 0 deletions sgkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@
"Tajimas_D",
"pc_relate",
"simulate_genotype_call_dataset",
"variables",
]
20 changes: 3 additions & 17 deletions sgkit/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import numpy as np
import xarray as xr

from . import variables
from .typing import ArrayLike
from .utils import check_array_like

DIM_VARIANT = "variants"
DIM_SAMPLE = "samples"
Expand Down Expand Up @@ -57,11 +57,6 @@ def create_genotype_call_dataset(
-------
The dataset of genotype calls.
"""
check_array_like(variant_contig, kind="i", ndim=1)
check_array_like(variant_position, kind="i", ndim=1)
check_array_like(variant_alleles, kind={"S", "O"}, ndim=2)
check_array_like(sample_id, kind={"U", "O"}, ndim=1)
check_array_like(call_genotype, kind="i", ndim=3)
data_vars: Dict[Hashable, Any] = {
"variant_contig": ([DIM_VARIANT], variant_contig),
"variant_position": ([DIM_VARIANT], variant_position),
Expand All @@ -74,16 +69,14 @@ def create_genotype_call_dataset(
),
}
if call_genotype_phased is not None:
check_array_like(call_genotype_phased, kind="b", ndim=2)
data_vars["call_genotype_phased"] = (
[DIM_VARIANT, DIM_SAMPLE],
call_genotype_phased,
)
if variant_id is not None:
check_array_like(variant_id, kind={"U", "O"}, ndim=1)
data_vars["variant_id"] = ([DIM_VARIANT], variant_id)
attrs: Dict[Hashable, Any] = {"contigs": variant_contig_names}
return xr.Dataset(data_vars=data_vars, attrs=attrs)
return variables.validate(xr.Dataset(data_vars=data_vars, attrs=attrs))


def create_genotype_dosage_dataset(
Expand Down Expand Up @@ -132,12 +125,6 @@ def create_genotype_dosage_dataset(
The dataset of genotype calls.

"""
check_array_like(variant_contig, kind="i", ndim=1)
check_array_like(variant_position, kind="i", ndim=1)
check_array_like(variant_alleles, kind={"S", "O"}, ndim=2)
check_array_like(sample_id, kind={"U", "O"}, ndim=1)
check_array_like(call_dosage, kind="f", ndim=2)
check_array_like(call_genotype_probability, kind="f", ndim=3)
data_vars: Dict[Hashable, Any] = {
"variant_contig": ([DIM_VARIANT], variant_contig),
"variant_position": ([DIM_VARIANT], variant_position),
Expand All @@ -155,7 +142,6 @@ def create_genotype_dosage_dataset(
),
}
if variant_id is not None:
check_array_like(variant_id, kind={"U", "O"}, ndim=1)
data_vars["variant_id"] = ([DIM_VARIANT], variant_id)
attrs: Dict[Hashable, Any] = {"contigs": variant_contig_names}
return xr.Dataset(data_vars=data_vars, attrs=attrs)
return variables.validate(xr.Dataset(data_vars=data_vars, attrs=attrs))
Loading