diff --git a/Contribute-Docs.md b/Contribute-Docs.md new file mode 100644 index 000000000..58e39bc8a --- /dev/null +++ b/Contribute-Docs.md @@ -0,0 +1,27 @@ +Contributions to the documentation and examples are always welcome! + +Documentation is built using [Sphinx](https://www.sphinx-doc.org/en/master/). If you've ever seen any docs over at [readthedocs](https://readthedocs.org/) then you've likely seen some examples of Sphinx out in the wild. + +## Build the Documentation + +These instructions require that you have docker installed. The best way to do that is to follow the installation instructions at [Get Docker](https://docs.docker.com/get-docker/). The upside to this is that you don't need to clobber any existing conda environments in order to build your docs. + +You don't need to have any particular understanding of docker to run these commands. We are treating the docker image as a shell. + +``` +docker build -t sphinx-sgkit -f docs/Dockerfile . +cd docs +docker run --rm -i -v "$(pwd):/docs" sphinx-sgkit make clean html +``` + +## Serve the Documentation + +Now that we've built the docs, let's view them in their native html state. + +``` +# You can use any port you'd like instead of 8080 +docker run -p 8080:80 -v "$(pwd)/_build/html:/usr/share/nginx/html:ro" nginx +``` + +Now open up localhost:8080 in your browser and you'll see the docs just as they appear on the docs website. 
+ diff --git a/docs/Dockerfile b/docs/Dockerfile new file mode 100644 index 000000000..75879ea91 --- /dev/null +++ b/docs/Dockerfile @@ -0,0 +1,19 @@ +FROM continuumio/miniconda3 + +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + graphviz \ + imagemagick \ + make \ + git \ + && apt-get autoremove \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /docs +ADD requirements-dev.txt /docs/ + +RUN conda install -y -c conda-forge scikit-allel sphinx nbsphinx pip pandoc && \ + pip3 install -r requirements-dev.txt && \ + pip3 install git+https://github.com/pystatgen/sgkit@96203d471531e7e2416d4dd9b48ca11d660a1bcc + diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 000000000..1315aa546 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,12 @@ +Examples +======== + +Understanding the Xarray Genotype Call Dataset +********************************************** + +.. toctree:: + :maxdepth: 1 + + examples/understanding-genotype-call-xarray-dataset/Genotype-Call-Dataset-From-VCF + examples/understanding-genotype-call-xarray-dataset/Genotype-Call-Dataset-From-SGKit-Zarr + examples/understanding-genotype-call-xarray-dataset/Genotype-Call-Dataset-Minimal-Numpy-Example diff --git a/docs/examples/notebooks/Genotype-Call-Dataset-From-SGKit-Zarr.ipynb b/docs/examples/notebooks/Genotype-Call-Dataset-From-SGKit-Zarr.ipynb new file mode 100644 index 000000000..6e63cc5a1 --- /dev/null +++ b/docs/examples/notebooks/Genotype-Call-Dataset-From-SGKit-Zarr.ipynb @@ -0,0 +1,1153 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load From Malaria Gen Zarr\n", + "\n", + "A central point to the SGkit API is the Genotype Call Dataset. This is the data structure that most of the other functions use. 
It uses [Xarray](http://xarray.pydata.org/en/stable/) underneath the hood to give a programmatic interface that allows for the backend to be several different data files.\n", + "\n", + "The Xarray itself is *sort of* a transposed VCF file.\n", + "\n", + "For this example we are going to go from the preprocessed zarr to the sgkit Genotype Call Xarray Dataset.\n", + "\n", + "This is only meant to demonstrate the datatypes that we feed into the Xarray dataset. For a more conceptual understanding please check out the `Genotype-Call-Dataset-From-VCF.ipynb`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import zarr\n", + "import pandas as pd\n", + "import dask.array as da\n", + "import allel\n", + "from pprint import pprint\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Dask Cluster\n", + "\n", + "This isn't that important for this example, but SGkit can use Dask under the hood for many of its calculations. Divide and conquer your statistical genomics data!" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "60ad30bcd7044d6fb7f8fd803c140a26", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='
array([0, 0, 0, ..., 0, 0, 0])
array([ 9526, 9531, 9536, ..., 64411, 64416, 64418], dtype=int32)
array([[b'A', b'G'],\n", + " [b'A', b'T'],\n", + " [b'T', b'C'],\n", + " ...,\n", + " [b'A', b'T'],\n", + " [b'G', b'T'],\n", + " [b'T', b'C']], dtype='|S1')
array(['AA0040-C', 'AA0041-C', 'AA0042-C', ..., 'AY0089-C', 'AY0090-C',\n", + " 'AY0091-C'], dtype='<U8')
array([[[0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]],\n", + "\n", + " [[0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]],\n", + "\n", + " [[0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]],\n", + "\n", + " ...,\n", + "\n", + " [[0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]],\n", + "\n", + " [[0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]],\n", + "\n", + " [[0, 0],\n", + " [0, 0],\n", + " [0, 0],\n", + " ...,\n", + " [0, 0],\n", + " [0, 0],\n", + " [0, 0]]], dtype=int8)
array([[[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]],\n", + "\n", + " [[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]],\n", + "\n", + " [[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]],\n", + "\n", + " ...,\n", + "\n", + " [[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]],\n", + "\n", + " [[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]],\n", + "\n", + " [[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]]])
\n", + " | variant_contig_index | \n", + "variant_position | \n", + "
---|---|---|
0 | \n", + "0 | \n", + "1 | \n", + "
1 | \n", + "0 | \n", + "2 | \n", + "
2 | \n", + "1 | \n", + "1 | \n", + "
3 | \n", + "1 | \n", + "2 | \n", + "
\n", + " | variant_contig_index | \n", + "variant_position | \n", + "description | \n", + "
---|---|---|---|
0 | \n", + "0 | \n", + "1 | \n", + "Chr: chr1 Pos: 1 | \n", + "
1 | \n", + "0 | \n", + "2 | \n", + "Chr: chr1 Pos: 2 | \n", + "
2 | \n", + "1 | \n", + "1 | \n", + "Chr: chr2 Pos: 1 | \n", + "
3 | \n", + "1 | \n", + "2 | \n", + "Chr: chr2 Pos: 2 | \n", + "
array([0, 0, 0], dtype=int32)
array([39967768, 39967778, 39967793], dtype=int32)
array([[b'T', b'A'],\n", + " [b'G', b'C'],\n", + " [b'C', b'T']], dtype='|S1')
array(['HG00098', 'HG00100', 'HG00106', 'HG00112', 'HG00114', 'HG00116',\n", + " 'HG00117', 'HG00118', 'HG00119', 'HG00120', 'HG00122', 'HG00123',\n", + " 'HG00124', 'HG00126', 'HG00131', 'HG00141', 'HG00142', 'HG00143',\n", + " 'HG00144', 'HG00145', 'HG00146', 'HG00147', 'HG00148', 'HG00149',\n", + " 'HG00150', 'HG00151', 'HG00152', 'HG00153', 'HG00156', 'HG00158',\n", + " 'HG00159', 'HG00160', 'HG00171', 'HG00173', 'HG00174', 'HG00176',\n", + " 'HG00177', 'HG00178', 'HG00179', 'HG00180', 'HG00181', 'HG00182',\n", + " 'HG00183', 'HG00185', 'HG00186', 'HG00187', 'HG00188', 'HG00189',\n", + " 'HG00190', 'HG00231', 'HG00239', 'HG00242', 'HG00243', 'HG00244',\n", + " 'HG00245', 'HG00247', 'HG00258', 'HG00262', 'HG00264', 'HG00265',\n", + " 'HG00266', 'HG00267', 'HG00269', 'HG00270', 'HG00272', 'HG00306',\n", + " 'HG00308', 'HG00311', 'HG00312', 'HG00357', 'HG00361', 'HG00366',\n", + " 'HG00367', 'HG00368', 'HG00369', 'HG00372', 'HG00373', 'HG00377',\n", + " 'HG00380', 'HG00403', 'HG00404', 'HG00406', 'HG00407', 'HG00445',\n", + " 'HG00446', 'HG00452', 'HG00457', 'HG00553', 'HG00554', 'HG00559',\n", + " 'HG00560', 'HG00565', 'HG00566', 'HG00577', 'HG00578', 'HG00592',\n", + " 'HG00593', 'HG00596', 'HG00610', 'HG00611', 'HG00625', 'HG00626',\n", + " 'HG00628', 'HG00629', 'HG00634', 'HG00635', 'HG00637', 'HG00638',\n", + " 'HG00640', 'NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06994',\n", + " 'NA07000', 'NA07037', 'NA07048', 'NA07051', 'NA07056', 'NA07346',\n", + " 'NA07347', 'NA07357', 'NA10847', 'NA10851', 'NA11829', 'NA11830',\n", + " 'NA11831', 'NA11832', 'NA11840', 'NA11843', 'NA11881', 'NA11892',\n", + " 'NA11893', 'NA11894', 'NA11918', 'NA11919', 'NA11920', 'NA11930',\n", + " 'NA11931', 'NA11932', 'NA11933', 'NA11992', 'NA11993', 'NA11994',\n", + " 'NA11995', 'NA12003', 'NA12004', 'NA12005', 'NA12006', 'NA12043',\n", + " 'NA12044', 'NA12045', 'NA12046', 'NA12058', 'NA12144', 'NA12154',\n", + " 'NA12155', 'NA12156', 'NA12249', 'NA12272', 'NA12273', 
'NA12275',\n", + " 'NA12287', 'NA12340', 'NA12341', 'NA12342', 'NA12347', 'NA12348',\n", + " 'NA12383', 'NA12399', 'NA12400', 'NA12413', 'NA12414', 'NA12489',\n", + " 'NA12546', 'NA12716', 'NA12717', 'NA12718', 'NA12749', 'NA12750',\n", + " 'NA12751', 'NA12761', 'NA12762', 'NA12763', 'NA12775', 'NA12776',\n", + " 'NA12777', 'NA12778', 'NA12812', 'NA12813', 'NA12814', 'NA12815',\n", + " 'NA12828', 'NA12830', 'NA12872', 'NA12873', 'NA12874', 'NA12889',\n", + " 'NA12890', 'NA18486', 'NA18487', 'NA18489', 'NA18498', 'NA18499',\n", + " 'NA18501', 'NA18502', 'NA18504', 'NA18505', 'NA18507', 'NA18508',\n", + " 'NA18510', 'NA18511', 'NA18516', 'NA18517', 'NA18519', 'NA18520',\n", + " 'NA18522', 'NA18523', 'NA18525', 'NA18526', 'NA18527', 'NA18532',\n", + " 'NA18535', 'NA18537', 'NA18538', 'NA18539', 'NA18541', 'NA18542',\n", + " 'NA18545', 'NA18547', 'NA18550', 'NA18552', 'NA18553', 'NA18555',\n", + " 'NA18558', 'NA18560', 'NA18561', 'NA18562', 'NA18563', 'NA18564',\n", + " 'NA18565', 'NA18566', 'NA18567', 'NA18570', 'NA18571', 'NA18572',\n", + " 'NA18573', 'NA18574', 'NA18576', 'NA18577', 'NA18579', 'NA18582',\n", + " 'NA18592', 'NA18593', 'NA18603', 'NA18605', 'NA18608', 'NA18609',\n", + " 'NA18611', 'NA18612', 'NA18614', 'NA18615', 'NA18616', 'NA18617',\n", + " 'NA18618', 'NA18619', 'NA18620', 'NA18621', 'NA18622', 'NA18623',\n", + " 'NA18624', 'NA18625', 'NA18626', 'NA18627', 'NA18628', 'NA18630',\n", + " 'NA18631', 'NA18632', 'NA18633', 'NA18634', 'NA18636', 'NA18638',\n", + " 'NA18640', 'NA18642', 'NA18643', 'NA18745', 'NA18853', 'NA18856',\n", + " 'NA18858', 'NA18861', 'NA18867', 'NA18868', 'NA18870', 'NA18871',\n", + " 'NA18873', 'NA18874', 'NA18907', 'NA18908', 'NA18909', 'NA18910',\n", + " 'NA18912', 'NA18916', 'NA18940', 'NA18941', 'NA18942', 'NA18943',\n", + " 'NA18944', 'NA18945', 'NA18947', 'NA18948', 'NA18949', 'NA18950',\n", + " 'NA18951', 'NA18952', 'NA18953', 'NA18955', 'NA18956', 'NA18959',\n", + " 'NA18960', 'NA18961', 'NA18963', 'NA18964', 'NA18965', 
'NA18967',\n", + " 'NA18968', 'NA18970', 'NA18971', 'NA18972', 'NA18973', 'NA18974',\n", + " 'NA18975', 'NA18976', 'NA18977', 'NA18979', 'NA18980', 'NA18981',\n", + " 'NA18982', 'NA18983', 'NA18984', 'NA18985', 'NA18986', 'NA18987',\n", + " 'NA18988', 'NA18989', 'NA18990', 'NA18997', 'NA18999', 'NA19000',\n", + " 'NA19001', 'NA19002', 'NA19003', 'NA19004', 'NA19005', 'NA19007',\n", + " 'NA19009', 'NA19010', 'NA19012', 'NA19027', 'NA19044', 'NA19054',\n", + " 'NA19055', 'NA19056', 'NA19057', 'NA19058', 'NA19059', 'NA19060',\n", + " 'NA19062', 'NA19063', 'NA19064', 'NA19065', 'NA19066', 'NA19067',\n", + " 'NA19068', 'NA19070', 'NA19072', 'NA19074', 'NA19075', 'NA19076',\n", + " 'NA19077', 'NA19078', 'NA19079', 'NA19082', 'NA19083', 'NA19084',\n", + " 'NA19085', 'NA19086', 'NA19087', 'NA19088', 'NA19093', 'NA19098',\n", + " 'NA19099', 'NA19102', 'NA19107', 'NA19108', 'NA19113', 'NA19114',\n", + " 'NA19116', 'NA19119', 'NA19129', 'NA19130', 'NA19131', 'NA19137',\n", + " 'NA19138', 'NA19141', 'NA19143', 'NA19144', 'NA19147', 'NA19152',\n", + " 'NA19153', 'NA19159', 'NA19160', 'NA19171', 'NA19172', 'NA19184',\n", + " 'NA19189', 'NA19190', 'NA19200', 'NA19201', 'NA19204', 'NA19206',\n", + " 'NA19207', 'NA19209', 'NA19210', 'NA19213', 'NA19225', 'NA19235',\n", + " 'NA19236', 'NA19247', 'NA19248', 'NA19256', 'NA19257', 'NA19311',\n", + " 'NA19312', 'NA19313', 'NA19314', 'NA19332', 'NA19334', 'NA19338',\n", + " 'NA19346', 'NA19347', 'NA19350', 'NA19355', 'NA19359', 'NA19360',\n", + " 'NA19371', 'NA19372', 'NA19375', 'NA19376', 'NA19377', 'NA19379',\n", + " 'NA19381', 'NA19382', 'NA19383', 'NA19384', 'NA19385', 'NA19390',\n", + " 'NA19391', 'NA19393', 'NA19394', 'NA19395', 'NA19397', 'NA19398',\n", + " 'NA19399', 'NA19401', 'NA19404', 'NA19428', 'NA19429', 'NA19434',\n", + " 'NA19435', 'NA19436', 'NA19437', 'NA19438', 'NA19439', 'NA19440',\n", + " 'NA19443', 'NA19444', 'NA19445', 'NA19446', 'NA19448', 'NA19449',\n", + " 'NA19451', 'NA19452', 'NA19453', 'NA19455', 'NA19456', 
'NA19457',\n", + " 'NA19461', 'NA19462', 'NA19463', 'NA19466', 'NA19467', 'NA19469',\n", + " 'NA19471', 'NA19472', 'NA19473', 'NA19474', 'NA19625', 'NA19648',\n", + " 'NA19649', 'NA19651', 'NA19652', 'NA19654', 'NA19655', 'NA19658',\n", + " 'NA19660', 'NA19661', 'NA19678', 'NA19684', 'NA19685', 'NA19700',\n", + " 'NA19701', 'NA19703', 'NA19704', 'NA19707', 'NA19712', 'NA19713',\n", + " 'NA19720', 'NA19722', 'NA19723', 'NA19725', 'NA19726', 'NA19818',\n", + " 'NA19819', 'NA19834', 'NA19835', 'NA19900', 'NA19901', 'NA19904',\n", + " 'NA19908', 'NA19909', 'NA19914', 'NA19916', 'NA19917', 'NA19920',\n", + " 'NA19921', 'NA19982', 'NA20414', 'NA20502', 'NA20505', 'NA20508',\n", + " 'NA20509', 'NA20510', 'NA20512', 'NA20515', 'NA20516', 'NA20517',\n", + " 'NA20518', 'NA20519', 'NA20520', 'NA20521', 'NA20522', 'NA20524',\n", + " 'NA20525', 'NA20526', 'NA20527', 'NA20528', 'NA20529', 'NA20530',\n", + " 'NA20531', 'NA20532', 'NA20533', 'NA20534', 'NA20535', 'NA20536',\n", + " 'NA20537', 'NA20538', 'NA20539', 'NA20540', 'NA20541', 'NA20542',\n", + " 'NA20543', 'NA20544', 'NA20581', 'NA20582', 'NA20585', 'NA20586',\n", + " 'NA20588', 'NA20589', 'NA20752', 'NA20753', 'NA20754', 'NA20755',\n", + " 'NA20756', 'NA20757', 'NA20758', 'NA20759', 'NA20760', 'NA20761',\n", + " 'NA20765', 'NA20769', 'NA20770', 'NA20771', 'NA20772', 'NA20773',\n", + " 'NA20774', 'NA20775', 'NA20778', 'NA20783', 'NA20785', 'NA20786',\n", + " 'NA20787', 'NA20790', 'NA20792', 'NA20795', 'NA20796', 'NA20797',\n", + " 'NA20798', 'NA20799', 'NA20800', 'NA20801', 'NA20802', 'NA20803',\n", + " 'NA20804', 'NA20805', 'NA20806', 'NA20807', 'NA20808', 'NA20809',\n", + " 'NA20810', 'NA20811', 'NA20812', 'NA20813', 'NA20814', 'NA20815',\n", + " 'NA20816', 'NA20818', 'NA20819', 'NA20826', 'NA20828'], dtype='<U7')
array([[[ 0, 0],\n", + " [ 0, 0],\n", + " [ 1, 1],\n", + " ...,\n", + " [ 0, 1],\n", + " [ 1, 1],\n", + " [ 1, 0]],\n", + "\n", + " [[-1, -1],\n", + " [-1, -1],\n", + " [-1, -1],\n", + " ...,\n", + " [-1, -1],\n", + " [-1, -1],\n", + " [-1, -1]],\n", + "\n", + " [[ 0, 0],\n", + " [ 0, 0],\n", + " [ 0, 0],\n", + " ...,\n", + " [ 0, 0],\n", + " [ 0, 0],\n", + " [ 0, 0]]], dtype=int32)
array([[[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]],\n", + "\n", + " [[ True, True],\n", + " [ True, True],\n", + " [ True, True],\n", + " ...,\n", + " [ True, True],\n", + " [ True, True],\n", + " [ True, True]],\n", + "\n", + " [[False, False],\n", + " [False, False],\n", + " [False, False],\n", + " ...,\n", + " [False, False],\n", + " [False, False],\n", + " [False, False]]])
array([0], dtype=int32)
array([1], dtype=int32)
array([[b'A', b'T']], dtype='|S1')
array(['sample-1'], dtype='<U8')
array([[[0, 0]]], dtype=int32)
array([[[False, False]]])
array([0, 0, 0, ..., 0, 0, 0])
array([ 9526, 9531, 9536, ..., 64411, 64416, 64418], dtype=int32)
array([[b'A', b'G'], + [b'A', b'T'], + [b'T', b'C'], + ..., + [b'A', b'T'], + [b'G', b'T'], + [b'T', b'C']], dtype='|S1')
array(['AA0040-C', 'AA0041-C', 'AA0042-C', ..., 'AY0089-C', 'AY0090-C', + 'AY0091-C'], dtype='<U8')
array([[[0, 0], + [0, 0], + [0, 0], + ..., + [0, 0], + [0, 0], + [0, 0]], + + [[0, 0], + [0, 0], + [0, 0], + ..., + [0, 0], + [0, 0], + [0, 0]], + + [[0, 0], + [0, 0], + [0, 0], + ..., + [0, 0], + [0, 0], + [0, 0]], + + ..., + + [[0, 0], + [0, 0], + [0, 0], + ..., + [0, 0], + [0, 0], + [0, 0]], + + [[0, 0], + [0, 0], + [0, 0], + ..., + [0, 0], + [0, 0], + [0, 0]], + + [[0, 0], + [0, 0], + [0, 0], + ..., + [0, 0], + [0, 0], + [0, 0]]], dtype=int8)
array([[[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]], + + [[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]], + + [[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]], + + ..., + + [[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]], + + [[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]], + + [[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]]])
+ | variant_contig_index | +variant_position | +
---|---|---|
0 | +0 | +1 | +
1 | +0 | +2 | +
2 | +1 | +1 | +
3 | +1 | +2 | +
+ | variant_contig_index | +variant_position | +description | +
---|---|---|---|
0 | +0 | +1 | +Chr: chr1 Pos: 1 | +
1 | +0 | +2 | +Chr: chr1 Pos: 2 | +
2 | +1 | +1 | +Chr: chr2 Pos: 1 | +
3 | +1 | +2 | +Chr: chr2 Pos: 2 | +
array([0, 0, 0], dtype=int32)
array([39967768, 39967778, 39967793], dtype=int32)
array([[b'T', b'A'], + [b'G', b'C'], + [b'C', b'T']], dtype='|S1')
array(['HG00098', 'HG00100', 'HG00106', 'HG00112', 'HG00114', 'HG00116', + 'HG00117', 'HG00118', 'HG00119', 'HG00120', 'HG00122', 'HG00123', + 'HG00124', 'HG00126', 'HG00131', 'HG00141', 'HG00142', 'HG00143', + 'HG00144', 'HG00145', 'HG00146', 'HG00147', 'HG00148', 'HG00149', + 'HG00150', 'HG00151', 'HG00152', 'HG00153', 'HG00156', 'HG00158', + 'HG00159', 'HG00160', 'HG00171', 'HG00173', 'HG00174', 'HG00176', + 'HG00177', 'HG00178', 'HG00179', 'HG00180', 'HG00181', 'HG00182', + 'HG00183', 'HG00185', 'HG00186', 'HG00187', 'HG00188', 'HG00189', + 'HG00190', 'HG00231', 'HG00239', 'HG00242', 'HG00243', 'HG00244', + 'HG00245', 'HG00247', 'HG00258', 'HG00262', 'HG00264', 'HG00265', + 'HG00266', 'HG00267', 'HG00269', 'HG00270', 'HG00272', 'HG00306', + 'HG00308', 'HG00311', 'HG00312', 'HG00357', 'HG00361', 'HG00366', + 'HG00367', 'HG00368', 'HG00369', 'HG00372', 'HG00373', 'HG00377', + 'HG00380', 'HG00403', 'HG00404', 'HG00406', 'HG00407', 'HG00445', + 'HG00446', 'HG00452', 'HG00457', 'HG00553', 'HG00554', 'HG00559', + 'HG00560', 'HG00565', 'HG00566', 'HG00577', 'HG00578', 'HG00592', + 'HG00593', 'HG00596', 'HG00610', 'HG00611', 'HG00625', 'HG00626', + 'HG00628', 'HG00629', 'HG00634', 'HG00635', 'HG00637', 'HG00638', + 'HG00640', 'NA06984', 'NA06985', 'NA06986', 'NA06989', 'NA06994', + 'NA07000', 'NA07037', 'NA07048', 'NA07051', 'NA07056', 'NA07346', + 'NA07347', 'NA07357', 'NA10847', 'NA10851', 'NA11829', 'NA11830', + 'NA11831', 'NA11832', 'NA11840', 'NA11843', 'NA11881', 'NA11892', + 'NA11893', 'NA11894', 'NA11918', 'NA11919', 'NA11920', 'NA11930', + 'NA11931', 'NA11932', 'NA11933', 'NA11992', 'NA11993', 'NA11994', + 'NA11995', 'NA12003', 'NA12004', 'NA12005', 'NA12006', 'NA12043', + 'NA12044', 'NA12045', 'NA12046', 'NA12058', 'NA12144', 'NA12154', + 'NA12155', 'NA12156', 'NA12249', 'NA12272', 'NA12273', 'NA12275', + 'NA12287', 'NA12340', 'NA12341', 'NA12342', 'NA12347', 'NA12348', + 'NA12383', 'NA12399', 'NA12400', 'NA12413', 'NA12414', 'NA12489', + 'NA12546', 
'NA12716', 'NA12717', 'NA12718', 'NA12749', 'NA12750', + 'NA12751', 'NA12761', 'NA12762', 'NA12763', 'NA12775', 'NA12776', + 'NA12777', 'NA12778', 'NA12812', 'NA12813', 'NA12814', 'NA12815', + 'NA12828', 'NA12830', 'NA12872', 'NA12873', 'NA12874', 'NA12889', + 'NA12890', 'NA18486', 'NA18487', 'NA18489', 'NA18498', 'NA18499', + 'NA18501', 'NA18502', 'NA18504', 'NA18505', 'NA18507', 'NA18508', + 'NA18510', 'NA18511', 'NA18516', 'NA18517', 'NA18519', 'NA18520', + 'NA18522', 'NA18523', 'NA18525', 'NA18526', 'NA18527', 'NA18532', + 'NA18535', 'NA18537', 'NA18538', 'NA18539', 'NA18541', 'NA18542', + 'NA18545', 'NA18547', 'NA18550', 'NA18552', 'NA18553', 'NA18555', + 'NA18558', 'NA18560', 'NA18561', 'NA18562', 'NA18563', 'NA18564', + 'NA18565', 'NA18566', 'NA18567', 'NA18570', 'NA18571', 'NA18572', + 'NA18573', 'NA18574', 'NA18576', 'NA18577', 'NA18579', 'NA18582', + 'NA18592', 'NA18593', 'NA18603', 'NA18605', 'NA18608', 'NA18609', + 'NA18611', 'NA18612', 'NA18614', 'NA18615', 'NA18616', 'NA18617', + 'NA18618', 'NA18619', 'NA18620', 'NA18621', 'NA18622', 'NA18623', + 'NA18624', 'NA18625', 'NA18626', 'NA18627', 'NA18628', 'NA18630', + 'NA18631', 'NA18632', 'NA18633', 'NA18634', 'NA18636', 'NA18638', + 'NA18640', 'NA18642', 'NA18643', 'NA18745', 'NA18853', 'NA18856', + 'NA18858', 'NA18861', 'NA18867', 'NA18868', 'NA18870', 'NA18871', + 'NA18873', 'NA18874', 'NA18907', 'NA18908', 'NA18909', 'NA18910', + 'NA18912', 'NA18916', 'NA18940', 'NA18941', 'NA18942', 'NA18943', + 'NA18944', 'NA18945', 'NA18947', 'NA18948', 'NA18949', 'NA18950', + 'NA18951', 'NA18952', 'NA18953', 'NA18955', 'NA18956', 'NA18959', + 'NA18960', 'NA18961', 'NA18963', 'NA18964', 'NA18965', 'NA18967', + 'NA18968', 'NA18970', 'NA18971', 'NA18972', 'NA18973', 'NA18974', + 'NA18975', 'NA18976', 'NA18977', 'NA18979', 'NA18980', 'NA18981', + 'NA18982', 'NA18983', 'NA18984', 'NA18985', 'NA18986', 'NA18987', + 'NA18988', 'NA18989', 'NA18990', 'NA18997', 'NA18999', 'NA19000', + 'NA19001', 'NA19002', 'NA19003', 
'NA19004', 'NA19005', 'NA19007', + 'NA19009', 'NA19010', 'NA19012', 'NA19027', 'NA19044', 'NA19054', + 'NA19055', 'NA19056', 'NA19057', 'NA19058', 'NA19059', 'NA19060', + 'NA19062', 'NA19063', 'NA19064', 'NA19065', 'NA19066', 'NA19067', + 'NA19068', 'NA19070', 'NA19072', 'NA19074', 'NA19075', 'NA19076', + 'NA19077', 'NA19078', 'NA19079', 'NA19082', 'NA19083', 'NA19084', + 'NA19085', 'NA19086', 'NA19087', 'NA19088', 'NA19093', 'NA19098', + 'NA19099', 'NA19102', 'NA19107', 'NA19108', 'NA19113', 'NA19114', + 'NA19116', 'NA19119', 'NA19129', 'NA19130', 'NA19131', 'NA19137', + 'NA19138', 'NA19141', 'NA19143', 'NA19144', 'NA19147', 'NA19152', + 'NA19153', 'NA19159', 'NA19160', 'NA19171', 'NA19172', 'NA19184', + 'NA19189', 'NA19190', 'NA19200', 'NA19201', 'NA19204', 'NA19206', + 'NA19207', 'NA19209', 'NA19210', 'NA19213', 'NA19225', 'NA19235', + 'NA19236', 'NA19247', 'NA19248', 'NA19256', 'NA19257', 'NA19311', + 'NA19312', 'NA19313', 'NA19314', 'NA19332', 'NA19334', 'NA19338', + 'NA19346', 'NA19347', 'NA19350', 'NA19355', 'NA19359', 'NA19360', + 'NA19371', 'NA19372', 'NA19375', 'NA19376', 'NA19377', 'NA19379', + 'NA19381', 'NA19382', 'NA19383', 'NA19384', 'NA19385', 'NA19390', + 'NA19391', 'NA19393', 'NA19394', 'NA19395', 'NA19397', 'NA19398', + 'NA19399', 'NA19401', 'NA19404', 'NA19428', 'NA19429', 'NA19434', + 'NA19435', 'NA19436', 'NA19437', 'NA19438', 'NA19439', 'NA19440', + 'NA19443', 'NA19444', 'NA19445', 'NA19446', 'NA19448', 'NA19449', + 'NA19451', 'NA19452', 'NA19453', 'NA19455', 'NA19456', 'NA19457', + 'NA19461', 'NA19462', 'NA19463', 'NA19466', 'NA19467', 'NA19469', + 'NA19471', 'NA19472', 'NA19473', 'NA19474', 'NA19625', 'NA19648', + 'NA19649', 'NA19651', 'NA19652', 'NA19654', 'NA19655', 'NA19658', + 'NA19660', 'NA19661', 'NA19678', 'NA19684', 'NA19685', 'NA19700', + 'NA19701', 'NA19703', 'NA19704', 'NA19707', 'NA19712', 'NA19713', + 'NA19720', 'NA19722', 'NA19723', 'NA19725', 'NA19726', 'NA19818', + 'NA19819', 'NA19834', 'NA19835', 'NA19900', 'NA19901', 
'NA19904', + 'NA19908', 'NA19909', 'NA19914', 'NA19916', 'NA19917', 'NA19920', + 'NA19921', 'NA19982', 'NA20414', 'NA20502', 'NA20505', 'NA20508', + 'NA20509', 'NA20510', 'NA20512', 'NA20515', 'NA20516', 'NA20517', + 'NA20518', 'NA20519', 'NA20520', 'NA20521', 'NA20522', 'NA20524', + 'NA20525', 'NA20526', 'NA20527', 'NA20528', 'NA20529', 'NA20530', + 'NA20531', 'NA20532', 'NA20533', 'NA20534', 'NA20535', 'NA20536', + 'NA20537', 'NA20538', 'NA20539', 'NA20540', 'NA20541', 'NA20542', + 'NA20543', 'NA20544', 'NA20581', 'NA20582', 'NA20585', 'NA20586', + 'NA20588', 'NA20589', 'NA20752', 'NA20753', 'NA20754', 'NA20755', + 'NA20756', 'NA20757', 'NA20758', 'NA20759', 'NA20760', 'NA20761', + 'NA20765', 'NA20769', 'NA20770', 'NA20771', 'NA20772', 'NA20773', + 'NA20774', 'NA20775', 'NA20778', 'NA20783', 'NA20785', 'NA20786', + 'NA20787', 'NA20790', 'NA20792', 'NA20795', 'NA20796', 'NA20797', + 'NA20798', 'NA20799', 'NA20800', 'NA20801', 'NA20802', 'NA20803', + 'NA20804', 'NA20805', 'NA20806', 'NA20807', 'NA20808', 'NA20809', + 'NA20810', 'NA20811', 'NA20812', 'NA20813', 'NA20814', 'NA20815', + 'NA20816', 'NA20818', 'NA20819', 'NA20826', 'NA20828'], dtype='<U7')
array([[[ 0, 0], + [ 0, 0], + [ 1, 1], + ..., + [ 0, 1], + [ 1, 1], + [ 1, 0]], + + [[-1, -1], + [-1, -1], + [-1, -1], + ..., + [-1, -1], + [-1, -1], + [-1, -1]], + + [[ 0, 0], + [ 0, 0], + [ 0, 0], + ..., + [ 0, 0], + [ 0, 0], + [ 0, 0]]], dtype=int32)
array([[[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]], + + [[ True, True], + [ True, True], + [ True, True], + ..., + [ True, True], + [ True, True], + [ True, True]], + + [[False, False], + [False, False], + [False, False], + ..., + [False, False], + [False, False], + [False, False]]])
array([0], dtype=int32)
array([1], dtype=int32)
array([[b'A', b'T']], dtype='|S1')
array(['sample-1'], dtype='<U8')
array([[[0, 0]]], dtype=int32)
array([[[False, False]]])