Skip to content

Commit

Permalink
issue #70 - SnakeMake
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Sep 2, 2024
1 parent 756eccb commit f8cac4e
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 85 deletions.
98 changes: 64 additions & 34 deletions generate_transcript_data/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,61 +8,91 @@ cdot_dir = os.path.dirname(workflow.basedir)
cdot_output_raw = subprocess.check_output(f"{cdot_json} --version", shell=True, env={"PYTHONPATH": cdot_dir})
cdot_data_version = cdot_output_raw.decode().strip()

# Flatten the nested config (consortium -> genome build -> {name: url}) into
# the flat lookups used by the rules and input functions in this file.
all_urls = {}  # download name -> source URL, across every consortium/build
annotation_consortium_list = []
genome_build_list = []
for annotation_consortium, builds in config["config"].items():
    annotation_consortium_list.append(annotation_consortium)
    for gb, urls_dict in builds.items():
        # Build names recur across consortia (e.g. GRCh37/GRCh38), so record
        # each one once; otherwise expand() is handed duplicate values and
        # emits the same target path several times.
        if gb not in genome_build_list:
            genome_build_list.append(gb)
        all_urls.update(urls_dict)

#for name, url in urls.items():
# print(f"{name} : {url}")
def get_url_from_name(wildcards):
    """Return the URL stored in ``all_urls`` under ``wildcards.name``.

    Raises:
        KeyError: if ``wildcards.name`` is not a key of ``all_urls``.
    """
    name = wildcards.name
    return all_urls[name]


def get_urls(wildcards):
    """Print each wildcard as ``key=value`` (debug aid), then return ``urls``.

    NOTE(review): ``urls`` is not defined in the visible part of this file,
    while the other helpers read ``all_urls`` -- confirm the name still exists.
    """
    print("get_urls")
    for wildcard_name, wildcard_value in wildcards.items():
        print(f"{wildcard_name}={wildcard_value}")

    return urls


def get_cdot_command(wildcards):
    """Choose the cdot_json sub-command for the file behind ``wildcards.name``.

    The decision is made purely from the suffix of the URL registered in
    ``all_urls``.

    Returns:
        ``"gtf_to_json"`` when the URL ends with ``.gtf.gz``, otherwise
        ``"gff3_to_json"``.

    Raises:
        KeyError: if ``wildcards.name`` is not a key of ``all_urls``.
    """
    url = all_urls[wildcards.name]
    # GFFs can end with 'gff.gz' or 'gff3.gz', GTFs always end with 'gtf.gz',
    # so test for the GTF suffix and treat everything else as GFF3.
    if url.endswith(".gtf.gz"):
        return "gtf_to_json"
    return "gff3_to_json"

def get_build_input_files(wildcards):
    """List the per-download JSON files for one consortium / genome build.

    Looks up the ``{name: url}`` mapping configured under
    ``config["config"][annotation_consortium][genome_build]`` and expands one
    ``<consortium>/<build>/cdot-<version>-<name>.json.gz`` path per name.
    """
    consortium = wildcards.annotation_consortium
    build = wildcards.genome_build
    names = config["config"][consortium][build]
    prefix = f"{consortium}/{build}/cdot-{cdot_data_version}"
    return expand(prefix + "-{name}.json.gz", name=names)



# Name it based on date as it may vary
today = datetime.now().date().isoformat()
gene_info_download_filename = f"Homo_sapiens.gene_info.{today}.gz"
gene_info_json = f"Homo_sapiens.gene-info-{cdot_data_version}.json.gz"
gene_info_download_filename = os.path.join("downloads/gene_info", f"Homo_sapiens.gene_info.{today}.gz")
gene_info_json_filename = f"Homo_sapiens.gene-info-{cdot_data_version}.json.gz"

genome_build_files = []
for annotation_consortium, builds in config["config"].items():
for genome_build in builds:
filename = os.path.join(annotation_consortium, genome_build, f"cdot-{cdot_data_version}-{annotation_consortium}.{genome_build}.json.gz")
genome_build_files.append(filename)

rule all:
input:
gene_info_json,
# genome_build_files,
expand("{annotation_consortium}/{genome_build}/cdot-{cdot_data_version}-{annotation_consortium}.{genome_build}.json.gz",
annotation_consortium=["RefSeq", "Ensembl"],
genome_build=["GRCh37", "GRCh38", "T2T-CHM13v2.0"],
cdot_data_version=[cdot_data_version])

rule cdot_build_json:
# Merges all GFFs per build
gene_info_json_filename,
expand("{annotation_consortium}/cdot-" + cdot_data_version + "-{annotation_consortium}-{genome_build}.json.gz",
annotation_consortium=annotation_consortium_list, # ["RefSeq", "Ensembl"],
genome_build=genome_build_list)


rule cdot_merge_historical_json:
# Merges multiple files together for 1 build
output:
"{annotation_consortium}/{genome_build}/cdot-{cdot_data_version}-{annotation_consortium}.{genome_build}.json.gz"
"{annotation_consortium}/cdot-" + cdot_data_version + "-{annotation_consortium}-{genome_build}.json.gz"
input:
lambda wildcards: expand("{annotation_consortium}/{genome_build}/cdot-{cdot_data_version}-{name}.json.gz", name=wildcards.urls)
params:
urls = lambda wildcards: config[wildcards.annotation_consortium][wildcards.genome_build]
get_build_input_files,
shell:
"""
PYTHONPATH={cdot_dir} \
{cdot_json} \
combine_builds \
merge_historical \
{input} \
#--grch37 GRCh37/cdot-${CDOT_DATA_VERSION}.refseq.grch37.json.gz \
#--grch38 GRCh38/cdot-${CDOT_DATA_VERSION}.refseq.grch38.json.gz \
--output ${output}
--genome-build={wildcards.genome_build} \
--output {output}
"""


rule cdot_gff_json:
# Individual GFF
input:
gene_info_json=gene_info_json,
gene_info_json=gene_info_json_filename,
gff_file="downloads/{name}.gz"
output:
protected("{annotation_consortium}/{genome_build}/cdot-{cdot_data_version}-{name}.json.gz")
protected("{annotation_consortium}/{genome_build}/cdot-" + cdot_data_version + "-{name}.json.gz")
params:
url=lambda wildcards: urls[wildcards.name],
url=get_url_from_name,
cdot_command=get_cdot_command
shell:
"""
Expand All @@ -71,7 +101,7 @@ rule cdot_gff_json:
{params.cdot_command} \
"{input.gff_file}" \
--url "{params.url}" \
--genome-build="{genome_build}" \
--genome-build="{wildcards.genome_build}" \
--output "{output}" \
--gene-info-json="{input.gene_info_json}"
"""
Expand All @@ -81,16 +111,16 @@ rule download_gff_files:
# Don't re-download if snakemake script changes
protected("downloads/{name}.gz")
params:
url=lambda wildcards: urls[wildcards.name]
url=lambda wildcards: all_urls[wildcards.name]
shell:
"curl -o {output} {params.url}"


rule process_gene_info_json:
input:
"downloads/gene_info/{gene_info_download_filename}"
gene_info_download_filename
output:
protected("{gene_info_json}")
protected(gene_info_json_filename)
shell:
"""
PYTHONPATH={cdot_dir} \
Expand All @@ -102,7 +132,7 @@ rule process_gene_info_json:

rule download_gene_info:
output:
protected("downloads/gene_info/{gene_info_download_filename}")
protected(gene_info_download_filename)
shell:
"curl -o {output} https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/Homo_sapiens.gene_info.gz"

89 changes: 44 additions & 45 deletions generate_transcript_data/cdot_transcripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,62 +7,61 @@ config:
#83 has no data
#84 is 82 again
#86 is 85 again
Homo_sapiens_GRCh37_Ensembl_82: "http://ftp.ensembl.org/pub/grch37/release-82/gtf/homo_sapiens/Homo_sapiens.GRCh37.82.gtf.gz"
Homo_sapiens_GRCh37_Ensembl_85: "http://ftp.ensembl.org/pub/grch37/release-85/gtf/homo_sapiens/Homo_sapiens.GRCh37.85.gtf.gz"
Homo_sapiens_GRCh37_Ensembl_87: "http://ftp.ensembl.org/pub/grch37/release-87/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz"
Homo_sapiens_GRCh37_Ensembl_82: "https://ftp.ensembl.org/pub/grch37/release-82/gtf/homo_sapiens/Homo_sapiens.GRCh37.82.gtf.gz"
Homo_sapiens_GRCh37_Ensembl_85: "https://ftp.ensembl.org/pub/grch37/release-85/gtf/homo_sapiens/Homo_sapiens.GRCh37.85.gtf.gz"
Homo_sapiens_GRCh37_Ensembl_87: "https://ftp.ensembl.org/pub/grch37/release-87/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz"

GRCh38:
Homo_sapiens_GRCh38_Ensembl_81: "http://ftp.ensembl.org/pub/release-81/gtf/homo_sapiens/Homo_sapiens.GRCh38.81.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_82: "http://ftp.ensembl.org/pub/release-82/gtf/homo_sapiens/Homo_sapiens.GRCh38.82.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_83: "http://ftp.ensembl.org/pub/release-83/gtf/homo_sapiens/Homo_sapiens.GRCh38.83.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_84: "http://ftp.ensembl.org/pub/release-84/gtf/homo_sapiens/Homo_sapiens.GRCh38.84.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_85: "http://ftp.ensembl.org/pub/release-85/gtf/homo_sapiens/Homo_sapiens.GRCh38.85.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_86: "http://ftp.ensembl.org/pub/release-86/gtf/homo_sapiens/Homo_sapiens.GRCh38.86.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_87: "http://ftp.ensembl.org/pub/release-87/gtf/homo_sapiens/Homo_sapiens.GRCh38.87.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_88: "http://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_89: "http://ftp.ensembl.org/pub/release-89/gtf/homo_sapiens/Homo_sapiens.GRCh38.89.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_90: "http://ftp.ensembl.org/pub/release-90/gtf/homo_sapiens/Homo_sapiens.GRCh38.90.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_91: "http://ftp.ensembl.org/pub/release-91/gtf/homo_sapiens/Homo_sapiens.GRCh38.91.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_92: "http://ftp.ensembl.org/pub/release-92/gtf/homo_sapiens/Homo_sapiens.GRCh38.92.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_93: "http://ftp.ensembl.org/pub/release-93/gtf/homo_sapiens/Homo_sapiens.GRCh38.93.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_94: "http://ftp.ensembl.org/pub/release-94/gtf/homo_sapiens/Homo_sapiens.GRCh38.94.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_95: "http://ftp.ensembl.org/pub/release-95/gtf/homo_sapiens/Homo_sapiens.GRCh38.95.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_96: "http://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_97: "http://ftp.ensembl.org/pub/release-97/gtf/homo_sapiens/Homo_sapiens.GRCh38.97.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_98: "http://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_99: "http://ftp.ensembl.org/pub/release-99/gtf/homo_sapiens/Homo_sapiens.GRCh38.99.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_100: "http://ftp.ensembl.org/pub/release-100/gtf/homo_sapiens/Homo_sapiens.GRCh38.100.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_101: "http://ftp.ensembl.org/pub/release-101/gtf/homo_sapiens/Homo_sapiens.GRCh38.101.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_102: "http://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_103: "http://ftp.ensembl.org/pub/release-103/gtf/homo_sapiens/Homo_sapiens.GRCh38.103.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_104: "http://ftp.ensembl.org/pub/release-104/gtf/homo_sapiens/Homo_sapiens.GRCh38.104.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_105: "http://ftp.ensembl.org/pub/release-105/gtf/homo_sapiens/Homo_sapiens.GRCh38.105.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_106: "http://ftp.ensembl.org/pub/release-106/gtf/homo_sapiens/Homo_sapiens.GRCh38.106.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_107: "http://ftp.ensembl.org/pub/release-107/gtf/homo_sapiens/Homo_sapiens.GRCh38.107.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_108: "http://ftp.ensembl.org/pub/release-108/gtf/homo_sapiens/Homo_sapiens.GRCh38.108.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_109: "http://ftp.ensembl.org/pub/release-109/gtf/homo_sapiens/Homo_sapiens.GRCh38.109.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_110: "http://ftp.ensembl.org/pub/release-110/gtf/homo_sapiens/Homo_sapiens.GRCh38.110.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_111: "http://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_112: "http://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_81: "https://ftp.ensembl.org/pub/release-81/gtf/homo_sapiens/Homo_sapiens.GRCh38.81.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_82: "https://ftp.ensembl.org/pub/release-82/gtf/homo_sapiens/Homo_sapiens.GRCh38.82.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_83: "https://ftp.ensembl.org/pub/release-83/gtf/homo_sapiens/Homo_sapiens.GRCh38.83.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_84: "https://ftp.ensembl.org/pub/release-84/gtf/homo_sapiens/Homo_sapiens.GRCh38.84.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_85: "https://ftp.ensembl.org/pub/release-85/gtf/homo_sapiens/Homo_sapiens.GRCh38.85.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_86: "https://ftp.ensembl.org/pub/release-86/gtf/homo_sapiens/Homo_sapiens.GRCh38.86.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_87: "https://ftp.ensembl.org/pub/release-87/gtf/homo_sapiens/Homo_sapiens.GRCh38.87.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_88: "https://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_89: "https://ftp.ensembl.org/pub/release-89/gtf/homo_sapiens/Homo_sapiens.GRCh38.89.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_90: "https://ftp.ensembl.org/pub/release-90/gtf/homo_sapiens/Homo_sapiens.GRCh38.90.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_91: "https://ftp.ensembl.org/pub/release-91/gtf/homo_sapiens/Homo_sapiens.GRCh38.91.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_92: "https://ftp.ensembl.org/pub/release-92/gtf/homo_sapiens/Homo_sapiens.GRCh38.92.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_93: "https://ftp.ensembl.org/pub/release-93/gtf/homo_sapiens/Homo_sapiens.GRCh38.93.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_94: "https://ftp.ensembl.org/pub/release-94/gtf/homo_sapiens/Homo_sapiens.GRCh38.94.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_95: "https://ftp.ensembl.org/pub/release-95/gtf/homo_sapiens/Homo_sapiens.GRCh38.95.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_96: "https://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_97: "https://ftp.ensembl.org/pub/release-97/gtf/homo_sapiens/Homo_sapiens.GRCh38.97.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_98: "https://ftp.ensembl.org/pub/release-98/gtf/homo_sapiens/Homo_sapiens.GRCh38.98.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_99: "https://ftp.ensembl.org/pub/release-99/gtf/homo_sapiens/Homo_sapiens.GRCh38.99.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_100: "https://ftp.ensembl.org/pub/release-100/gtf/homo_sapiens/Homo_sapiens.GRCh38.100.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_101: "https://ftp.ensembl.org/pub/release-101/gtf/homo_sapiens/Homo_sapiens.GRCh38.101.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_102: "https://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_103: "https://ftp.ensembl.org/pub/release-103/gtf/homo_sapiens/Homo_sapiens.GRCh38.103.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_104: "https://ftp.ensembl.org/pub/release-104/gtf/homo_sapiens/Homo_sapiens.GRCh38.104.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_105: "https://ftp.ensembl.org/pub/release-105/gtf/homo_sapiens/Homo_sapiens.GRCh38.105.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_106: "https://ftp.ensembl.org/pub/release-106/gtf/homo_sapiens/Homo_sapiens.GRCh38.106.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_107: "https://ftp.ensembl.org/pub/release-107/gtf/homo_sapiens/Homo_sapiens.GRCh38.107.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_108: "https://ftp.ensembl.org/pub/release-108/gtf/homo_sapiens/Homo_sapiens.GRCh38.108.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_109: "https://ftp.ensembl.org/pub/release-109/gtf/homo_sapiens/Homo_sapiens.GRCh38.109.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_110: "https://ftp.ensembl.org/pub/release-110/gtf/homo_sapiens/Homo_sapiens.GRCh38.110.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_111: "https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz"
Homo_sapiens_GRCh38_Ensembl_112: "https://ftp.ensembl.org/pub/release-112/gtf/homo_sapiens/Homo_sapiens.GRCh38.112.gtf.gz"

T2T-CHM13v2.0:
Homo_sapiens_T2T-CHM13v2.0_Ensembl_2022_06: "https://ftp.ensembl.org/pub/rapid-release/species/Homo_sapiens/GCA_009914755.4/ensembl/geneset/2022_06/Homo_sapiens-GCA_009914755.4-2022_06-genes.gff3.gz"
Homo_sapiens_T2T-CHM13v2.0_Ensembl_2022_07: "https://ftp.ensembl.org/pub/rapid-release/species/Homo_sapiens/GCA_009914755.4/ensembl/geneset/2022_07/Homo_sapiens-GCA_009914755.4-2022_07-genes.gff3.gz"

Homo_sapiens_T2T-CHM13v2.0_Ensembl_2022_06: "https://ftp.ensembl.org/pub/rapid-release/species/Homo_sapiens/GCA_009914755.4/ensembl/geneset/2022_06/Homo_sapiens-GCA_009914755.4-2022_06-genes.gtf.gz"
Homo_sapiens_T2T-CHM13v2.0_Ensembl_2022_07: "https://ftp.ensembl.org/pub/rapid-release/species/Homo_sapiens/GCA_009914755.4/ensembl/geneset/2022_07/Homo_sapiens-GCA_009914755.4-2022_07-genes.gtf.gz"
RefSeq:
GRCh37:
Homo_sapiens_GRCh37_RefSeq_p5: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/BUILD.37.3/GFF/ref_GRCh37.p5_top_level.gff3.gz"
Homo_sapiens_GRCh37_RefSeq_103: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.103/GFF/ref_GRCh37.p9_top_level.gff3.gz"
Homo_sapiens_GRCh37_RefSeq_104: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.104/GFF/ref_GRCh37.p10_top_level.gff3.gz"
Homo_sapiens_GRCh37_RefSeq_p5: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/BUILD.37.3/GFF/ref_GRCh37.p5_top_level.gff3.gz"
Homo_sapiens_GRCh37_RefSeq_103: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.103/GFF/ref_GRCh37.p9_top_level.gff3.gz"
Homo_sapiens_GRCh37_RefSeq_104: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.104/GFF/ref_GRCh37.p10_top_level.gff3.gz"
Homo_sapiens_GRCh37_RefSeq_105.20190906: "https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/105.20190906/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.gff.gz"
Homo_sapiens_GRCh37_RefSeq_105.20201022: "https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/105.20201022/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.gff.gz"
Homo_sapiens_GRCh37_RefSeq_105.20220307: "https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/105.20220307/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.gff.gz"

GRCh38:
Homo_sapiens_GRCh38_RefSeq_106: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.106/GFF/ref_GRCh38_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_107: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.107/GFF/ref_GRCh38.p2_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_108: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.108/GFF/ref_GRCh38.p7_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_109: "http://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.109/GFF/ref_GRCh38.p12_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_106: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.106/GFF/ref_GRCh38_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_107: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.107/GFF/ref_GRCh38.p2_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_108: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.108/GFF/ref_GRCh38.p7_top_level.gff3.gz"
Homo_sapiens_GRCh38_RefSeq_109: "https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.109/GFF/ref_GRCh38.p12_top_level.gff3.gz"
# The date on this 109 version is 2020-2024 (after the other 109s below), not sure what's going on
Homo_sapiens_GRCh38_RefSeq_109.GCF: "https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/109/GCF_000001405.38_GRCh38.p12/GCF_000001405.38_GRCh38.p12_genomic.gff.gz"
Homo_sapiens_GRCh38_RefSeq_109.20190607: "https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/109.20190607/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.gff.gz"
Expand Down
Loading

0 comments on commit f8cac4e

Please sign in to comment.