Skip to content

Commit

Permalink
Merge pull request #150 from rabernat/gpcp
Browse files Browse the repository at this point in the history
Add recipe for gpcp
  • Loading branch information
cisaacstern authored Jul 13, 2022
2 parents 63054d4 + e9e9a38 commit e613cc5
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
31 changes: 31 additions & 0 deletions recipes/gpcp/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
title: "Global Precipitation Climatology Project"
description: >
Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged
satellite/gauge precipitation Climate data Record (CDR) from 1996 to present.
pangeo_forge_version: "0.8.3"
pangeo_notebook_version: "2022.06.02"
recipes:
- id: gpcp
object: "recipe:recipe"
provenance:
providers:
- name: "NOAA NCEI"
description: "National Oceanic and Atmospheric Administration National Centers for Environmental Information"
roles:
- host
- licensor
url: https://www.ncei.noaa.gov/products/global-precipitation-climatology-project
- name: "University of Maryland"
description: >
University of Maryland College Park Earth System Science Interdisciplinary Center
(ESSIC) and Cooperative Institute for Climate and Satellites (CICS)
roles:
- producer
url: http://gpcp.umd.edu/
license: "No constraints on data access or use."
maintainers:
- name: "Ryan Abernathey"
orcid: "0000-0001-5999-4917"
github: rabernat
bakery:
id: "pangeo-ldeo-nsf-earthcube"
31 changes: 31 additions & 0 deletions recipes/gpcp/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import fsspec
from fsspec.implementations.http import HTTPFileSystem
from pangeo_forge_recipes.patterns import pattern_from_file_sequence, FileType
from pangeo_forge_recipes.recipes import XarrayZarrRecipe

# The GPCP files use an annoying naming convention which embeds the creation date in the file name.
# e.g. https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/1996/gpcp_v01r03_daily_d19961001_c20170530.nc
# This makes it very hard to create a deterministic function to generate the file names,
# so instead we crawl the NCEI server.

# Root of the NCEI HTTP listing; the crawl below appends the year to this URL.
url_base = "https://www.ncei.noaa.gov/data/global-precipitation-climatology-project-gpcp-daily/access/"

# Years to crawl: 1996 through 2021 (the upper bound of range() is exclusive).
years = range(1996, 2022)

# NOTE: the crawl runs when this module is imported, so importing the recipe
# issues one HTTP listing request per year.
fs = HTTPFileSystem()
file_list = []
for year in years:
    # Keep only the NetCDF entries of this year's listing. Sorting happens
    # within each year, and years are visited in ascending order.
    file_list.extend(
        sorted(
            entry
            for entry in fs.ls(f"{url_base}{year}", detail=False)
            if entry.endswith('.nc')
        )
    )

# Concatenate the crawled files along "time", declaring one item per file.
pattern = pattern_from_file_sequence(file_list, "time", nitems_per_file=1)

# Build the Zarr recipe: 200 inputs are grouped into each chunk, and
# decode_coords="all" is forwarded to xarray when opening every input.
recipe = XarrayZarrRecipe(
    pattern,
    inputs_per_chunk=200,
    xarray_open_kwargs={"decode_coords": "all"},
)

0 comments on commit e613cc5

Please sign in to comment.