Skip to content

Commit

Permalink
Turn Builder class into a data class (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Apr 19, 2021
1 parent d8ace6b commit febd70d
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 66 deletions.
19 changes: 9 additions & 10 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
codecov:
require_ci_to_pass: no
max_report_age: off

comment: false

ignore:
- 'tests/*.py'
- 'setup.py'

coverage:
precision: 2
round: down
status:
project:
default:
threshold: 0.2
if_not_found: success
patch:
default:
enabled: no
if_not_found: success
changes:
default:
enabled: no
if_not_found: success
target: 95
informational: true
patch: off
changes: off
21 changes: 21 additions & 0 deletions docs/source/examples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Examples

## CESM2

### Seasonal-to-Multiyear Large Ensemble (SMYLE)

```bash
# Quote the exclude glob so the shell passes it to ecgtools verbatim instead of expanding it.
ecgtools build cesm2-smyle /glade/campaign/cesm/development/espwg/SMYLE/archive/ --depth 4 --exclude-patterns '*/glc/*' --jobs 20 --description "Seasonal-to-Multiyear Large Ensemble (SMYLE) using CESM2"
```

### Large Ensemble (LENS)

## CESM1

### Large Ensemble (LENS)

## CMIP

### CMIP5

### CMIP6
4 changes: 1 addition & 3 deletions ecgtools/builders/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@
import typer
from distributed import Client
from ncar_jobqueue import NCARCluster
from rich.console import Console

console = Console()
from ..core import Builder
from ..core import Builder, console
from .cesm import smyle_parser

app = typer.Typer(help='ESM Catalog Generation CLI')
Expand Down
97 changes: 49 additions & 48 deletions ecgtools/core.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dataclasses
import datetime
import itertools
import json
Expand All @@ -7,6 +8,9 @@
from typing import List

import dask
from rich.console import Console

console = Console()
import pandas as pd


Expand Down Expand Up @@ -104,66 +108,59 @@ def extract_attr_with_regex(
return None


@dataclasses.dataclass
class Builder:
"""
Generates a catalog from a list of files.
"""
def __init__(
self,
root_path: str,
extension: str = '*.nc',
depth: int = 0,
exclude_patterns: list = None,
parser: callable = None,
lazy: bool = True,
nbatches: int = 25,
) -> 'Builder':
"""
Generate ESM catalog from a list of files.
Parameters
----------
root_path : str
Path of root directory.
extension : str, optional
File extension pattern used to select files, by default '*.nc'.
depth : int, optional
Recursion depth. Recursively crawl `root_path` up to a specified depth, by default 0
exclude_patterns : list, optional
Directory, file patterns to exclude during catalog generation, by default None
parser : callable, optional
A function (or callable object) that will be called to parse
attributes from a given file/filepath, by default None
lazy : bool, optional
Whether to parse attributes lazily via dask.delayed, by default True
nbatches : int, optional
Number of tasks to batch in a single `dask.delayed` call, by default 25
Parameters
----------
root_path : str
Path of root directory.
extension : str, optional
File extension pattern used to select files, by default '*.nc'.
depth : int, optional
Recursion depth. Recursively crawl `root_path` up to a specified depth, by default 0
exclude_patterns : list, optional
Directory, file patterns to exclude during catalog generation, by default None
parser : callable, optional
A function (or callable object) that will be called to parse
attributes from a given file/filepath, by default None
lazy : bool, optional
Whether to parse attributes lazily via dask.delayed, by default True
nbatches : int, optional
Number of tasks to batch in a single `dask.delayed` call, by default 25
Raises
------
FileNotFoundError
When `root_path` does not exist.
"""
if root_path is not None:
self.root_path = Path(root_path)
if root_path is not None and not self.root_path.is_dir():
raise FileNotFoundError(f'{root_path} directory does not exist')
if parser is not None and not callable(parser):
Raises
------
FileNotFoundError
When `root_path` is not an existing directory.
TypeError
When `parser` is provided but is not callable.
"""

root_path: str
extension: str = '*.nc'
depth: int = 0
exclude_patterns: list = None
parser: callable = None
lazy: bool = True
nbatches: int = 25

def __post_init__(self):

if self.root_path is not None:
self.root_path = Path(self.root_path)
if self.root_path is not None and not self.root_path.is_dir():
raise FileNotFoundError(f'{self.root_path} directory does not exist')
if self.parser is not None and not callable(self.parser):
raise TypeError('parser must be callable.')
self.dirs = []
self.filelist = []
self.df = None
self.old_df = None
self.new_df = None
self.esmcol_data = None
self.parser = parser
self.lazy = lazy
self.nbatches = nbatches
self.extension = extension
self.depth = depth
self.exclude_patterns = exclude_patterns or []
self.exclude_patterns = self.exclude_patterns or []

def _get_directories(self, root_path: str = None, depth: int = None):
"""
Expand Down Expand Up @@ -191,6 +188,7 @@ def _get_directories(self, root_path: str = None, depth: int = None):
depth = self.depth

pattern = '*/' * (depth + 1)
console.print('Getting list of directories...')
dirs = [x for x in root_path.glob(pattern) if x.is_dir()]
self.dirs = dirs
return self
Expand Down Expand Up @@ -270,11 +268,14 @@ def _get_filelist_from_dirs(
dirs = self.dirs.copy()
else:
dirs = self.dirs.copy()
console.print('Getting list of files...')
if self.lazy:
console.print('Batching `_get_filelist()` dask.delayed calls...')
filelist = [
self._get_filelist_delayed(directory, extension, exclude_patterns)
for directory in dirs
]

filelist = dask.compute(*filelist)
else:
filelist = [
Expand Down
11 changes: 6 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
cf_xarray
dask[bag]
dask[delayed]
ncar-jobqueue
netCDF4
rich
typer
xarray
yamale
dask[delayed]
dask[bag]
cf_xarray
typer
ncar-jobqueue

0 comments on commit febd70d

Please sign in to comment.