From fd10847b13e6127f93e6a69e5931d1eb74c7031e Mon Sep 17 00:00:00 2001
From: bnb32
Date: Thu, 4 Jan 2024 09:57:48 -0700
Subject: [PATCH 1/2] path updates for kestrel runs

---
 .../pipeline_legacy/config_east_nsrdb.json    |  4 ++--
 .../pipeline_legacy/config_east_pipeline.json | 10 ++++----
 .../config_west_conus_nsrdb.json              |  6 ++---
 .../config_west_conus_pipeline.json           | 10 ++++----
 .../templates/config_nsrdb_pre2018.json       | 24 +++++++++----------
 nsrdb/utilities/extract_surfrad.py            | 13 +++++-----
 nsrdb/utilities/file_utils.py                 | 24 +++++++++----------
 nsrdb/utilities/movers.py                     | 10 ++++----
 nsrdb/utilities/update_nsrdb_versions.py      |  5 ++--
 9 files changed, 52 insertions(+), 54 deletions(-)

diff --git a/nsrdb/config/pipeline_legacy/config_east_nsrdb.json b/nsrdb/config/pipeline_legacy/config_east_nsrdb.json
index c1597325..796f6808 100755
--- a/nsrdb/config/pipeline_legacy/config_east_nsrdb.json
+++ b/nsrdb/config/pipeline_legacy/config_east_nsrdb.json
@@ -44,12 +44,12 @@
         "name": "nsrdb_east",
         "nsrdb_freq": "10min",
         "nsrdb_grid": "/projects/pxs/reference_grids/nsrdb_meta_2km_east_-105.csv",
-        "out_dir": "/lustre/eaglefs/projects/pxs/processing/2019/east/",
+        "out_dir": "/projects/pxs/processing/2019/east/",
         "year": 2019
     },
     "eagle": {
         "alloc": "pxs",
-        "feature": "--qos=high",
+        "feature": "--qos=normal",
         "memory": 178,
         "walltime": 48
     }
diff --git a/nsrdb/config/pipeline_legacy/config_east_pipeline.json b/nsrdb/config/pipeline_legacy/config_east_pipeline.json
index 6113113c..016e69a7 100755
--- a/nsrdb/config/pipeline_legacy/config_east_pipeline.json
+++ b/nsrdb/config/pipeline_legacy/config_east_pipeline.json
@@ -6,19 +6,19 @@
     "name": "east",
     "pipeline": [
         {
-            "data-model": "/lustre/eaglefs/projects/pxs/processing/2019/east/config_nsrdb_east.json"
+            "data-model": "/projects/pxs/processing/2019/east/config_nsrdb_east.json"
         },
         {
-            "collect-data-model": "/lustre/eaglefs/projects/pxs/processing/2019/east/config_nsrdb_east.json"
+            "collect-data-model": "/projects/pxs/processing/2019/east/config_nsrdb_east.json"
         },
         {
-            "cloud-fill": "/lustre/eaglefs/projects/pxs/processing/2019/east/config_nsrdb_east.json"
+            "cloud-fill": "/projects/pxs/processing/2019/east/config_nsrdb_east.json"
         },
         {
-            "all-sky": "/lustre/eaglefs/projects/pxs/processing/2019/east/config_nsrdb_east.json"
+            "all-sky": "/projects/pxs/processing/2019/east/config_nsrdb_east.json"
         },
         {
-            "collect-final": "/lustre/eaglefs/projects/pxs/processing/2019/east/config_nsrdb_east.json"
+            "collect-final": "/projects/pxs/processing/2019/east/config_nsrdb_east.json"
         }
     ]
 }
diff --git a/nsrdb/config/pipeline_legacy/config_west_conus_nsrdb.json b/nsrdb/config/pipeline_legacy/config_west_conus_nsrdb.json
index 71543a61..c1c413e5 100755
--- a/nsrdb/config/pipeline_legacy/config_west_conus_nsrdb.json
+++ b/nsrdb/config/pipeline_legacy/config_west_conus_nsrdb.json
@@ -41,13 +41,13 @@
         "log_level": "DEBUG",
         "name": "nsrdb_west_conus",
         "nsrdb_freq": "5min",
-        "nsrdb_grid": "/lustre/eaglefs/projects/pxs/reference_grids/nsrdb_meta_2km_conus_west.csv",
-        "out_dir": "/lustre/eaglefs/projects/pxs/processing/2019/west_conus/",
+        "nsrdb_grid": "/projects/pxs/reference_grids/nsrdb_meta_2km_conus_west.csv",
+        "out_dir": "/projects/pxs/processing/2019/west_conus/",
         "year": 2019
     },
     "eagle": {
         "alloc": "pxs",
-        "feature": "--qos=high",
+        "feature": "--qos=normal",
         "memory": 83,
         "walltime": 4
     }
diff --git a/nsrdb/config/pipeline_legacy/config_west_conus_pipeline.json b/nsrdb/config/pipeline_legacy/config_west_conus_pipeline.json
index 92c62668..1b632b3d 100755
--- a/nsrdb/config/pipeline_legacy/config_west_conus_pipeline.json
+++ b/nsrdb/config/pipeline_legacy/config_west_conus_pipeline.json
@@ -6,19 +6,19 @@
     "name": "west_conus",
     "pipeline": [
         {
-            "data-model": "/lustre/eaglefs/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
+            "data-model": "/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
         },
         {
-            "collect-data-model": "/lustre/eaglefs/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
+            "collect-data-model": "/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
         },
         {
-            "cloud-fill": "/lustre/eaglefs/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
+            "cloud-fill": "/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
         },
         {
-            "all-sky": "/lustre/eaglefs/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
+            "all-sky": "/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
         },
         {
-            "collect-final": "/lustre/eaglefs/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
+            "collect-final": "/projects/pxs/processing/2019/west_conus/config_nsrdb_west_conus.json"
         }
     ]
 }
diff --git a/nsrdb/config/templates/config_nsrdb_pre2018.json b/nsrdb/config/templates/config_nsrdb_pre2018.json
index 919ba90e..0f686e79 100755
--- a/nsrdb/config/templates/config_nsrdb_pre2018.json
+++ b/nsrdb/config/templates/config_nsrdb_pre2018.json
@@ -13,43 +13,43 @@
     ],
     "factory_kwargs": {
         "cld_opd_dcomp": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "cld_press_acha": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "cld_reff_dcomp": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "cloud_fraction": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "cloud_probability": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "cloud_type": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "refl_0_65um_nom": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "refl_0_65um_nom_stddev_3x3": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "refl_3_75um_nom": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
         },
         "surface_albedo": {
             "source_dir": "/projects/pxs/ancillary/albedo/nsrdb_%year%/"
         },
         "temp_11_0um_nom": {
-            "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
+            "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5"
        },
"temp_11_0um_nom_stddev_3x3": { - "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5" + "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5" }, "temp_3_75um_nom": { - "pattern": "/lustre/eaglefs/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5" + "pattern": "/projects/pxs/HDF/%satellite%/%year%/{doy}/level2/*_%year%_*.h5" } }, "max_workers": null, diff --git a/nsrdb/utilities/extract_surfrad.py b/nsrdb/utilities/extract_surfrad.py index 8e2bd886..3cf0c82f 100755 --- a/nsrdb/utilities/extract_surfrad.py +++ b/nsrdb/utilities/extract_surfrad.py @@ -4,14 +4,13 @@ @author: gbuster """ +import os + import h5py import numpy as np -import os import pandas as pd - from farms import SZA_LIM - DAT_COLS = ('year', 'jday', 'month', @@ -104,7 +103,7 @@ def get_dat_table(d, flist): table = [] # get readlines iterator - with open(os.path.join(d, fname), 'r') as f: + with open(os.path.join(d, fname)) as f: lines = f.readlines() # iterate through lines @@ -161,7 +160,7 @@ def get_lw1_table(d, flist): for i, fname in enumerate(flist): # get readlines iterator - with open(os.path.join(d, fname), 'r') as f: + with open(os.path.join(d, fname)) as f: lines = f.readlines() # iterate through lines @@ -302,7 +301,7 @@ def extract_all(root_dir, dir_out, years=range(1998, 2018), file_flag='.dat', if __name__ == '__main__': - root_dir = '/lustre/eaglefs/projects/pxs/surfrad/raw' - dir_out = '/lustre/eaglefs/projects/pxs/surfrad/h5' + root_dir = '/projects/pxs/surfrad/raw' + dir_out = '/projects/pxs/surfrad/h5' site_codes = ('bon', 'dra', 'fpk', 'gwn', 'psu', 'sxf', 'tbl') extract_all(root_dir, dir_out, site_codes=site_codes) diff --git a/nsrdb/utilities/file_utils.py b/nsrdb/utilities/file_utils.py index f095a95d..2df87b76 100755 --- a/nsrdb/utilities/file_utils.py +++ b/nsrdb/utilities/file_utils.py @@ -3,26 +3,24 @@ @author: gbuster """ -from concurrent.futures import as_completed import gzip import logging import os +import re import shlex import shutil -import re -from subprocess import Popen, PIPE, run import time -from urllib.request import urlopen +from concurrent.futures import as_completed +from subprocess import PIPE, Popen, run from urllib.error import URLError +from urllib.request import urlopen + +import numpy as np import pandas as pd from packaging import version - - from rex.utilities.execution import SpawnProcessPool from rex.utilities.loggers import init_logger -import numpy as np - logger = logging.getLogger(__name__) DIR = os.path.dirname(os.path.realpath(__file__)) @@ -210,7 +208,7 @@ def convert_h4(path4, f_h4, path5, f_h5): h5 = os.path.join(path5, f_h5) if not os.path.exists(h4): - raise IOError('Could not locate file for conversion to h5: {}' + raise OSError('Could not locate file for conversion to h5: {}' .format(h4)) if os.path.exists(h5): logger.info('Target h5 file already exists, may have already been ' @@ -281,7 +279,7 @@ def convert_list_serial(conversion_list): """Convert h4 to h5 files in serial based on the conversion list. Parameters - ------- + ---------- conversion_list : list List of paths and files to convert for input to convert4to5. Format is: conversion_list = [[path4, f_h4, path5, f_h5], ...] @@ -297,7 +295,7 @@ def convert_list_parallel(conversion_list, n_workers=2): """Convert h4 to h5 files in parallel based on the conversion list. Parameters - ------- + ---------- conversion_list : list List of paths and files to convert for input to convert4to5. 
         Format is: conversion_list = [[path4, f_h4, path5, f_h5], ...]
@@ -364,8 +362,8 @@
 
 
 if __name__ == '__main__':
-    path4 = '/lustre/eaglefs/projects/pxs/uwisc/2018_west/'
-    path5 = '/lustre/eaglefs/projects/pxs/uwisc/2018_west_h5/'
+    path4 = '/projects/pxs/uwisc/2018_west/'
+    path5 = '/projects/pxs/uwisc/2018_west_h5/'
     init_logger(__name__, log_level='INFO',
                 log_file=os.path.join(path5, 'convert.log'))
     convert_directory(path4, path5, n_workers=36)
diff --git a/nsrdb/utilities/movers.py b/nsrdb/utilities/movers.py
index be35eb9b..5a6bec8d 100755
--- a/nsrdb/utilities/movers.py
+++ b/nsrdb/utilities/movers.py
@@ -3,15 +3,15 @@
 
 @author: gbuster
 """
-import h5py
 import logging
-import numpy as np
 import os
-import pandas as pd
 import time
 from warnings import warn
 
-from rex.utilities.hpc import SLURM, PBS
+import h5py
+import numpy as np
+import pandas as pd
+from rex.utilities.hpc import PBS, SLURM
 from rex.utilities.loggers import init_logger
 
 from nsrdb.utilities.file_utils import repack_h5
@@ -421,7 +421,7 @@ def peregrine(fun_str, arg_str, alloc='pxs', queue='batch-h',
 
 def eagle(fun_str, arg_str, alloc='pxs', memory=96,
           walltime=10, node_name='mover',
-          stdout_path='/lustre/eaglefs/scratch/gbuster/data_movers/'):
+          stdout_path='/scratch/gbuster/data_movers/'):
     """Kick off an eagle job to execute a mover function.
 
     Parameters
diff --git a/nsrdb/utilities/update_nsrdb_versions.py b/nsrdb/utilities/update_nsrdb_versions.py
index fafb617a..5c5e07c6 100755
--- a/nsrdb/utilities/update_nsrdb_versions.py
+++ b/nsrdb/utilities/update_nsrdb_versions.py
@@ -3,10 +3,11 @@
 @author: gbuster
 """
-import h5py
 import os
 
-root_dir = '/lustre/eaglefs/projects/pxs/nsrdb/v3.0.1/'
+import h5py
+
+root_dir = '/projects/pxs/nsrdb/v3.0.1/'
 
 versions = {"nsrdb_1998.h5": "3.0.6",
             "nsrdb_1999.h5": "3.0.6",

From b3d684ab702980b5184cdcd3e89f0609ed72507e Mon Sep 17 00:00:00 2001
From: bnb32
Date: Thu, 4 Jan 2024 10:06:10 -0700
Subject: [PATCH 2/2] linting

---
 nsrdb/utilities/extract_surfrad.py | 196 ++++++++++++++++-------
 1 file changed, 108 insertions(+), 88 deletions(-)

diff --git a/nsrdb/utilities/extract_surfrad.py b/nsrdb/utilities/extract_surfrad.py
index 3cf0c82f..611c2d42 100755
--- a/nsrdb/utilities/extract_surfrad.py
+++ b/nsrdb/utilities/extract_surfrad.py
@@ -11,38 +11,38 @@
 import pandas as pd
 from farms import SZA_LIM
 
-DAT_COLS = ('year',
-            'jday',
-            'month',
-            'day',
-            'hour',
-            'min',
-            'dt',
-            'zen',
-            'dw_solar',
-            'qc_dwsolar',
-            'uw_solar',
-            'qc_uw_solar',
-            'direct_n',
-            'qc_direct_n',
-            'diffuse',
-            'qc_diffuse',
-            )
-
-DAT_MAPPING = {'dw_solar': 'ghi',
-               'direct_n': 'dni',
-               'diffuse': 'dhi',
-               'zen': 'sza'}
-
-LW1_MAPPING = {'swdn': 'ghi',
-               'dirsw': 'dni',
-               'difsw': 'dhi',
-               'sza': 'sza'}
+DAT_COLS = (
+    "year",
+    "jday",
+    "month",
+    "day",
+    "hour",
+    "min",
+    "dt",
+    "zen",
+    "dw_solar",
+    "qc_dwsolar",
+    "uw_solar",
+    "qc_uw_solar",
+    "direct_n",
+    "qc_direct_n",
+    "diffuse",
+    "qc_diffuse",
+)
+
+DAT_MAPPING = {
+    "dw_solar": "ghi",
+    "direct_n": "dni",
+    "diffuse": "dhi",
+    "zen": "sza",
+}
+
+LW1_MAPPING = {"swdn": "ghi", "dirsw": "dni", "difsw": "dhi", "sza": "sza"}
 
 MISSING = -999
 
 
-def filter_measurement_df(df, var_list=('dhi', 'dni', 'ghi', 'sza')):
+def filter_measurement_df(df, var_list=("dhi", "dni", "ghi", "sza")):
     """Filter the measurement dataframe.
 
     Parameters
@@ -68,12 +68,12 @@ def filter_measurement_df(df, var_list=('dhi', 'dni', 'ghi', 'sza')):
 
     for var in var_list:
         # No data can be negative
-        mask = (df[var] < 0)
+        mask = df[var] < 0
         df.loc[mask, var] = MISSING
 
-        if var in ('dhi', 'ghi'):
+        if var in ("dhi", "ghi"):
             # dhi and ghi cannot be negative or zero during the day
-            mask = (df[var] <= 0) & (df['sza'] < SZA_LIM)
+            mask = (df[var] <= 0) & (df["sza"] < SZA_LIM)
             df.loc[mask, var] = MISSING
 
     return df
@@ -108,15 +108,14 @@ def get_dat_table(d, flist):
 
         # iterate through lines
         for line in lines:
-
             # reduce multiple spaces to a single space, split columns
-            while '  ' in line:
-                line = line.replace('  ', ' ')
-            cols = line.strip(' ').split(' ')
+            while "  " in line:
+                line = line.replace("  ", " ")
+            cols = line.strip(" ").split(" ")
 
             # Set table header or append data to table
             if len(cols) > len(DAT_COLS):
-                table.append(cols[0:len(DAT_COLS)])
+                table.append(cols[0: len(DAT_COLS)])
 
         # upon finishing table concatenation, initialize annual table
         # or append to annual table
@@ -126,14 +125,16 @@ def get_dat_table(d, flist):
             annual_table += table
 
     df = pd.DataFrame(annual_table, columns=DAT_COLS)
-    df = df.rename(DAT_MAPPING, axis='columns')
-    df['time_string'] = (df['year']
-                         + df['month'].str.zfill(2)
-                         + df['day'].str.zfill(2)
-                         + df['hour'].str.zfill(2)
-                         + df['min'].str.zfill(2))
-
-    ti = pd.to_datetime(df['time_string'], format='%Y%m%d%H%M')
+    df = df.rename(DAT_MAPPING, axis="columns")
+    df["time_string"] = (
+        df["year"]
+        + df["month"].str.zfill(2)
+        + df["day"].str.zfill(2)
+        + df["hour"].str.zfill(2)
+        + df["min"].str.zfill(2)
+    )
+
+    ti = pd.to_datetime(df["time_string"], format="%Y%m%d%H%M")
     df.index = ti
     df = df.sort_index()
     return df
@@ -158,18 +159,16 @@ def get_lw1_table(d, flist):
 
     # iterate through data files
     for i, fname in enumerate(flist):
-
         # get readlines iterator
         with open(os.path.join(d, fname)) as f:
             lines = f.readlines()
 
         # iterate through lines
         for j, line in enumerate(lines):
-
             # reduce multiple spaces to a single space, split columns
-            while '  ' in line:
-                line = line.replace('  ', ' ')
-            cols = line.strip(' ').split(' ')
+            while "  " in line:
+                line = line.replace("  ", " ")
+            cols = line.strip(" ").split(" ")
 
             # Set table header or append data to table
             if j == 0:
@@ -186,18 +185,21 @@ def get_lw1_table(d, flist):
                     # make sure headers are the same
                     annual_table += table[1:]
                 else:
-                    msg = ('Headers for "{}" does not match annual table '
-                           'headers: {}'
-                           .format(os.path.join(d, fname), annual_table[0]))
+                    msg = (
+                        'Headers for "{}" do not match annual table '
+                        "headers: {}".format(
+                            os.path.join(d, fname), annual_table[0]
+                        )
+                    )
                     raise ValueError(msg)
 
     headers = [h.lower() for h in annual_table[0]]
     df = pd.DataFrame(annual_table[1:], columns=headers)
-    df = df[['zdate', 'ztim', 'cosz', 'swdn', 'dirsw', 'difsw']]
-    df['sza'] = np.arccos(df['cosz'])
-    df = df.rename(LW1_MAPPING, axis='columns')
-    df['time_string'] = df['zdate'] + ' ' + df['ztim'].str.zfill(4)
-    ti = pd.to_datetime(df['time_string'], format='%Y%m%d %H%M')
+    df = df[["zdate", "ztim", "cosz", "swdn", "dirsw", "difsw"]]
+    df["sza"] = np.arccos(df["cosz"])
+    df = df.rename(LW1_MAPPING, axis="columns")
+    df["time_string"] = df["zdate"] + " " + df["ztim"].str.zfill(4)
+    ti = pd.to_datetime(df["time_string"], format="%Y%m%d %H%M")
     df.index = ti
     df = df.sort_index()
     return df
@@ -217,31 +219,42 @@ def surfrad_to_h5(df, fout, dir_out):
     dir_out : str
         Location to save output file.
""" - with h5py.File(os.path.join(dir_out, fout), 'w') as f: - + with h5py.File(os.path.join(dir_out, fout), "w") as f: # write time index - time_index = np.array(df.index.astype(str), dtype='S20') - ds = f.create_dataset('time_index', shape=time_index.shape, - dtype=time_index.dtype, chunks=None) + time_index = np.array(df.index.astype(str), dtype="S20") + ds = f.create_dataset( + "time_index", + shape=time_index.shape, + dtype=time_index.dtype, + chunks=None, + ) ds[...] = time_index # write solar zenith angle - ds = f.create_dataset('solar_zenith_angle', shape=df['sza'].shape, - dtype=np.float16, chunks=None) - ds[...] = df['sza'].values + ds = f.create_dataset( + "solar_zenith_angle", + shape=df["sza"].shape, + dtype=np.float16, + chunks=None, + ) + ds[...] = df["sza"].values # write irraidance variables - for dset in ['dhi', 'dni', 'ghi']: - df[dset] = np.round(df[dset].astype(float))\ - .astype(np.int16) - ds = f.create_dataset(dset, shape=df[dset].shape, - dtype=df[dset].dtype, - chunks=None) + for dset in ["dhi", "dni", "ghi"]: + df[dset] = np.round(df[dset].astype(float)).astype(np.int16) + ds = f.create_dataset( + dset, shape=df[dset].shape, dtype=df[dset].dtype, chunks=None + ) ds[...] = df[dset].values -def extract_all(root_dir, dir_out, years=range(1998, 2018), file_flag='.dat', - site_codes=('bon', 'dra', 'fpk', 'gwn', 'psu', 'sxf', 'tbl')): +def extract_all( + root_dir, + dir_out, + years=range(1998, 2018), + file_flag=".dat", + site_codes=("bon", "dra", "fpk", "gwn", "psu", "sxf", "tbl"), +): """Extract all surfrad measurement data into h5 files. Parameters @@ -265,43 +278,50 @@ def extract_all(root_dir, dir_out, years=range(1998, 2018), file_flag='.dat', for site in site_codes: for year in years: - # look for target data directory d = os.path.join(root_dir, site, str(year)) # set target output filename - fout = '{}_{}.h5'.format(site, year) + fout = "{}_{}.h5".format(site, year) if not os.path.exists(d): - print('Skipping: "{}" for {}. Path does not exist: {}' - .format(site, year, d)) + print( + 'Skipping: "{}" for {}. Path does not exist: {}'.format( + site, year, d + ) + ) bad_dirs.append(d) elif os.path.exists(os.path.join(dir_out, fout)): - print('Skipping file, already exists: {}'.format(fout)) + print("Skipping file, already exists: {}".format(fout)) else: # get number of valid files in dir flist = [f for f in os.listdir(d) if file_flag in f] print('Processing "{}" for {}'.format(site, year)) - if 'dat' in file_flag: + if "dat" in file_flag: df = get_dat_table(d, flist) - elif 'lw1' in file_flag: + elif "lw1" in file_flag: df = get_lw1_table(d, flist) else: - raise('Did not recongize user-specified file flag: ' - '"{}"'.format(file_flag)) + raise ( + "Did not recongize user-specified file flag: " + '"{}"'.format(file_flag) + ) df = filter_measurement_df(df) surfrad_to_h5(df, fout, dir_out) - print('The following directories did not have valid datasets:\n{}' - .format(bad_dirs)) + print( + "The following directories did not have valid datasets:\n{}".format( + bad_dirs + ) + ) return df -if __name__ == '__main__': - root_dir = '/projects/pxs/surfrad/raw' - dir_out = '/projects/pxs/surfrad/h5' - site_codes = ('bon', 'dra', 'fpk', 'gwn', 'psu', 'sxf', 'tbl') +if __name__ == "__main__": + root_dir = "/projects/pxs/surfrad/raw" + dir_out = "/projects/pxs/surfrad/h5" + site_codes = ("bon", "dra", "fpk", "gwn", "psu", "sxf", "tbl") extract_all(root_dir, dir_out, site_codes=site_codes)