Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor omPriorVerify.py into tests #14

Merged
merged 23 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/setup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ runs:
echo "${VIRTUAL_ENV}/bin" >> $GITHUB_PATH

shell: bash

14 changes: 9 additions & 5 deletions scripts/omCreateDomainInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,30 @@

from openmethane_prior.omInputs import domainPath, geomFilePath, croFilePath, dotFilePath
import xarray as xr
import os
from pathlib import Path

root_path = Path(__file__).parent.parent

domainXr = xr.Dataset()

with xr.open_dataset( geomFilePath) as geomXr:
with xr.open_dataset( os.path.join(root_path, geomFilePath)) as geomXr:
for attr in ['DX', 'DY', 'TRUELAT1','TRUELAT2', 'MOAD_CEN_LAT', 'STAND_LON']:
domainXr.attrs[attr] = geomXr.attrs[attr]

with xr.open_dataset( croFilePath) as croXr:
with xr.open_dataset( os.path.join(root_path, croFilePath)) as croXr:
for var in ['LAT','LON']:
domainXr[var] = croXr[var]
domainXr[var] = croXr[var].squeeze(dim="LAY", drop=True) # copy but remove the 'LAY' dimension

domainXr['LANDMASK'] = croXr['LWMASK'].squeeze(dim="LAY", drop=True) # copy but remove the 'LAY' dimension

with xr.open_dataset( dotFilePath) as dotXr:
with xr.open_dataset( os.path.join(root_path, dotFilePath)) as dotXr:
# some repetition between the geom and grid files here, XCELL=DX and YCELL=DY
for attr in ['XCELL', 'YCELL']:
domainXr.attrs[attr] = croXr.attrs[attr]
for var in ['LATD','LOND']:
domainXr[var] = dotXr[var].rename({'COL':'COL_D', 'ROW':'ROW_D'})

print(domainPath)
domainXr.to_netcdf(domainPath)
print(os.path.join(root_path, domainPath))
domainXr.to_netcdf(os.path.join(root_path, domainPath))
3 changes: 1 addition & 2 deletions scripts/omPrior.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,4 @@
omWetlandEmis.processEmissions(args.startDate, args.endDate)

omOutputs.sumLayers()

omPriorVerify.verifyEmis()
crdanielbusch marked this conversation as resolved.
Show resolved Hide resolved
omPriorVerify.verifyEmis()
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
from pathlib import Path


@pytest.fixture
@pytest.fixture(scope="session")
def root_dir():
return Path(__file__).parent.parent
return Path(__file__).parent.parent
36 changes: 0 additions & 36 deletions tests/test_cmaq_files.py

This file was deleted.

55 changes: 0 additions & 55 deletions tests/test_download_data.py

This file was deleted.

198 changes: 184 additions & 14 deletions tests/test_om_prior.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,202 @@
import datetime
import os
import subprocess
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import requests
import xarray as xr
import os
from openmethane_prior.layers.omGFASEmis import downloadGFAS
from openmethane_prior.omInputs import livestockDataPath, sectoralEmissionsPath
from openmethane_prior.omUtils import getenv, secsPerYear

root_path = Path(__file__).parent.parent
sys.path.insert(1, os.path.join(root_path, "scripts"))
from omDownloadInputs import download_input_files, downloads, remote, sectoralEmissionsPath


def test_full_process(num_regression, root_dir, monkeypatch):
@pytest.fixture(scope="session")
def cro_xr(root_dir):
    """Provide the dataset named by the ``CROFILE`` setting for the session.

    The path is ``root_dir`` joined with the values that ``getenv`` returns
    for ``CMAQ_EXAMPLE`` and ``CROFILE``.

    Yields:
        The dataset returned by ``xr.open_dataset``. It is closed when the
        test session ends, so the underlying file handle is not leaked.
    """
    cmaqExamplePath = getenv("CMAQ_EXAMPLE")
    croFilePath = os.path.join(root_dir, cmaqExamplePath, getenv("CROFILE"))
    # Yield from inside the context manager so teardown closes the file.
    with xr.open_dataset(croFilePath) as dataset:
        yield dataset

monkeypatch.chdir(root_dir)

subprocess.run(["python", "scripts/omDownloadInputs.py"])
@pytest.fixture(scope="session")
def dot_xr(root_dir):
    """Provide the dataset named by the ``DOTFILE`` setting for the session.

    The path is ``root_dir`` joined with the values that ``getenv`` returns
    for ``CMAQ_EXAMPLE`` and ``DOTFILE``.

    Yields:
        The dataset returned by ``xr.open_dataset``. It is closed when the
        test session ends, so the underlying file handle is not leaked.
    """
    cmaqExamplePath = getenv("CMAQ_EXAMPLE")
    dotFilePath = os.path.join(root_dir, cmaqExamplePath, getenv("DOTFILE"))
    # Yield from inside the context manager so teardown closes the file.
    with xr.open_dataset(dotFilePath) as dataset:
        yield dataset

subprocess.run(["python", "scripts/omCreateDomainInfo.py"])

subprocess.run(["python", "scripts/omPrior.py", "2022-07-01", "2022-07-02"])
@pytest.fixture(scope="session")
def geom_xr(root_dir):
    """Provide the dataset named by the ``GEO_EM`` setting for the session.

    The path is ``root_dir`` joined with the values that ``getenv`` returns
    for ``CMAQ_EXAMPLE`` and ``GEO_EM``.

    Yields:
        The dataset returned by ``xr.open_dataset``. It is closed when the
        test session ends, so the underlying file handle is not leaked.
    """
    cmaqExamplePath = getenv("CMAQ_EXAMPLE")
    geomFilePath = os.path.join(root_dir, cmaqExamplePath, getenv("GEO_EM"))
    # Yield from inside the context manager so teardown closes the file.
    with xr.open_dataset(geomFilePath) as dataset:
        yield dataset

filepath_ds = os.path.join(root_dir, "outputs/out-om-domain-info.nc")
out_om_domain = xr.load_dataset(filepath_ds)

mean_values = {key: out_om_domain[key].mean().item() for key in out_om_domain.keys()}
num_regression.check(mean_values)
# Fixture to download and later remove all input files
@pytest.fixture(scope="session")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fixture can probably move into conftest.py, as it could be used in other places. Also, since it is session-scoped, it will still only run once even if you use it in multiple test modules.

def input_files(root_dir) :
download_input_files(root_path=root_dir,
downloads=downloads,
remote=remote)

input_folder = os.path.join(root_dir, "inputs")

downloaded_files = os.listdir("inputs")
downloaded_files = os.listdir(input_folder)

for file in [i for i in downloaded_files if i != 'README.md']:
filepath = os.path.join("inputs", file)
yield downloaded_files

for file in [i for i in downloaded_files if i != 'README.md'] :
filepath = os.path.join(input_folder, file)
os.remove(filepath)

os.remove("outputs/out-om-domain-info.nc")

@pytest.fixture(scope="session")
def input_domain_xr(root_dir, input_files):
    """Run ``scripts/omCreateDomainInfo.py`` and provide the file it writes.

    Requesting ``input_files`` makes pytest set that fixture up before the
    script is executed.

    Yields:
        The dataset loaded from ``inputs/om-domain-info.nc`` under
        ``root_dir``. The file is deleted again during teardown.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status (``check=True``).
    """
    script_path = os.path.join(root_dir, "scripts/omCreateDomainInfo.py")
    # sys.executable pins the child process to the interpreter that is running
    # the tests, instead of whichever "python" happens to be first on PATH.
    subprocess.run([sys.executable, script_path], check=True)

    # Generated by scripts/omCreateDomainInfo.py
    filepath_in_domain = os.path.join(root_dir, "inputs/om-domain-info.nc")

    yield xr.load_dataset(filepath_in_domain)

    os.remove(filepath_in_domain)


@pytest.fixture(scope="session")
def output_domain_xr(root_dir, input_domain_xr):
    """Run ``scripts/omPrior.py`` and provide the output file it writes.

    The script is invoked with the arguments ``2022-07-01`` and
    ``2022-07-02``. Requesting ``input_domain_xr`` makes pytest set that
    fixture up before the script is executed.

    Yields:
        The dataset loaded from ``outputs/out-om-domain-info.nc`` under
        ``root_dir``. The file is deleted again during teardown.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status (``check=True``).
    """
    script_path = os.path.join(root_dir, "scripts/omPrior.py")
    # sys.executable pins the child process to the interpreter that is running
    # the tests, instead of whichever "python" happens to be first on PATH.
    subprocess.run([sys.executable, script_path, "2022-07-01", "2022-07-02"], check=True)

    # Generated by scripts/omPrior.py
    filepath_out_domain = os.path.join(root_dir, "outputs/out-om-domain-info.nc")

    yield xr.load_dataset(filepath_out_domain)

    os.remove(filepath_out_domain)


def test_001_response_for_download_links():
    """Check that every remote input file URL answers with HTTP 200.

    Each URL is ``remote`` followed by the file name from an entry of
    ``downloads``; the second element of each entry is not needed here.
    """
    for filename, _ in downloads:
        url = f"{remote}{filename}"
        # Without a timeout requests waits indefinitely on an unresponsive
        # server, which would hang the whole test run. 30 seconds is an
        # arbitrary upper bound (NOTE(review): adjust if the host is slow).
        with requests.get(url, stream=True, timeout=30) as response:
            print(f"Response code for {url}: {response.status_code}")
            assert response.status_code == 200


def test_002_cdsapi_connection(root_dir, tmpdir):
    """Call ``downloadGFAS`` for 2022-07-01 to 2022-07-02 and expect a file.

    The download target is ``sub/test_download_cdsapi.nc`` inside pytest's
    temporary directory; the test passes when that path exists afterwards.
    """
    target = tmpdir.mkdir("sub").join("test_download_cdsapi.nc")

    period_start = datetime.datetime(2022, 7, 1)
    period_end = datetime.datetime(2022, 7, 2)
    downloadGFAS(startDate=period_start, endDate=period_end, fileName=target)

    assert os.path.exists(target)


def test_003_inputs_folder_is_empty(root_dir):
    """Check that ``inputs`` under ``root_dir`` holds only ``README.md``."""
    input_folder = os.path.join(root_dir, "inputs")
    present = os.listdir(input_folder)

    assert present == ['README.md'], f"Folder '{input_folder}' is not empty"


def test_004_omDownloadInputs(root_dir, input_files):
    """Check that ``input_files`` lists exactly the expected file names.

    Both sides are sorted before comparison, so listing order is irrelevant.
    """
    expected_sorted = sorted((
        "ch4-electricity.csv",
        "coal-mining_emissions-sources.csv",
        "oil-and-gas-production-and-transport_emissions-sources.csv",
        "NLUM_ALUMV8_250m_2015_16_alb.tif",
        "ch4-sectoral-emissions.csv",
        "landuse-sector-map.csv",
        "nasa-nighttime-lights.tiff",
        "AUS_2021_AUST_SHP_GDA2020.zip",
        "EntericFermentation.nc",
        "termite_emissions_2010-2016.nc",
        "DLEM_totflux_CRU_diagnostic.nc",
        'README.md',
    ))

    assert sorted(input_files) == expected_sorted


def test_005_agriculture_emissions(root_dir, input_files):
    """Check that livestock CH4 stays below the agriculture sector total.

    The livestock total is the rounded sum of ``CH4_total`` in the file at
    ``livestockDataPath``. The agriculture total is the ``agriculture``
    column of the first row of the CSV at ``sectoralEmissionsPath``, scaled
    by 1e9 (presumably a unit conversion; confirm against the input data).
    """
    filepath_livestock = os.path.join(root_dir, livestockDataPath)
    # Read the total inside a context manager so the dataset is closed again
    # instead of staying open for the rest of the test session.
    with xr.open_dataset(filepath_livestock) as livestock_data:
        lsVal = round(np.sum(livestock_data["CH4_total"].values))

    filepath_sector = os.path.join(root_dir, sectoralEmissionsPath)
    sector_data = pd.read_csv(filepath_sector).to_dict(orient="records")[0]

    agVal = round(sector_data["agriculture"] * 1e9)
    agDX = agVal - lsVal

    assert agDX > 0, f"Livestock CH4 exceeds bounds of total agriculture CH4: {agDX / 1e9}"


# TODO Update this test when file structure is clear.
# This test ensures that the grid size for all input files is 10 km.
def test_006_grid_size_for_geo_files(cro_xr, geom_xr, dot_xr) :
expected_cell_size = 10000
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what exactly to test here. Right now the test ensures that the grid size for geo_em.d01.nc, GRIDCRO2D_1, and GRIDDOT2D_1 is 10 km.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial thought was lat/lons, but then I realised that the GRIDCRO/GRIDDOT are a subset of the WRF domain.

That could be a test for after omCreateDomainInfo.py is run, but not applicable here


assert cro_xr.XCELL == expected_cell_size
assert cro_xr.YCELL == expected_cell_size

assert geom_xr.DX == expected_cell_size
assert geom_xr.DY == expected_cell_size

assert dot_xr.XCELL == expected_cell_size
assert dot_xr.YCELL == expected_cell_size


def test_007_compare_in_domain_with_cro_dot_files(input_domain_xr, cro_xr, dot_xr):
    """Check the input domain's dimension sizes against the grid files.

    ``NCOLS``/``NROWS`` of the dot file must equal the sizes of ``COL_D`` and
    ``ROW_D``; those of the cro file must equal the sizes of ``COL`` and
    ``ROW``.
    """
    comparisons = (
        (dot_xr, "NCOLS", "COL_D"),
        (dot_xr, "NROWS", "ROW_D"),
        (cro_xr, "NCOLS", "COL"),
        (cro_xr, "NROWS", "ROW"),
    )
    for grid, count_attr, dim_name in comparisons:
        assert getattr(grid, count_attr) == getattr(input_domain_xr, dim_name).size


def test_008_compare_out_domain_with_cro_dot_files(output_domain_xr, cro_xr, dot_xr):
    """Check the output domain's dimension sizes against the grid files.

    ``NCOLS``/``NROWS`` of the dot file must equal the sizes of ``COL_D`` and
    ``ROW_D``; those of the cro file must equal the sizes of ``COL`` and
    ``ROW``.
    """
    comparisons = (
        (dot_xr, "NCOLS", "COL_D"),
        (dot_xr, "NROWS", "ROW_D"),
        (cro_xr, "NCOLS", "COL"),
        (cro_xr, "NROWS", "ROW"),
    )
    for grid, count_attr, dim_name in comparisons:
        assert getattr(grid, count_attr) == getattr(output_domain_xr, dim_name).size


def test_009_output_domain_xr(output_domain_xr, num_regression):
    """Regression-check the mean of every variable in the output dataset.

    The per-key means are handed to ``num_regression.check`` as one mapping.
    """
    mean_values = {}
    for key in output_domain_xr.keys():
        mean_values[key] = output_domain_xr[key].mean().item()

    num_regression.check(mean_values)


def test_010_emission_discrepancy(root_dir, output_domain_xr, input_files):
    """Check that each output layer sums up to its sectoral input value.

    For every column of the first row of the CSV at ``sectoralEmissionsPath``
    that has a matching ``OCH4_<SECTOR>`` layer in the output, the layer is
    scaled by cell area (``DX * DY``) and ``secsPerYear``, summed, and
    compared with the CSV value times 1e9. The two may differ by less than
    0.1 percent. Sectors without a matching layer are not checked.
    """
    modelAreaM2 = output_domain_xr.DX * output_domain_xr.DY

    def layer_total(name):
        # Sum of the layer's first entry along its leading dimension, scaled
        # by cell area and seconds per year.
        return np.sum(output_domain_xr[name][0].values * modelAreaM2 * secsPerYear)

    sector_csv = os.path.join(root_dir, sectoralEmissionsPath)
    sector_data = pd.read_csv(sector_csv).to_dict(orient="records")[0]

    for sector, reported in sector_data.items():
        layerName = f"OCH4_{sector.upper()}"
        sectorVal = float(reported) * 1e9

        if layerName not in output_domain_xr:
            continue

        layerVal = layer_total(layerName)
        if sector == "agriculture":
            # Livestock is held in its own layer and counts towards agriculture.
            layerVal += layer_total("OCH4_LIVESTOCK")

        diff = round(layerVal - sectorVal)
        perectenageDifference = diff / sectorVal * 100

        assert abs(perectenageDifference) < 0.1, f"Discrepency of {perectenageDifference}% in {sector} emissions"
2 changes: 2 additions & 0 deletions tests/test_om_prior/test_009_output_domain_xr.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,LAT,LON,LANDMASK,LATD,LOND,OCH4_AGRICULTURE,OCH4_LULUCF,OCH4_WASTE,OCH4_LIVESTOCK,OCH4_INDUSTRIAL,OCH4_STATIONARY,OCH4_TRANSPORT,OCH4_ELECTRICITY,OCH4_FUGITIVE,OCH4_TERMITE,OCH4_FIRE,OCH4_WETLANDS,OCH4_TOTAL
0,-26.983160018920898,133.302001953125,0.39128163456916809,-26.980266571044922,133.302001953125,3.8221794892533283e-13,8.2839841777071689e-13,7.6806688034203811e-13,3.3252710482543991e-12,4.6406982245152562e-15,8.5852921845127254e-14,1.8562792898061025e-14,2.3204437472569127e-14,1.9068246493083643e-12,8.1061502996138124e-13,2.6130391795643748e-13,1.8596938045956645e-11,2.7011896889060713e-11
Loading