diff --git a/.github/actions/setup/action.yaml b/.github/actions/setup/action.yaml index 49de7b9..0362f6a 100644 --- a/.github/actions/setup/action.yaml +++ b/.github/actions/setup/action.yaml @@ -49,3 +49,4 @@ runs: echo "${VIRTUAL_ENV}/bin" >> $GITHUB_PATH shell: bash + diff --git a/scripts/omCreateDomainInfo.py b/scripts/omCreateDomainInfo.py index 32564da..cf9db22 100644 --- a/scripts/omCreateDomainInfo.py +++ b/scripts/omCreateDomainInfo.py @@ -22,26 +22,30 @@ from openmethane_prior.omInputs import domainPath, geomFilePath, croFilePath, dotFilePath import xarray as xr +import os +from pathlib import Path + +root_path = Path(__file__).parent.parent domainXr = xr.Dataset() -with xr.open_dataset( geomFilePath) as geomXr: +with xr.open_dataset( os.path.join(root_path, geomFilePath)) as geomXr: for attr in ['DX', 'DY', 'TRUELAT1','TRUELAT2', 'MOAD_CEN_LAT', 'STAND_LON']: domainXr.attrs[attr] = geomXr.attrs[attr] -with xr.open_dataset( croFilePath) as croXr: +with xr.open_dataset( os.path.join(root_path, croFilePath)) as croXr: for var in ['LAT','LON']: domainXr[var] = croXr[var] domainXr[var] = croXr[var].squeeze(dim="LAY", drop=True) # copy but remove the 'LAY' dimension domainXr['LANDMASK'] = croXr['LWMASK'].squeeze(dim="LAY", drop=True) # copy but remove the 'LAY' dimension -with xr.open_dataset( dotFilePath) as dotXr: +with xr.open_dataset( os.path.join(root_path, dotFilePath)) as dotXr: # some repetition between the geom and grid files here, XCELL=DX and YCELL=DY for attr in ['XCELL', 'YCELL']: domainXr.attrs[attr] = croXr.attrs[attr] for var in ['LATD','LOND']: domainXr[var] = dotXr[var].rename({'COL':'COL_D', 'ROW':'ROW_D'}) -print(domainPath) -domainXr.to_netcdf(domainPath) +print(os.path.join(root_path, domainPath)) +domainXr.to_netcdf(os.path.join(root_path, domainPath)) diff --git a/scripts/omPrior.py b/scripts/omPrior.py index dea75f0..a385797 100644 --- a/scripts/omPrior.py +++ b/scripts/omPrior.py @@ -57,5 +57,4 @@ 
omWetlandEmis.processEmissions(args.startDate, args.endDate) omOutputs.sumLayers() - -omPriorVerify.verifyEmis() +omPriorVerify.verifyEmis() \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 03bd5d4..f44ee46 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,6 @@ from pathlib import Path -@pytest.fixture +@pytest.fixture(scope="session") def root_dir(): - return Path(__file__).parent.parent \ No newline at end of file + return Path(__file__).parent.parent diff --git a/tests/test_cmaq_files.py b/tests/test_cmaq_files.py deleted file mode 100644 index 31b5a83..0000000 --- a/tests/test_cmaq_files.py +++ /dev/null @@ -1,36 +0,0 @@ -import pytest -import os -import dotenv -import xarray as xr -import subprocess - -# TODO Update this test when file structure is clear. -# This test ensures that the grid size for all input files is 10 km. -# When we re-arrange the files and scripts there may be other -# thing we want to test as well. -def test_grid_size_for_cmaq_files(root_dir, monkeypatch): - - expected_cell_size = 10000 - - monkeypatch.chdir(root_dir) - - dotenv.load_dotenv() - getenv = os.environ.get - - cmaqExamplePath = getenv("CMAQ_EXAMPLE") - - croFilePath = os.path.join(cmaqExamplePath, getenv("CROFILE")) - dotFilePath = os.path.join(cmaqExamplePath, getenv("DOTFILE")) - geomFilePath = os.path.join(cmaqExamplePath, getenv("GEO_EM")) - - with xr.open_dataset(geomFilePath) as geomXr : - assert geomXr.DX == expected_cell_size - assert geomXr.DY == expected_cell_size - - with xr.open_dataset(dotFilePath) as dotXr : - assert dotXr.XCELL == expected_cell_size - assert dotXr.YCELL == expected_cell_size - - with xr.open_dataset(croFilePath) as croXr : - assert croXr.XCELL == expected_cell_size - assert croXr.YCELL == expected_cell_size diff --git a/tests/test_download_data.py b/tests/test_download_data.py deleted file mode 100644 index 768176f..0000000 --- a/tests/test_download_data.py +++ /dev/null @@ -1,55 +0,0 @@ -# work 
around until folder structure is updated -import os -import sys -# insert root directory into python module search path -sys.path.insert(1, os.getcwd()) - -from scripts.omDownloadInputs import root_path, downloads, remote, download_input_files - -import requests -from pathlib import Path - -ROOT_DIRECTORY = Path(__file__).parent.parent - -def test_001_response_for_download_links() : - for filename, filepath in downloads : - url = f"{remote}{filename}" - with requests.get(url, stream=True) as response : - print(f"Response code for {url}: {response.status_code}") - assert response.status_code == 200 - - -def test_002_omDownloadInputs(): - - input_folder = os.path.join(ROOT_DIRECTORY, "inputs") - - EXPECTED_FILES_START = ['README.md'] - EXPECTED_FILES_END = [ - "ch4-electricity.csv", - "coal-mining_emissions-sources.csv", - "oil-and-gas-production-and-transport_emissions-sources.csv", - "NLUM_ALUMV8_250m_2015_16_alb.tif", - "ch4-sectoral-emissions.csv", - "landuse-sector-map.csv", - "nasa-nighttime-lights.tiff", - "AUS_2021_AUST_SHP_GDA2020.zip", - "EntericFermentation.nc", - "termite_emissions_2010-2016.nc", - "DLEM_totflux_CRU_diagnostic.nc", - 'README.md', - ] - - assert os.listdir(input_folder) == EXPECTED_FILES_START, f"Folder '{input_folder}' is not empty" - - download_input_files(root_path=root_path, - downloads=downloads, - remote=remote) - - downloaded_files = os.listdir(input_folder) - - for file in [i for i in downloaded_files if i != 'README.md']: - filepath = os.path.join(input_folder, file) - os.remove(filepath) - - assert sorted(downloaded_files) == sorted(EXPECTED_FILES_END) - diff --git a/tests/test_om_prior.py b/tests/test_om_prior.py index 7e57196..db235e0 100644 --- a/tests/test_om_prior.py +++ b/tests/test_om_prior.py @@ -1,32 +1,202 @@ +import datetime +import os import subprocess +import sys +from pathlib import Path +import numpy as np +import pandas as pd import pytest +import requests import xarray as xr -import os +from 
openmethane_prior.layers.omGFASEmis import downloadGFAS +from openmethane_prior.omInputs import livestockDataPath, sectoralEmissionsPath +from openmethane_prior.omUtils import getenv, secsPerYear +root_path = Path(__file__).parent.parent +sys.path.insert(1, os.path.join(root_path, "scripts")) +from omDownloadInputs import download_input_files, downloads, remote, sectoralEmissionsPath -def test_full_process(num_regression, root_dir, monkeypatch): +@pytest.fixture(scope="session") +def cro_xr(root_dir) : + cmaqExamplePath = getenv("CMAQ_EXAMPLE") + croFilePath = os.path.join(root_dir, cmaqExamplePath, getenv("CROFILE")) + return xr.open_dataset(croFilePath) - monkeypatch.chdir(root_dir) - subprocess.run(["python", "scripts/omDownloadInputs.py"]) +@pytest.fixture(scope="session") +def dot_xr(root_dir) : + cmaqExamplePath = getenv("CMAQ_EXAMPLE") + dotFilePath = os.path.join(root_dir, cmaqExamplePath, getenv("DOTFILE")) + return xr.open_dataset(dotFilePath) - subprocess.run(["python", "scripts/omCreateDomainInfo.py"]) - subprocess.run(["python", "scripts/omPrior.py", "2022-07-01", "2022-07-02"]) +@pytest.fixture(scope="session") +def geom_xr(root_dir) : + cmaqExamplePath = getenv("CMAQ_EXAMPLE") + geomFilePath = os.path.join(root_dir, cmaqExamplePath, getenv("GEO_EM")) + return xr.open_dataset(geomFilePath) - filepath_ds = os.path.join(root_dir, "outputs/out-om-domain-info.nc") - out_om_domain = xr.load_dataset(filepath_ds) - mean_values = {key: out_om_domain[key].mean().item() for key in out_om_domain.keys()} - num_regression.check(mean_values) +# Fixture to download and later remove all input files +@pytest.fixture(scope="session") +def input_files(root_dir) : + download_input_files(root_path=root_dir, + downloads=downloads, + remote=remote) + + input_folder = os.path.join(root_dir, "inputs") - downloaded_files = os.listdir("inputs") + downloaded_files = os.listdir(input_folder) - for file in [i for i in downloaded_files if i != 'README.md']: - filepath = 
os.path.join("inputs", file) + yield downloaded_files + + for file in [i for i in downloaded_files if i != 'README.md'] : + filepath = os.path.join(input_folder, file) os.remove(filepath) - os.remove("outputs/out-om-domain-info.nc") +@pytest.fixture(scope="session") +def input_domain_xr(root_dir, input_files) : + subprocess.run(["python", os.path.join(root_dir, "scripts/omCreateDomainInfo.py")], check=True) + + # Generated by scripts/omCreateDomainInfo.py + filepath_in_domain = os.path.join(root_dir, "inputs/om-domain-info.nc") + + yield xr.load_dataset(filepath_in_domain) + + os.remove(filepath_in_domain) + + +@pytest.fixture(scope="session") +def output_domain_xr(root_dir, input_domain_xr) : + subprocess.run(["python", os.path.join(root_dir, "scripts/omPrior.py"), "2022-07-01", "2022-07-02"], check=True) + + # Generated by scripts/omPrior.py + filepath_out_domain = os.path.join(root_dir, "outputs/out-om-domain-info.nc") + + yield xr.load_dataset(filepath_out_domain) + + os.remove(filepath_out_domain) + + +def test_001_response_for_download_links() : + for filename, filepath in downloads : + url = f"{remote}{filename}" + with requests.get(url, stream=True) as response : + print(f"Response code for {url}: {response.status_code}") + assert response.status_code == 200 + + +def test_002_cdsapi_connection(root_dir, tmpdir) : + filepath = tmpdir.mkdir("sub").join("test_download_cdsapi.nc") + + startDate = datetime.datetime.strptime("2022-07-01", "%Y-%m-%d") + endDate = datetime.datetime.strptime("2022-07-02", "%Y-%m-%d") + + downloadGFAS(startDate=startDate, endDate=endDate, fileName=filepath) + + assert os.path.exists(filepath) + + +def test_003_inputs_folder_is_empty(root_dir) : + input_folder = os.path.join(root_dir, "inputs") + + EXPECTED_FILES = ['README.md'] + + assert os.listdir(input_folder) == EXPECTED_FILES, f"Folder '{input_folder}' is not empty" + + +def test_004_omDownloadInputs(root_dir, input_files) : + EXPECTED_FILES = [ + "ch4-electricity.csv", 
"coal-mining_emissions-sources.csv", + "oil-and-gas-production-and-transport_emissions-sources.csv", + "NLUM_ALUMV8_250m_2015_16_alb.tif", + "ch4-sectoral-emissions.csv", + "landuse-sector-map.csv", + "nasa-nighttime-lights.tiff", + "AUS_2021_AUST_SHP_GDA2020.zip", + "EntericFermentation.nc", + "termite_emissions_2010-2016.nc", + "DLEM_totflux_CRU_diagnostic.nc", + 'README.md', + ] + + assert sorted(input_files) == sorted(EXPECTED_FILES) + + +def test_005_agriculture_emissions(root_dir, input_files) : + filepath_livestock = os.path.join(root_dir, livestockDataPath) + livestock_data = xr.open_dataset(filepath_livestock) + + filepath_sector = os.path.join(root_dir, sectoralEmissionsPath) + sector_data = pd.read_csv(filepath_sector).to_dict(orient="records")[0] + + lsVal = round(np.sum(livestock_data["CH4_total"].values)) + agVal = round(sector_data["agriculture"] * 1e9) + agDX = agVal - lsVal + + assert agDX > 0, f"Livestock CH4 exceeds bounds of total agriculture CH4: {agDX / 1e9}" + + +# TODO Update this test when file structure is clear. +# This test ensures that the grid size for all input files is 10 km. 
+def test_006_grid_size_for_geo_files(cro_xr, geom_xr, dot_xr) : + expected_cell_size = 10000 + + assert cro_xr.XCELL == expected_cell_size + assert cro_xr.YCELL == expected_cell_size + + assert geom_xr.DX == expected_cell_size + assert geom_xr.DY == expected_cell_size + + assert dot_xr.XCELL == expected_cell_size + assert dot_xr.YCELL == expected_cell_size + + +def test_007_compare_in_domain_with_cro_dot_files(input_domain_xr, cro_xr, dot_xr) : + assert dot_xr.NCOLS == input_domain_xr.COL_D.size + assert dot_xr.NROWS == input_domain_xr.ROW_D.size + + assert cro_xr.NCOLS == input_domain_xr.COL.size + assert cro_xr.NROWS == input_domain_xr.ROW.size + + +def test_008_compare_out_domain_with_cro_dot_files(output_domain_xr, cro_xr, dot_xr) : + assert dot_xr.NCOLS == output_domain_xr.COL_D.size + assert dot_xr.NROWS == output_domain_xr.ROW_D.size + + assert cro_xr.NCOLS == output_domain_xr.COL.size + assert cro_xr.NROWS == output_domain_xr.ROW.size + + +def test_009_output_domain_xr(output_domain_xr, num_regression) : + mean_values = {key : output_domain_xr[key].mean().item() for key in output_domain_xr.keys()} + + num_regression.check(mean_values) + + +def test_010_emission_discrepancy(root_dir, output_domain_xr, input_files) : + modelAreaM2 = output_domain_xr.DX * output_domain_xr.DY + + filepath_sector = os.path.join(root_dir, sectoralEmissionsPath) + sector_data = pd.read_csv(filepath_sector).to_dict(orient="records")[0] + + for sector in sector_data.keys() : + + layerName = f"OCH4_{sector.upper()}" + sectorVal = float(sector_data[sector]) * 1e9 + + # Check each layer in the output sums up to the input + if layerName in output_domain_xr : + layerVal = np.sum(output_domain_xr[layerName][0].values * modelAreaM2 * secsPerYear) + + if sector == "agriculture" : + layerVal += np.sum(output_domain_xr["OCH4_LIVESTOCK"][0].values * modelAreaM2 * secsPerYear) + + diff = round(layerVal - sectorVal) + percentageDifference = diff / sectorVal * 100 + + assert abs(percentageDifference) < 0.1, f"Discrepancy of {percentageDifference}% in {sector} emissions" diff --git a/tests/test_om_prior/test_full_process.csv b/tests/test_om_prior/test_007_output_domain_file.csv similarity index 100% rename from tests/test_om_prior/test_full_process.csv rename to tests/test_om_prior/test_007_output_domain_file.csv diff --git a/tests/test_om_prior/test_009_output_domain_xr.csv b/tests/test_om_prior/test_009_output_domain_xr.csv new file mode 100644 index 0000000..a7c52f1 --- /dev/null +++ b/tests/test_om_prior/test_009_output_domain_xr.csv @@ -0,0 +1,2 @@ +,LAT,LON,LANDMASK,LATD,LOND,OCH4_AGRICULTURE,OCH4_LULUCF,OCH4_WASTE,OCH4_LIVESTOCK,OCH4_INDUSTRIAL,OCH4_STATIONARY,OCH4_TRANSPORT,OCH4_ELECTRICITY,OCH4_FUGITIVE,OCH4_TERMITE,OCH4_FIRE,OCH4_WETLANDS,OCH4_TOTAL +0,-26.983160018920898,133.302001953125,0.39128163456916809,-26.980266571044922,133.302001953125,3.8221794892533283e-13,8.2839841777071689e-13,7.6806688034203811e-13,3.3252710482543991e-12,4.6406982245152562e-15,8.5852921845127254e-14,1.8562792898061025e-14,2.3204437472569127e-14,1.9068246493083643e-12,8.1061502996138124e-13,2.6130391795643748e-13,1.8596938045956645e-11,2.7011896889060713e-11