Skip to content

Commit

Permalink
Replace hard-coded obsolete file paths with references to CLI args
Browse files Browse the repository at this point in the history
  • Loading branch information
eecavanna committed Feb 1, 2025
1 parent 1e0e1b2 commit 2042c09
Showing 1 changed file with 30 additions and 19 deletions.
49 changes: 30 additions & 19 deletions metadata-translation/src/bin/validate_json.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
import jsonschema
import sys

from jsonschema import Draft7Validator
import json

# test using pytest; call with (python -m) pytest validate_json.py
# def test_always_pass():
# assert True

# def test_always_fail():
# assert False


def validate_json(data_path, schema_path, log_file):
def validate_json(data_path: str, schema_path: str, log_file: str) -> bool:
r"""
TODO: Document this function.
TODO: Add type hints for this function's parameters and its return value.
"""

with open(data_path, "r") as json_file: # load data
Expand All @@ -28,26 +21,44 @@ def validate_json(data_path, schema_path, log_file):
if not valid:
with open(log_file, "w") as fp:
for error in sorted(validator.iter_errors(data), key=lambda e: e.path):
# print(error.message)
fp.write(error.message)

return valid


def test_gold_study_json(
    schema_path: str,
    data_path: str,
    log_file_path: str = "error.log",
) -> bool:
    r"""
    Validates the specified data against the specified schema, writing any validation errors to the specified log file.

    :param schema_path: Path to JSON-formatted NMDC Schema file against which you want to validate the data.
                        Example value: `/path/to/nmdc_materialized_patterns.schema.json`
    :param data_path: Path to JSON-formatted data file you want to validate.
                      Example value: `/path/to/nmdc_etl/gold_study.json`
    :param log_file_path: Path to log file to which you want the function to write validation error messages.
                          Passing `None` is treated the same as omitting the argument (i.e. `error.log` is used).
    :return: The validity flag returned by `validate_json` (`True` when the data is valid).
    :raises AssertionError: If the data is not valid (unless Python is running with assertions disabled,
                            in which case `False` is returned instead).
    """
    # A caller may pass `None` to mean "no log path was given". Fall back to the default
    # so that `validate_json` never tries to open `None` as a file when it logs errors.
    if log_file_path is None:
        log_file_path = "error.log"

    # Note: `validate_json` takes the data path first and the schema path second.
    valid = validate_json(data_path, schema_path, log_file_path)

    assert valid
    return valid


if __name__ == "__main__":
    # Note: In 2025, this script was updated ("quick 'n dirty"-ly) to allow the user to specify the various file
    #       paths via CLI arguments. That update was prompted by team members noticing the hard-coded file paths in
    #       this script were obsolete (i.e. they were paths to files that no longer existed in the repository).

    # If too few CLI arguments were specified, abort and display a usage string.
    if len(sys.argv) < 3:
        raise SystemExit("Usage: script.py SCHEMA_PATH DATA_PATH [LOG_FILE_PATH]")

    cli_kwargs = {
        "schema_path": sys.argv[1],
        "data_path": sys.argv[2],
    }

    # Only forward the log file path when the user actually supplied one. Passing `None` explicitly would
    # override the function's default value and lead to a `TypeError` when the log file is opened.
    if len(sys.argv) >= 4:
        cli_kwargs["log_file_path"] = sys.argv[3]

    print("study test", test_gold_study_json(**cli_kwargs))

0 comments on commit 2042c09

Please sign in to comment.