Skip to content

Commit

Permalink
PR Feedback
Browse files (browse the repository at this point in the history)
  • Loading branch information
dogversioning committed Jan 14, 2025
1 parent 470ddb8 commit 137e952
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 14 deletions.
Binary file removed cumulus_library_umls/.DS_Store
Binary file not shown.
12 changes: 6 additions & 6 deletions cumulus_library_umls/ancilary_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ SELECT
str,
5 AS depth
FROM code_5
UNION
UNION ALL
SELECT
rui,
cui1,
Expand All @@ -280,7 +280,7 @@ SELECT
str,
6 AS depth
FROM code_6
UNION
UNION ALL
SELECT
rui,
cui1,
Expand All @@ -291,7 +291,7 @@ SELECT
str,
7 AS depth
FROM code_7
UNION
UNION ALL
SELECT
rui,
cui1,
Expand All @@ -314,7 +314,7 @@ SELECT
str,
2 AS depth
FROM umls__icd10_chapter
UNION
UNION ALL
SELECT
rui,
cui1,
Expand All @@ -325,7 +325,7 @@ SELECT
str,
3 AS depth
FROM umls__icd10_block
UNION
UNION ALL
SELECT
rui,
cui1,
Expand All @@ -336,7 +336,7 @@ SELECT
str,
4 AS depth
FROM umls__icd10_category
UNION
UNION ALL
SELECT
rui,
cui1,
Expand Down
2 changes: 1 addition & 1 deletion cumulus_library_umls/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ study_prefix = "umls"
file_names = [
"umls_builder.py",
"static_builder.py",
"ancilary_tables.sql"
"ancilary_tables.sql",
]

[advanced_options]
Expand Down
15 changes: 8 additions & 7 deletions cumulus_library_umls/umls_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,9 @@ def create_parquet(
:param force_upload: if true, upload to a remote source regardless of what
already exists there
"""
parquet_path = parquet_path / rrf_path.stem
if not force_upload:
if (parquet_path / f"{rrf_path.stem}/{rrf_path.stem}.parquet").exists():
if (parquet_path / f"{rrf_path.stem}.parquet").exists():
return
df = pandas.read_csv(
rrf_path,
Expand All @@ -137,8 +138,8 @@ def create_parquet(
dtype=table["dtype"],
index_col=False,
)
(parquet_path / f"{rrf_path.stem}").mkdir(parents=True, exist_ok=True)
df.to_parquet(parquet_path / f"{rrf_path.stem}/{rrf_path.stem}.parquet")
parquet_path.mkdir(parents=True, exist_ok=True)
df.to_parquet(parquet_path / f"{rrf_path.stem}.parquet")

def prepare_queries(
self,
Expand All @@ -151,10 +152,10 @@ def prepare_queries(
download_path.mkdir(exist_ok=True, parents=True)
parquet_path = pathlib.Path(__file__).resolve().parent / "generated_parquet"
parquet_path.mkdir(exist_ok=True, parents=True)
files, new_version, folder = self.get_umls_data(
files, new_version, umls_version = self.get_umls_data(
download_path, parquet_path, config.force_upload, config.umls_key
)
parquet_path = parquet_path / folder
parquet_path = parquet_path / umls_version
parquet_path.mkdir(exist_ok=True, parents=True)

with base_utils.get_progress_bar() as progress:
Expand All @@ -166,7 +167,7 @@ def prepare_queries(
with open(file) as f:
datasource, table = self.parse_ctl_file(f.readlines())
progress.update(task, description=f"Compressing {datasource}...")
rrf_path = download_path / f"./{folder}/META/{datasource}"
rrf_path = download_path / f"./{umls_version}/META/{datasource}"
self.create_parquet(
rrf_path, parquet_path, table, force_upload=config.force_upload
)
Expand All @@ -192,5 +193,5 @@ def prepare_queries(
)
progress.advance(task)
log_utils.log_transaction(
config=config, manifest=manifest, message=f"UMLS version: {folder}"
config=config, manifest=manifest, message=f"UMLS version: {umls_version}"
)

0 comments on commit 137e952

Please sign in to comment.