Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: clean up code formatting and improve consistency in string quotes #199

Merged
merged 1 commit into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 46 additions & 33 deletions src/fhiry/base_fhiry.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def default_output_processor(
) -> str:
return output


class BaseFhiry(object):
def __init__(self, config_json=None):
self._df = None
Expand All @@ -33,12 +34,14 @@ def __init__(self, config_json=None):
self._delete_col_raw_coding = True
if config_json is not None:
try:
with open(config_json, 'r') as f: # config_json is a file path
with open(config_json, "r") as f: # config_json is a file path
self.config = json.load(f)
except:
self.config = json.loads(config_json) # config_json is a json string
self.config = json.loads(config_json) # config_json is a json string
else:
self.config = json.loads('{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }')
self.config = json.loads(
'{ "REMOVE": ["resource.text.div"], "RENAME": { "resource.id": "id" } }'
)

@property
def df(self):
Expand All @@ -53,23 +56,22 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
self._delete_col_raw_coding = delete_col_raw_coding

def read_bundle_from_bundle_dict(self, bundle_dict):
return pd.json_normalize(bundle_dict['entry'])
return pd.json_normalize(bundle_dict["entry"])

def delete_unwanted_cols(self):
for col in self.config['REMOVE']:
for col in self.config["REMOVE"]:
if col in self._df.columns:
del self._df[col]

def rename_cols(self):
self._df.rename(columns=self.config['RENAME'], inplace=True)
self._df.rename(columns=self.config["RENAME"], inplace=True)

def process_df(self):
self.delete_unwanted_cols()
self.convert_object_to_list()
self.add_patient_id()
self.rename_cols()


def process_bundle_dict(self, bundle_dict):
self._df = self.read_bundle_from_bundle_dict(bundle_dict)
self.delete_unwanted_cols()
Expand All @@ -79,44 +81,54 @@ def process_bundle_dict(self, bundle_dict):
return self._df

def convert_object_to_list(self):
"""Convert object to a list of codes
"""
"""Convert object to a list of codes"""
for col in self._df.columns:
if 'coding' in col:
codes = self._df.apply(
lambda x: self.process_list(x[col]), axis=1)
if "coding" in col:
codes = self._df.apply(lambda x: self.process_list(x[col]), axis=1)
self._df = pd.concat(
[self._df, codes.to_frame(name=col+'codes')], axis=1)
[self._df, codes.to_frame(name=col + "codes")], axis=1
)
if self._delete_col_raw_coding:
del self._df[col]
if 'display' in col:
codes = self._df.apply(
lambda x: self.process_list(x[col]), axis=1)
if "display" in col:
codes = self._df.apply(lambda x: self.process_list(x[col]), axis=1)
self._df = pd.concat(
[self._df, codes.to_frame(name=col+'display')], axis=1)
[self._df, codes.to_frame(name=col + "display")], axis=1
)
del self._df[col]

def add_patient_id(self):
"""Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type
"""
"""Create a patientId column with the resource.id if a Patient resource or with the resource.subject.reference if other resource type"""
try:
# PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
newframe = self._df.copy()
newframe['patientId'] = self._df.apply(lambda x: x['resource.id'] if x['resource.resourceType']
== 'Patient' else self.check_subject_reference(x), axis=1)
newframe["patientId"] = self._df.apply(
lambda x: (
x["resource.id"]
if x["resource.resourceType"] == "Patient"
else self.check_subject_reference(x)
),
axis=1,
)
self._df = newframe
except:
try:
newframe = self._df.copy()
newframe['patientId'] = self._df.apply(lambda x: x['id'] if x['resourceType']
== 'Patient' else self.check_subject_reference(x), axis=1)
newframe["patientId"] = self._df.apply(
lambda x: (
x["id"]
if x["resourceType"] == "Patient"
else self.check_subject_reference(x)
),
axis=1,
)
self._df = newframe
except:
pass

def check_subject_reference(self, row):
try:
return row['resource.subject.reference'].replace('Patient/', '')
return row["resource.subject.reference"].replace("Patient/", "")
except:
return ""

Expand All @@ -137,10 +149,10 @@ def process_list(self, myList):
myCodes = []
if isinstance(myList, list):
for entry in myList:
if 'code' in entry:
myCodes.append(entry['code'])
elif 'display' in entry:
myCodes.append(entry['display'])
if "code" in entry:
myCodes.append(entry["code"])
elif "display" in entry:
myCodes.append(entry["display"])
return myCodes

def llm_query(self, query, llm, embed_model=None, verbose=True):
Expand Down Expand Up @@ -177,12 +189,13 @@ def llm_query(self, query, llm, embed_model=None, verbose=True):
else:
embed_model = HuggingFaceEmbeddings(model_name=embed_model)
service_context = ServiceContext.from_defaults(
llm=llm,
embed_model=embed_model,
)
llm=llm,
embed_model=embed_model,
)
query_engine = PandasQueryEngine(
df=self._df,
service_context=service_context,
output_processor=default_output_processor,
verbose=verbose)
return query_engine.query(query)
verbose=verbose,
)
return query_engine.query(query)
6 changes: 2 additions & 4 deletions src/fhiry/bqsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
https://opensource.org/licenses/MIT
"""


from google.cloud import bigquery

from .base_fhiry import BaseFhiry
Expand All @@ -18,7 +17,7 @@ def __init__(self, config_json=None):
self._client = bigquery.Client()
super().__init__(config_json=config_json)

def search(self, query = None):
def search(self, query=None):
if query is None:
_query = """
SELECT *
Expand All @@ -27,12 +26,11 @@ def search(self, query = None):
"""
else:
try:
with open(query, 'r') as f:
with open(query, "r") as f:
_query = f.read()
except:
_query = query

self._df = self._client.query(_query).to_dataframe()
super().process_df()
return self._df

5 changes: 1 addition & 4 deletions src/fhiry/fhirndjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
https://opensource.org/licenses/MIT
"""


import pandas as pd
import json
import os
from .base_fhiry import BaseFhiry
from tqdm import tqdm


class Fhirndjson(BaseFhiry):
def __init__(self, config_json=None):
self._folder = ""
Expand All @@ -29,7 +29,6 @@ def folder(self):
def folder(self, folder):
self._folder = folder


def read_resource_from_line(self, line):
return pd.json_normalize(json.loads(line))

Expand All @@ -52,5 +51,3 @@ def process_file(self, file):
df = pd.concat([df, self._df])
self._df = df
return self._df


27 changes: 16 additions & 11 deletions src/fhiry/fhirsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import requests
from .base_fhiry import BaseFhiry


class Fhirsearch(BaseFhiry):

def __init__(self, fhir_base_url, config_json=None):
Expand All @@ -23,15 +24,20 @@ def search(self, resource_type="Patient", search_parameters={}):

headers = {"Content-Type": "application/fhir+json"}

if '_count' not in search_parameters:
search_parameters['_count'] = self.page_size
if "_count" not in search_parameters:
search_parameters["_count"] = self.page_size

search_url = f'{self.fhir_base_url}/{resource_type}'
r = requests.get(search_url, params=search_parameters, headers=headers, **self.requests_kwargs)
search_url = f"{self.fhir_base_url}/{resource_type}"
r = requests.get(
search_url,
params=search_parameters,
headers=headers,
**self.requests_kwargs,
)
r.raise_for_status()
bundle_dict = r.json()

if 'entry' in bundle_dict:
if "entry" in bundle_dict:
df = super().process_bundle_dict(bundle_dict)

next_page_url = get_next_page_url(bundle_dict)
Expand All @@ -51,13 +57,12 @@ def search(self, resource_type="Patient", search_parameters={}):
return self._df



def get_next_page_url(bundle_dict):
links = bundle_dict.get('link')
links = bundle_dict.get("link")
if links:
for link in links:
relation = link.get('relation')
if relation == 'next':
return link.get('url')
for link in links:
relation = link.get("relation")
if relation == "next":
return link.get("url")

return None
10 changes: 5 additions & 5 deletions src/fhiry/fhiry.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

logger = logging.getLogger(__name__)


class Fhiry(BaseFhiry):
def __init__(self, config_json=None):
self._filename = ""
Expand Down Expand Up @@ -50,10 +51,10 @@ def delete_col_raw_coding(self, delete_col_raw_coding):
self._delete_col_raw_coding = delete_col_raw_coding

def read_bundle_from_file(self, filename):
with open(filename, encoding='utf8', mode='r') as f:
with open(filename, encoding="utf8", mode="r") as f:
json_in = f.read()
json_in = json.loads(json_in)
return pd.json_normalize(json_in['entry'])
return pd.json_normalize(json_in["entry"])

def process_source(self):
"""Read a single JSON resource or a directory full of JSON resources
Expand All @@ -64,7 +65,8 @@ def process_source(self):
for file in tqdm(os.listdir(self._folder)):
if file.endswith(".json"):
self._df = self.read_bundle_from_file(
os.path.join(self._folder, file))
os.path.join(self._folder, file)
)
self.process_df()
if df.empty:
df = self._df
Expand All @@ -84,5 +86,3 @@ def process_bundle_dict(self, bundle_dict):
self._df = self.read_bundle_from_bundle_dict(bundle_dict)
self.process_df()
return self._df


Loading
Loading