Skip to content

Commit

Permalink
Merge pull request #14 from TranslatorSRI/novelty_errors
Browse files (browse the repository at this point in the history)
Novelty errors
maximusunc authored Aug 17, 2023
2 parents (4c2af4f + dfe0f26), commit 8b775f6
Showing 3 changed files with 109 additions and 110 deletions.
176 changes: 81 additions & 95 deletions app/novelty/compute_novelty.py
Original file line number | Diff line number | Diff line change
@@ -177,100 +177,89 @@ async def extracting_drug_fda_publ_date(message, unknown):
tmp_res = message["results"][tmp]["analyses"][0]["edge_bindings"]
for tmp_1 in tmp_res:
idi += 1
edge = message["results"][tmp]["analyses"][0]["edge_bindings"][tmp_1][0][
edge_id = message["results"][tmp]["analyses"][0]["edge_bindings"][tmp_1][0][
"id"
]
# edge_list = list(message['knowledge_graph']['edges'].keys())
# for idx, idi in enumerate(edge_list):
# if idx % 20 == 0:
# print(f'progressing {idx}')
# edge = edge_list[idx]
edge_attribute = message["knowledge_graph"]["edges"][edge]
# if set(['subject', 'object']).issubset(edge_attribute.keys()):
edge = message["knowledge_graph"]["edges"][edge_id]
if query_chk == 1:
if (
"PUBCHEM" in edge_attribute["subject"]
or "CHEMBL" in edge_attribute["subject"]
or "UNII" in edge_attribute["subject"]
or "RXNORM" in edge_attribute["subject"]
or "UMLS" in edge_attribute["subject"]
or not "MONDO" in edge_attribute["subject"]
"PUBCHEM" in edge["subject"]
or "CHEMBL" in edge["subject"]
or "UNII" in edge["subject"]
or "RXNORM" in edge["subject"]
or "UMLS" in edge["subject"]
or not "MONDO" in edge["subject"]
):
drug_idx = edge_attribute["subject"]
drug_idx = edge["subject"]
else:
drug_idx = edge_attribute["object"]
if set(["attributes"]).issubset(edge_attribute.keys()):
if len(edge_attribute["attributes"]) > 0:
att_type_id = {}
fda = []
pub = []
for i in range(len(edge_attribute["attributes"])):
att_type_id[i] = edge_attribute["attributes"][i][
"attribute_type_id"
]

for key in att_type_id.keys():
if att_type_id[key] in attribute_type_id_list_fda:
fda.append(key)
elif att_type_id[key] in attribute_type_id_list_pub:
pub.append(key)

if len(fda) > 0:
if (
edge_attribute["attributes"][fda[0]]["value"]
== "FDA Approval"
):
fda_status = 0.0
else:
fda_status = 1.0
drug_idx = edge["object"]
edge_attributes = edge.get("attributes") or []
if len(edge_attributes) > 0:
att_type_id = {}
fda = []
pub = []
for i in range(len(edge_attributes)):
att_type_id[i] = edge_attributes[i]["attribute_type_id"]

for key in att_type_id.keys():
if att_type_id[key] in attribute_type_id_list_fda:
fda.append(key)
elif att_type_id[key] in attribute_type_id_list_pub:
pub.append(key)

if len(fda) > 0:
if edge_attributes[fda[0]]["value"] == "FDA Approval":
fda_status = 0.0
else:
fda_status = None

# Publication
if len(pub) > 0:
publications = edge_attribute["attributes"][pub[0]]["value"]
if "|" in publications:
publications = publications.split("|")
if type(publications) == "str":
publications = [publications]

# Removal of all publication entries that are links
publications = [x for x in publications if "http" not in x]
# Removal of all publication entries that are Clinical Trials
publications = [
x for x in publications if "clinicaltrials" not in x
]
number_of_publ = len(publications)

if len(publications) > 0:
# print(publications)
publications_1 = ",".join(publications)
try:
response_pub = await get_publication_info(
publications_1
)
if response_pub["_meta"]["n_results"] == 0:
age_oldest = np.nan
else:
publ_year = []
for key in response_pub["results"].keys():
if "not_found" not in key:
publ_year.extend(
[
int(
response_pub["results"][
key
]["pub_year"]
)
]
)
age_oldest = today.year - min(publ_year)
except ConnectionError as e:
fda_status = 1.0
else:
fda_status = None

# Publication
if len(pub) > 0:
publications = edge_attributes[pub[0]]["value"]
if "|" in publications:
publications = publications.split("|")
if type(publications) == "str":
publications = [publications]

# Removal of all publication entries that are links
publications = [x for x in publications if "http" not in x]
# Removal of all publication entries that are Clinical Trials
publications = [
x for x in publications if "clinicaltrials" not in x
]
number_of_publ = len(publications)

if len(publications) > 0:
# print(publications)
publications_1 = ",".join(publications)
try:
response_pub = await get_publication_info(
publications_1
)
if response_pub["_meta"]["n_results"] == 0:
age_oldest = np.nan
else:
publications = None
number_of_publ = 0.0
age_oldest = np.nan
else:
publ_year = []
for key in response_pub["results"].keys():
if "not_found" not in key:
publ_year.extend(
[
int(
response_pub["results"][key][
"pub_year"
]
)
]
)
age_oldest = today.year - min(publ_year)
except ConnectionError as e:
age_oldest = np.nan
else:
publications = None
number_of_publ = 0.0
age_oldest = np.nan
drug_idx_fda_status.append(
(
idi,
@@ -283,19 +272,16 @@ async def extracting_drug_fda_publ_date(message, unknown):
)
else:
if query_unknown in ["biolink:Gene", "biolink:Protein"]:
if (
"NCBI" in edge_attribute["subject"]
or "GO" in edge_attribute["subject"]
):
gene_idx = edge_attribute["subject"]
if "NCBI" in edge["subject"] or "GO" in edge["subject"]:
gene_idx = edge["subject"]
else:
gene_idx = edge_attribute["object"]
gene_idx = edge["object"]
drug_idx_fda_status.append((idi, gene_idx))
elif query_unknown in ["biolink:Disease", "biolink:Phenotype"]:
if "MONDO" in edge_attribute["subject"]:
dis_idx = edge_attribute["subject"]
if "MONDO" in edge["subject"]:
dis_idx = edge["subject"]
else:
dis_idx = edge_attribute["object"]
dis_idx = edge["object"]
drug_idx_fda_status.append((idi, dis_idx))
if query_chk == 1 and res_chk == 1:
DF = pd.DataFrame(
41 changes: 27 additions & 14 deletions app/ordering_components.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
"""Compute scores for each result in the given message."""
import redis
from tqdm import tqdm
import traceback

from .config import settings
from .clinical_evidence.compute_clinical_evidence import compute_clinical_evidence
@@ -24,7 +25,7 @@ def get_confidence(result, message, logger):
eps is set to 0.001
"""
score_sum = 0
score_sum = 0.0
non_zero_count = 0
eps = 0.001
for analysis in result.get("analyses") or []:
@@ -55,24 +56,36 @@ async def get_novelty(message, logger):
async def get_ordering_components(message, logger):
logger.debug(f"Computing scores for {len(message['results'])} results")
db_conn = redis.Redis(connection_pool=redis_pool)
novelty_scores = await get_novelty(message, logger)
novelty_scores = {}
try:
novelty_scores = await get_novelty(message, logger)
except Exception:
logger.error(f"Novelty score failed: {traceback.format_exc()}")
for result in tqdm(message.get("results") or []):
clinical_evidence_score = get_clinical_evidence(
result,
message,
logger,
db_conn,
)
confidence = 0.0
try:
confidence = get_confidence(result, message, logger)
except Exception:
logger.error(f"Confidence score failed: {traceback.format_exc()}")
clinical_evidence_score = 0.0
try:
clinical_evidence_score = get_clinical_evidence(
result,
message,
logger,
db_conn,
)
except Exception:
logger.error(f"Clinical evidence score failed: {traceback.format_exc()}")
result["ordering_components"] = {
"confidence": get_confidence(result, message, logger),
"confidence": confidence,
"clinical_evidence": clinical_evidence_score,
"novelty": 0,
"novelty": 0.0,
}
if clinical_evidence_score == 0:
# Only compute novelty if there is no clinical evidence
for node_bindings in result.get("node_bindings", {}).values():
for node_binding in node_bindings:
if node_binding["id"] in novelty_scores:
result["ordering_components"]["novelty"] = novelty_scores[
node_binding["id"]
]
result["ordering_components"]["novelty"] = novelty_scores.get(
node_binding["id"], 0.0
)
2 changes: 1 addition & 1 deletion app/server.py
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@

openapi_args = dict(
title="Answer Appraiser",
version="0.3.2",
version="0.3.3",
terms_of_service="",
translator_component="Utility",
translator_teams=["Standards Reference Implementation Team"],

0 comments on commit 8b775f6

Please sign in to comment.