test: create test data for term-set similarity score analysis #113

Merged: 1 commit, Dec 20, 2024
11 changes: 11 additions & 0 deletions tests/conftest.py
@@ -2,6 +2,7 @@

from json import load
import pytest
import pandas as pd
from spinneret.utilities import load_workbook


@@ -55,3 +56,13 @@ def termset_similarity_score_fields():
"average_test_information_content",
"best_test_information_content",
]


@pytest.fixture(name="termset_similarity_score_dataframe")
def termset_similarity_score_dataframe():
"""Return a fixture for a dataframe of termset similarity scores returned
by the benchmark_against_standard function"""
scores = pd.read_csv(
"tests/data/benchmark/termset_similarity_scores.tsv", sep="\t", encoding="utf-8"
)
return scores
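
For context, a minimal sketch of a test that could consume the new fixture alongside the existing `termset_similarity_score_fields` fixture. The test name and assertions are illustrative, not part of this PR, and assume the fields fixture names a subset of the columns in the TSV test data.

```python
# Illustrative only: not part of this PR. Assumes both fixtures are available
# via tests/conftest.py and that termset_similarity_score_fields names a
# subset of the columns in the TSV test data.
def test_termset_similarity_score_dataframe_fields(
    termset_similarity_score_dataframe, termset_similarity_score_fields
):
    # The benchmark data should load and contain at least one row
    assert not termset_similarity_score_dataframe.empty
    # Each expected score field should appear as a column in the dataframe
    for field in termset_similarity_score_fields:
        assert field in termset_similarity_score_dataframe.columns
```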
11 changes: 11 additions & 0 deletions tests/data/benchmark/termset_similarity_scores.tsv
@@ -0,0 +1,11 @@
standard_dir test_dir standard_file predicate_value element_xpath_value standard_set test_set average_score best_score average_jaccard_similarity best_jaccard_similarity average_phenodigm_score best_phenodigm_score average_standard_information_content best_standard_information_content average_test_information_content best_test_information_content
tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01000317', 'ENVO:01001209', 'ENVO:01001209'] 4.399317289600849 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 7.598198606401752 8.321928094887362
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[31] ['ECSO:00002844'] ['ECSO:00002359', 'ECSO:00001534'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[41] ['ECSO:00001727'] ['ECSO:00000329'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[5] ['ECSO:00000515'] ['ECSO:00001250'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv usesMethod /eml:eml/dataset ['ENVTHES:21335', 'ENVTHES:20223', 'ENVTHES:21337', 'ENVTHES:20243', 'ENVTHES:20285', 'ENVTHES:21339', 'ENVTHES:20304', 'https://www.wikidata.org/wiki/Q591867', 'https://www.wikidata.org/wiki/Q5149058'] ['ENVTHES:20803', 'ENVTHES:10375', 'ENVTHES:20104', 'ENVTHES:22297', 'ENVTHES:10328'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:00000035', 'ENVO:01001209', 'ENVO:01001209'] 4.521487919995395 4.616452786848972 0.2689232631619699 0.2840909090909091 1.0989954987335404 1.1452040294162371 12.185656141890044 12.78135971352466 9.035433165359823 11.196397212803504
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[14] ['ECSO:00001799'] ['ECSO:00001120'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[26] ['ECSO:00001720'] ['ECSO:00001534'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141
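
For reference, a minimal sketch (not part of this PR) of how the scores above could be summarized for the term-set similarity analysis named in the PR title. Grouping by `test_dir` is an assumption about how the analysis slices the data, and the snippet assumes it is run from the repository root.

```python
import pandas as pd

# Illustrative only: summarize the benchmark scores in the TSV above,
# e.g. to compare the two test runs (test_a vs. test_b).
scores = pd.read_csv(
    "tests/data/benchmark/termset_similarity_scores.tsv", sep="\t", encoding="utf-8"
)
summary = scores.groupby("test_dir")[["average_score", "best_score"]].mean()
print(summary)
```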