Skip to content

Commit

Permalink
test: create test data for term-set similarity score analysis
Browse files Browse the repository at this point in the history
Create a set of test data containing term-set similarity scores for
various configurations, enabling unit testing of downstream functions
that analyze and interpret these scores.
  • Loading branch information
clnsmth authored Dec 20, 2024
1 parent 13b2eb6 commit 513e5e5
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from json import load
import pytest
import pandas as pd
from spinneret.utilities import load_workbook


Expand Down Expand Up @@ -55,3 +56,13 @@ def termset_similarity_score_fields():
"average_test_information_content",
"best_test_information_content",
]


@pytest.fixture(name="termset_similarity_score_dataframe")
def termset_similarity_score_dataframe():
    """Load the term-set similarity score test data as a dataframe.

    The tab-separated, UTF-8 encoded file is read from a path relative to
    the current working directory. Per the original fixture description,
    the data represents scores returned by the
    benchmark_against_standard function.
    """
    tsv_path = "tests/data/benchmark/termset_similarity_scores.tsv"
    return pd.read_csv(tsv_path, sep="\t", encoding="utf-8")
11 changes: 11 additions & 0 deletions tests/data/benchmark/termset_similarity_scores.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
standard_dir test_dir standard_file predicate_value element_xpath_value standard_set test_set average_score best_score average_jaccard_similarity best_jaccard_similarity average_phenodigm_score best_phenodigm_score average_standard_information_content best_standard_information_content average_test_information_content best_test_information_content
tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01000317', 'ENVO:01001209', 'ENVO:01001209'] 4.399317289600849 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 7.598198606401752 8.321928094887362
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[31] ['ECSO:00002844'] ['ECSO:00002359', 'ECSO:00001534'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[41] ['ECSO:00001727'] ['ECSO:00000329'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[5] ['ECSO:00000515'] ['ECSO:00001250'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv usesMethod /eml:eml/dataset ['ENVTHES:21335', 'ENVTHES:20223', 'ENVTHES:21337', 'ENVTHES:20243', 'ENVTHES:20285', 'ENVTHES:21339', 'ENVTHES:20304', 'https://www.wikidata.org/wiki/Q591867', 'https://www.wikidata.org/wiki/Q5149058'] ['ENVTHES:20803', 'ENVTHES:10375', 'ENVTHES:20104', 'ENVTHES:22297', 'ENVTHES:10328'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:00000035', 'ENVO:01001209', 'ENVO:01001209'] 4.521487919995395 4.616452786848972 0.2689232631619699 0.2840909090909091 1.0989954987335404 1.1452040294162371 12.185656141890044 12.78135971352466 9.035433165359823 11.196397212803504
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[14] ['ECSO:00001799'] ['ECSO:00001120'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[26] ['ECSO:00001720'] ['ECSO:00001534'] 0.0 0.0
tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141

0 comments on commit 513e5e5

Please sign in to comment.