Skip to content

Commit

Permalink
Merge pull request #27 from nfdi4plants/structural-ontology
Browse files Browse the repository at this point in the history
Add Structural ontologies, rename ArcGraphModel -> ARCTokenization
  • Loading branch information
omaus authored Aug 4, 2023
2 parents 3a0f1a7 + c621cdc commit c841135
Show file tree
Hide file tree
Showing 48 changed files with 3,354 additions and 1,758 deletions.
28 changes: 14 additions & 14 deletions ArcGraphModel.sln
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{67DA0DCC-75F
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ControlledVocabulary", "src\ControlledVocabulary\ControlledVocabulary.fsproj", "{F8E5EFC0-C74B-4C3C-BC22-7A5286A31DF6}"
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ArcGraphModel", "src\ArcGraphModel\ArcGraphModel.fsproj", "{E80010AC-0AEE-4D7C-A85C-3A20C1FB7070}"
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ArcGraphModel.Tests", "tests\ArcGraphModel.Tests\ArcGraphModel.Tests.fsproj", "{5A2C8580-8D5E-4A90-A21F-2C45C672A259}"
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ControlledVocabulary.Tests", "tests\ControlledVocabulary.Tests\ControlledVocabulary.Tests.fsproj", "{2CBEE59D-07E4-460E-8B97-9267965D3F46}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{3DB2A5F4-23F6-4A06-9AE5-CEAC0707735B}"
Expand All @@ -39,6 +35,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{3DB2A5F4
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Build", "build\Build.fsproj", "{ED24D0E3-BA79-4F6D-9A30-D05FF4EBAFDB}"
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ARCTokenization", "src\ARCTokenization\ARCTokenization.fsproj", "{24D99DC2-DF18-45A3-B444-B60351F131DB}"
EndProject
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "ARCTokenization.Tests", "tests\ARCTokenization.Tests\ARCTokenization.Tests.fsproj", "{30177EF1-3980-4FFE-9B49-90B75DCEBDA3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -49,14 +49,6 @@ Global
{F8E5EFC0-C74B-4C3C-BC22-7A5286A31DF6}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F8E5EFC0-C74B-4C3C-BC22-7A5286A31DF6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F8E5EFC0-C74B-4C3C-BC22-7A5286A31DF6}.Release|Any CPU.Build.0 = Release|Any CPU
{E80010AC-0AEE-4D7C-A85C-3A20C1FB7070}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E80010AC-0AEE-4D7C-A85C-3A20C1FB7070}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E80010AC-0AEE-4D7C-A85C-3A20C1FB7070}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E80010AC-0AEE-4D7C-A85C-3A20C1FB7070}.Release|Any CPU.Build.0 = Release|Any CPU
{5A2C8580-8D5E-4A90-A21F-2C45C672A259}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5A2C8580-8D5E-4A90-A21F-2C45C672A259}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5A2C8580-8D5E-4A90-A21F-2C45C672A259}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5A2C8580-8D5E-4A90-A21F-2C45C672A259}.Release|Any CPU.Build.0 = Release|Any CPU
{2CBEE59D-07E4-460E-8B97-9267965D3F46}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2CBEE59D-07E4-460E-8B97-9267965D3F46}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2CBEE59D-07E4-460E-8B97-9267965D3F46}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand All @@ -65,16 +57,24 @@ Global
{ED24D0E3-BA79-4F6D-9A30-D05FF4EBAFDB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{ED24D0E3-BA79-4F6D-9A30-D05FF4EBAFDB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{ED24D0E3-BA79-4F6D-9A30-D05FF4EBAFDB}.Release|Any CPU.Build.0 = Release|Any CPU
{24D99DC2-DF18-45A3-B444-B60351F131DB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{24D99DC2-DF18-45A3-B444-B60351F131DB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{24D99DC2-DF18-45A3-B444-B60351F131DB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{24D99DC2-DF18-45A3-B444-B60351F131DB}.Release|Any CPU.Build.0 = Release|Any CPU
{30177EF1-3980-4FFE-9B49-90B75DCEBDA3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{30177EF1-3980-4FFE-9B49-90B75DCEBDA3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{30177EF1-3980-4FFE-9B49-90B75DCEBDA3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{30177EF1-3980-4FFE-9B49-90B75DCEBDA3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{F8E5EFC0-C74B-4C3C-BC22-7A5286A31DF6} = {67DA0DCC-75F4-4F30-91C3-309A48B13D49}
{E80010AC-0AEE-4D7C-A85C-3A20C1FB7070} = {67DA0DCC-75F4-4F30-91C3-309A48B13D49}
{5A2C8580-8D5E-4A90-A21F-2C45C672A259} = {2EB71559-9BE4-4E02-9763-9092876D1E4A}
{2CBEE59D-07E4-460E-8B97-9267965D3F46} = {2EB71559-9BE4-4E02-9763-9092876D1E4A}
{ED24D0E3-BA79-4F6D-9A30-D05FF4EBAFDB} = {3DB2A5F4-23F6-4A06-9AE5-CEAC0707735B}
{24D99DC2-DF18-45A3-B444-B60351F131DB} = {67DA0DCC-75F4-4F30-91C3-309A48B13D49}
{30177EF1-3980-4FFE-9B49-90B75DCEBDA3} = {2EB71559-9BE4-4E02-9763-9092876D1E4A}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {933E3470-7D09-4F22-A056-2407458B9600}
Expand Down
4 changes: 2 additions & 2 deletions build/ProjectInfo.fs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ let project = "ArcGraphModel"

let testProjects =
[
"tests/ControlledVocabulary.Tests"
"tests/ArcGraphModel.Tests"
"tests/ControlledVocabulary.Tests/ControlledVocabulary.Tests.fsproj"
"tests/ARCTokenization.Tests/ARCTokenization.Tests.fsproj"
]

let solutionFile = $"{project}.sln"
Expand Down
134 changes: 132 additions & 2 deletions playground.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#r "nuget: DocumentFormat.OpenXml"
#r "nuget: FSharpAux"
#r "nuget: FsOboParser"
#r "nuget: FsSpreadsheet, 3.1.1"
#r "nuget: FsSpreadsheet.ExcelIO, 3.1.1"
#r "nuget: FSharp.FGL"
Expand All @@ -33,20 +34,149 @@ open System.Collections.Generic

open FsSpreadsheet
open FsSpreadsheet.ExcelIO
open FsOboParser
//open FsSpreadsheet.DSL
open ControlledVocabulary
open ControlledVocabulary.ParamBase
open ArcGraphModel
open ARCTokenization


// Expected raw cell values for the simple investigation fixture, one inner list per metadata row.
// The list is positional: `allExpectedMetadataTermsFull` zips it with
// `Terms.InvestigationMetadata.cvTerms` (minus its first term), so the order and the number of
// entries here must match that term list exactly.
// The first element of every inner list is a placeholder ("") — for index 0 the consumer ignores
// the string and emits a "Metadata Section Key" CvValue instead; all further elements become
// `ParamValue.Value` tokens for the same term.
// NOTE(review): the path-like entries contain `\i`, which does not look like an F# escape sequence,
// so the backslash is presumably kept literally — a verbatim string (@"...") would make that explicit.
let expectedTermValuesSimple =
[
[""]
[""]
[""]
[""]
[""]
[""]
[""; "iid"]
[""; "ititle"]
[""; "idesc"]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""; "Maus"; "Keider"; "müller"; "oih"]
[""; "Oliver"; "andreas"]
[""; "L. I."; "C."]
[""; "[email protected]"]
[""]
[""]
[""]
[""; "Affe"]
[""]
[""]
[""]
[""]
[""; "sid"]
[""; "stitle"]
[""; "sdesc"]
[""]
[""]
[""; "sid\isa.study.xlsx"]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""; "aid\isa.assay.xlsx"; "aid2\isa.assay.xlsx"]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""; "weil"]
[""; "lukas"]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
[""]
]

/// Flat list of the CvParams expected from parsing the simple investigation fixture.
/// Every row of `expectedTermValuesSimple` is paired positionally with one investigation
/// metadata term; the row's first cell becomes the "Metadata Section Key" token and every
/// further cell becomes a plain value token for the same term.
let allExpectedMetadataTermsFull =
    // the first term is the root term and has no row of its own
    let nonRootTerms = List.skip 1 Terms.InvestigationMetadata.cvTerms
    List.zip expectedTermValuesSimple nonRootTerms
    |> List.collect (fun (rowValues, term) ->
        rowValues
        |> List.mapi (fun cellIndex cellValue ->
            match cellIndex with
            | 0 -> CvParam(term, ParamValue.CvValue (CvTerm("AGMO:00000001", "Metadata Section Key", "AGMO")), [])
            | _ -> CvParam(term, ParamValue.Value cellValue, [])
        )
    )
allExpectedMetadataTermsFull.Length

// Parse the metadata sheet of the simple investigation fixture and look at the number of tokens.
// NOTE(review): all fixture paths below still point at tests/ArcGraphModel.Tests, while the solution
// in this commit references tests/ARCTokenization.Tests — confirm the fixtures directory still exists
// under the old name.
let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile (__SOURCE_DIRECTORY__ + "/tests/ArcGraphModel.Tests/Fixtures/correct/investigation_simple.xlsx")
parsedInvestigationMetadataSimple.Length

// Load the same fixture as a raw workbook to compare against the parsed tokens.
let i_fs = FsWorkbook.fromXlsxFile (__SOURCE_DIRECTORY__ + "/tests/ArcGraphModel.Tests/Fixtures/correct/investigation_simple.xlsx")

// All cells of the first row of the "isa_investigation" worksheet.
(FsWorkbook.getWorksheetByName "isa_investigation" i_fs).CellCollection.GetCells()
|> Seq.filter(fun c -> c.RowNumber = 1)



// Tokens 11 to 20 of the parsed metadata sheet.
parsedInvestigationMetadataSimple
|> List.skip 10
|> List.take 10

// Assay annotation table parsing

let assayTokens = Assay.parseAnnotationTablesFromFile (__SOURCE_DIRECTORY__ + "/tests/ArcGraphModel.Tests/Fixtures/correct/assay_with_single_characteristics.xlsx")

// Investigation metadata parsing

// NOTE(review): this calls `parseMetadataSheetfromXlsxFile`, whereas the call further up uses
// `parseMetadataSheetFromFile` — verify that both names exist on `Investigation`.
let investigationTokens = Investigation.parseMetadataSheetfromXlsxFile (__SOURCE_DIRECTORY__ + "/tests/ArcGraphModel.Tests/Fixtures/correct/full_investigation_mkay.xlsx")

//let inves = FsWorkbook.fromXlsxFile @"C:\Users\revil\OneDrive\CSB-Stuff\NFDI\testARC30\isa.investigation.xlsx"
//let inves = FsWorkbook.fromXlsxFile @"C:\Users\olive\OneDrive\CSB-Stuff\NFDI\testARC30\isa.investigation.xlsx"
let inves = FsWorkbook.fromXlsxFile (__SOURCE_DIRECTORY__ + "/tests/ArcGraphModel.Tests/Fixtures/isa.investigation.xlsx")
let inves = FsWorkbook.fromXlsxFile (__SOURCE_DIRECTORY__ + "/tests/ArcGraphModel.Tests/Fixtures/correct/full_investigation_mkay.xlsx")

let invesWs = FsWorkbook.getWorksheets inves |> Seq.head
invesWs.RescanRows()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,30 @@
</PropertyGroup>

<ItemGroup>
<None Include="ArcStructuralOntology/arc_structure.obo" />
<None Include="ArcStructuralOntology/arc_structure.yml" />
<EmbeddedResource Include="structural_ontologies/investigation_metadata_structural_ontology.obo" CopyToOutputDirectory="Always" />
<EmbeddedResource Include="structural_ontologies/study_metadata_structural_ontology.obo" CopyToOutputDirectory="Always" />
<EmbeddedResource Include="structural_ontologies/assay_metadata_structural_ontology.obo" CopyToOutputDirectory="Always" />
<None Include="structural_ontologies/investigation_metadata_structural_ontology.yml" />
<None Include="structural_ontologies/study_metadata_structural_ontology.yml" />
<None Include="structural_ontologies/assay_metadata_structural_ontology.yml" />
<Compile Include="Address.fs" />
<Compile Include="ISA\Terms.fs" />
<Compile Include="ISA\AnnotationTable.fs" />
<Compile Include="ISA\Regex.fs" />
<Compile Include="ISA\KeyParser.fs" />
<Compile Include="ISA\Tokenization.fs" />
<Compile Include="ISA\TokenAggregation.fs" />
<Compile Include="ISA\Worksheet.fs" />
<Compile Include="ISA\TopLevelParsers.fs" />
<Compile Include="Terms.fs" />
<Compile Include="Regex.fs" />
<Compile Include="AnnotationTable.fs" />
<Compile Include="MetadataSheet.fs" />
<Compile Include="Tokenization.fs" />
<Compile Include="Workbook.fs" />
<Compile Include="Worksheet.fs" />
<Compile Include="TopLevelParsers.fs" />
</ItemGroup>


<PropertyGroup>
<Authors>Oliver Maus, Heinrich Lukas Weil, Timo Mühlhaus, Kevin Frey, Kevin Schneider</Authors>
<Description>Fsharp Library for tokenization of isa and cwl files into the ArcGraphModel</Description>
<Summary>Fsharp Library for tokenization of isa and cwl files into the ArcGraphModel</Summary>
<Authors>Oliver Maus, Kevin Schneider, Heinrich Lukas Weil, Timo Muehlhaus, Kevin Frey</Authors>
<Description>F# library for tokenization of ARC metadata into controlled vocabulary tokens</Description>
<Summary>F# library for tokenization of ARC metadata into controlled vocabulary tokens</Summary>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<!--<PackageIcon>logo.png</PackageIcon>-->
<PackageTags>F# FSharp datascience rdm arc dataplant nfdi4plants xlsx io isa fable fable-library fable-javascript</PackageTags>
<PackageTags>F# FSharp datascience rdm arc dataplant nfdi4plants xlsx isa</PackageTags>
<RepositoryUrl>https://github.com/nfdi4plants/ArcGraphModel</RepositoryUrl>
<RepositoryType>git</RepositoryType>
</PropertyGroup>
Expand All @@ -36,7 +39,7 @@
<ProjectReference Include="..\ControlledVocabulary\ControlledVocabulary.fsproj" PackageVersion="[0.0.0, 1.0.0)" />
<PackageReference Include="FSharp.FGL.ArrayAdjacencyGraph" Version="[0.0.2]" />
<PackageReference Include="FSharpAux.Core" Version="[2.0.0]" />
<PackageReference Include="FsOboParser" Version="0.1.0" />
<PackageReference Include="FsOboParser" Version="[0.1.0]" />
<PackageReference Include="FsSpreadsheet" Version="[3.1.1]" />
<PackageReference Include="FsSpreadsheet.ExcelIO" Version="[3.1.1]" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace ArcGraphModel
namespace ARCTokenization

open ControlledVocabulary

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace ArcGraphModel
namespace ARCTokenization

open ControlledVocabulary
open FSharpAux
Expand Down
45 changes: 45 additions & 0 deletions src/ARCTokenization/MetadataSheet.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
namespace ARCTokenization

open ControlledVocabulary
open FSharpAux
open FsSpreadsheet
open ARCTokenization.Terms

module MetadataSheet =

    /// Partial active pattern: succeeds when `key` is equal to the name of one of the given
    /// `terms` (compared via `CvTerm.getName`) and yields the first such term.
    let (|Term|_|) (terms : CvTerm list) (key : string) : CvTerm Option =
        terms
        |> List.tryFind (fun term -> CvTerm.getName term = key)

    /// Total active pattern used as the fallback case: always matches and yields the key unchanged.
    let (|UnMatchable|) (key : string) : string =
        key

    /// Shared implementation of the key parsers below.
    /// Returns a function that wraps a `ParamValue` into a `CvParam` of the matching term when
    /// `key` is the name of one of `terms`, and into a `UserParam` named after `key` otherwise.
    /// `attributes` are attached to the created param in both cases.
    let private parseKeyWith (terms : CvTerm list) (attributes : IParam list) (key : string) : ParamValue -> IParam =
        match key with
        | Term terms term ->
            fun pv -> CvParam(term,pv,attributes)
        | UnMatchable name ->
            fun pv -> UserParam(name,pv,attributes)

    // We need separate entry points here because matching is done based on term name, and some
    // names are contained in multiple structural ontologies
    // (e.g. the study metadata section is a copy of the respective section in an investigation file).

    /// Parses a metadata key against the terms in `AssayMetadata.cvTerms`.
    /// See `parseKeyWith` for the shape of the returned function.
    let parseAssayKey (attributes : IParam list) (key : string) : ParamValue -> IParam =
        parseKeyWith AssayMetadata.cvTerms attributes key

    /// Parses a metadata key against the terms in `StudyMetadata.cvTerms`.
    /// See `parseKeyWith` for the shape of the returned function.
    let parseStudyKey (attributes : IParam list) (key : string) : ParamValue -> IParam =
        parseKeyWith StudyMetadata.cvTerms attributes key

    /// Parses a metadata key against the terms in `InvestigationMetadata.cvTerms`.
    /// See `parseKeyWith` for the shape of the returned function.
    let parseInvestigationKey (attributes : IParam list) (key : string) : ParamValue -> IParam =
        parseKeyWith InvestigationMetadata.cvTerms attributes key
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/// <summary>
/// This module contains unified regex patterns and matching functions to parse isa tab column headers to BuildingBlock information.
/// </summary>
namespace ArcGraphModel.ISA
namespace ARCTokenization.ISA

open System

Expand Down
Loading

0 comments on commit c841135

Please sign in to comment.