-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Copied from https://github.com/nfdi4plants/ISADotNet/blob/developer/src/ISADotNet.QueryModel/Obo.fs ISA.NET dependency was necessary.
- Loading branch information
Showing
8 changed files
with
1,298 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 17 | ||
VisualStudioVersion = 17.0.31903.59 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsOboParser", "src\FsOboParser\FsOboParser.fsproj", "{7812F4E0-128E-437C-92CF-84C9F621C459}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{7812F4E0-128E-437C-92CF-84C9F621C459}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{7812F4E0-128E-437C-92CF-84C9F621C459}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{7812F4E0-128E-437C-92CF-84C9F621C459}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{7812F4E0-128E-437C-92CF-84C9F621C459}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {54B9E6DB-7B93-4B11-AF6D-F46EF21F9781} | ||
EndGlobalSection | ||
EndGlobal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
namespace FsOboParser | ||
|
||
open System | ||
|
||
|
||
module Obo = | ||
|
||
//Dbxref definitions take the following form: | ||
|
||
//<dbxref name> {optional-trailing-modifier} | ||
|
||
//or | ||
|
||
//<dbxref name> "<dbxref description>" {optional-trailing-modifier} | ||
|
||
//The dbxref is a colon separated key-value pair. The key should be taken from GO.xrf_abbs but this is not a requirement. | ||
//If provided, the dbxref description is a string of zero or more characters describing the dbxref. | ||
//DBXref descriptions are rarely used and as of obof1.4 are discouraged. | ||
|
||
//Dbxref lists are used when a tag value must contain several dbxrefs. Dbxref lists take the following form: | ||
|
||
//[<dbxref definition>, <dbxref definition>, ...] | ||
|
||
//The brackets may contain zero or more comma separated dbxref definitions. An example of a dbxref list can be seen in the GO def for "ribonuclease MRP complex": | ||
|
||
//def: "A ribonucleoprotein complex that contains an RNA molecule of the snoRNA family, and cleaves the rRNA precursor as part of rRNA transcript processing. It also has other roles: In S. cerevisiae it is involved in cell cycle-regulated degradation of daughter cell-specific mRNAs, while in mammalian cells it also enters the mitochondria and processes RNAs to create RNA primers for DNA replication." [GOC:sgd_curators, PMID:10690410, PMID:14729943, PMID:7510714] | ||
|
||
//Note that the trailing modifiers (like all trailing modifiers) do not need to be decoded or round-tripped by parsers; trailing modifiers can always be optionally ignored. However, all parsers must be able to gracefully ignore trailing modifiers. It is important to recognize that lines which accept a dbxref list may have a trailing modifier for each dbxref in the list, and another trailing modifier for the line itself. | ||
/// A single database cross reference (dbxref) as it appears in an OBO tag value.
type DBXref =
    {
        /// The dbxref name, by convention a colon separated key-value pair such as "PMID:10690410".
        Name        : string
        /// Optional free-text description of the dbxref; empty when none was given.
        Description : string
        /// Optional trailing modifier of the dbxref; empty when none was given.
        Modifiers   : string
    }
|
||
/// Removes everything from the first '!' onwards (the OBO comment marker)
/// and strips surrounding whitespace from what remains.
let trimComment (line : string) =
    match line.IndexOf('!') with
    | -1      -> line.Trim()                        // no comment marker: keep the whole line
    | bangPos -> line.Substring(0, bangPos).Trim()  // keep only the text before the marker
|
||
/// Regular expression for one dbxref definition. It exposes three named groups:
///   xrefName        - leading run of characters that are neither '"' nor '{'
///   xrefDescription - optional double-quoted description (quotes included in the capture)
///   xrefModifiers   - optional brace-enclosed trailing modifier anchored at the end of the input (braces included)
let private xrefRegex =
    let pattern = """(?<xrefName>^([^"{])*)(\s?)(?<xrefDescription>\"(.*?)\")?(\s?)(?<xrefModifiers>\{(.*?)}$)?"""
    new Text.RegularExpressions.Regex(pattern)
|
||
/// Parses a single dbxref definition of the form
///     <dbxref name> ["<dbxref description>"] [{trailing-modifier}]
/// into a DBXref record.
///
/// The input is trimmed before matching. Parts that are absent from the input
/// yield empty strings. Description keeps its surrounding double quotes and
/// Modifiers keeps its surrounding braces, exactly as captured by xrefRegex.
let parseDBXref (v:string) =
    let groups = xrefRegex.Match(v.Trim()).Groups
    {
        // The name group greedily consumes the whitespace that separates the name
        // from a following description or modifier, so strip it here.
        Name        = groups.["xrefName"].Value.Trim()
        Description = groups.["xrefDescription"].Value
        Modifiers   = groups.["xrefModifiers"].Value
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
namespace FsOboParser | ||
|
||
open Obo | ||
|
||
|
||
////######################################## | ||
//// Definition of OboGraph | ||
|
||
|
||
//module FastOboGraph = | ||
|
||
// /// Obo Term as node | ||
// [<StructuredFormatDisplay("{PrettyString}")>] | ||
// type OboNode = { | ||
// Id : int | ||
// Name : string | ||
// NameSpace : string | ||
// OntologyId : string // GO: | ||
// } | ||
// with | ||
// member this.PrettyString = sprintf "%s:%07i | %s {%s}" this.OntologyId this.Id this.Name this.NameSpace | ||
// interface INode<int> | ||
// with member this.Id = this.Id | ||
|
||
|
||
// /// Creates OboNode | ||
// let createOboNode id name nameSpace ontologyId = | ||
// {Id = id; Name = name; NameSpace = nameSpace; OntologyId = ontologyId; } | ||
|
||
|
||
|
||
// type OboEdgeType = | ||
// | Is_A | ||
// | Part_Of | ||
|
||
// [<StructuredFormatDisplay("{PrettyString}")>] | ||
// type OboEdge = { | ||
// Id : int | ||
// SourceId :int | ||
// TargetId :int } | ||
// with | ||
// member this.PrettyString = if this.Id = this.SourceId then | ||
// sprintf "o---> %07i | (%i)" this.Id this.TargetId | ||
// else | ||
// sprintf "%07i <---o | (%i)" this.Id this.TargetId | ||
// interface IEdge<int> with | ||
// member this.Id = this.Id | ||
// member this.SourceId = this.SourceId | ||
// member this.TargetId = this.TargetId | ||
|
||
|
||
// /// Creates OboEdge | ||
// let createOboEdge id sourceId targetId = | ||
// {Id = id; SourceId = sourceId; TargetId = targetId} | ||
|
||
|
||
// type oboAdjacencyNode = AdjacencyNode<OboNode,OboEdge,int> | ||
|
||
|
||
|
||
// /// Tries to read the numeric part of an id of the form "GO:<digits>" as an int; returns None if the id does not match | ||
// let tryIdToInt str = | ||
// match str with | ||
// | Regex.RegexValue @"GO:(?<goId>[\d]+)" [ goId; ] -> Some( int goId ) | ||
// | _ -> None | ||
|
||
// let idToInt str = | ||
// match tryIdToInt str with | ||
// | Some v -> v | ||
// | None -> failwithf "%s invalid GO id" str | ||
|
||
// let private oboIdStringToInt s = | ||
// let sa = String.split ':' s | ||
// if sa.Length > 1 then | ||
// sa.[1] |> int | ||
// else | ||
// -1 | ||
|
||
// /// Creates fromOboTerm from oboTerm startIndex | ||
// let fromOboTerm (obo: OboTerm) (startIndex: int) = | ||
// let nodeId = oboIdStringToInt obo.Id | ||
// let node = createOboNode nodeId obo.Name obo.Namespace | ||
// let edges = | ||
// obo.IsA | ||
// |> List.mapi (fun i edId -> let edgeTargetId = oboIdStringToInt edId | ||
// createOboEdge (i+startIndex) nodeId edgeTargetId | ||
// ) | ||
// (node,edges,(startIndex + obo.IsA.Length)) | ||
|
||
|
||
// /// Creates OboEnumerator from oboNode oboEdge | ||
// let oboTermToOboGraph (input: seq<OboTerm>) = //: seq<oboAdjacencyNode> = | ||
// let en = input.GetEnumerator() | ||
// let rec loop (en:System.Collections.Generic.IEnumerator<OboTerm>) acc = | ||
// seq { | ||
// match en.MoveNext() with | ||
// | true -> let cNode,cEdges,cIndex = fromOboTerm en.Current acc | ||
|
||
// yield (cNode,cEdges) | ||
// yield! loop en cIndex | ||
// | false -> () | ||
// } | ||
// loop en 0 | ||
|
||
|
||
// /// Reads obo file | ||
// let readFile path = | ||
// FileIO.readFile path | ||
// |> parseOboTerms | ||
// |> oboTermToOboGraph | ||
// |> Seq.toList |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net6.0</TargetFramework> | ||
<GenerateDocumentationFile>true</GenerateDocumentationFile> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<Compile Include="DBXref.fs" /> | ||
<Compile Include="TermSynonym.fs" /> | ||
<Compile Include="OboTerm.fs" /> | ||
<Compile Include="OboTypeDef.fs" /> | ||
<Compile Include="OboOntology.fs" /> | ||
<Compile Include="FastOboGraph.fs" /> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="ISADotNet" Version="0.6.1" /> | ||
</ItemGroup> | ||
|
||
</Project> |
Oops, something went wrong.