Copy and split OboParser into parts
omaus committed Jul 21, 2023
commit 541fb49
25 changes: 25 additions & 0 deletions FsOboParser.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsOboParser", "src\FsOboParser\FsOboParser.fsproj", "{7812F4E0-128E-437C-92CF-84C9F621C459}"
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{7812F4E0-128E-437C-92CF-84C9F621C459}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7812F4E0-128E-437C-92CF-84C9F621C459}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7812F4E0-128E-437C-92CF-84C9F621C459}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7812F4E0-128E-437C-92CF-84C9F621C459}.Release|Any CPU.Build.0 = Release|Any CPU
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {54B9E6DB-7B93-4B11-AF6D-F46EF21F9781}
47 changes: 47 additions & 0 deletions src/FsOboParser/DBXref.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
namespace FsOboParser

open System

module Obo =

//Dbxref definitions take the following form:

//<dbxref name> {optional-trailing-modifier}


//<dbxref name> "<dbxref description>" {optional-trailing-modifier}

//The dbxref is a colon separated key-value pair. The key should be taken from GO.xrf_abbs but this is not a requirement.
//If provided, the dbxref description is a string of zero or more characters describing the dbxref.
//DBXref descriptions are rarely used and as of obof1.4 are discouraged.

//Dbxref lists are used when a tag value must contain several dbxrefs. Dbxref lists take the following form:

//[<dbxref definition>, <dbxref definition>, ...]

//The brackets may contain zero or more comma separated dbxref definitions. An example of a dbxref list can be seen in the GO def for "ribonuclease MRP complex":

//def: "A ribonucleoprotein complex that contains an RNA molecule of the snoRNA family, and cleaves the rRNA precursor as part of rRNA transcript processing. It also has other roles: In S. cerevisiae it is involved in cell cycle-regulated degradation of daughter cell-specific mRNAs, while in mammalian cells it also enters the mitochondria and processes RNAs to create RNA primers for DNA replication." [GOC:sgd_curators, PMID:10690410, PMID:14729943, PMID:7510714]

//Note that the trailing modifiers (like all trailing modifiers) do not need to be decoded or round-tripped by parsers; trailing modifiers can always be optionally ignored. However, all parsers must be able to gracefully ignore trailing modifiers. It is important to recognize that lines which accept a dbxref list may have a trailing modifier for each dbxref in the list, and another trailing modifier for the line itself.
/// A single database cross reference (dbxref) of the form
/// <dbxref name> "<dbxref description>" {optional-trailing-modifier}.
type DBXref = {
    /// The dbxref name (the colon separated key-value pair).
    Name : string
    /// The dbxref description part of the definition.
    Description : string
    /// The trailing modifier part of the definition.
    Modifiers : string
}

let trimComment (line : string) =

let private xrefRegex =

/// Parses a single dbxref definition string into a DBXref record.
/// The input is trimmed and matched against xrefRegex; the named groups
/// "xrefName", "xrefDescription" and "xrefModifiers" fill the record fields.
/// NOTE(review): assumes xrefRegex is a System.Text.RegularExpressions.Regex,
/// in which case groups that did not match yield empty strings - confirm.
let parseDBXref (v:string) =
    let matches = xrefRegex.Match(v.Trim()).Groups
    // The field assignments must be enclosed in braces to form the record
    // expression that is the function's result.
    {
        Name = matches.Item("xrefName").Value
        Description = matches.Item("xrefDescription").Value
        Modifiers = matches.Item("xrefModifiers").Value
    }
111 changes: 111 additions & 0 deletions src/FsOboParser/FastOboGraph.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
namespace FsOboParser

open Obo

//// Definition of OboGraph

//module FastOboGraph =

// /// Obo Term as node
// [<StructuredFormatDisplay("{PrettyString}")>]
// type OboNode = {
// Id : int
// Name : string
// NameSpace : string
// OntologyId : string // GO:
// }
// with
// member this.PrettyString = sprintf "%s:%07i | %s {%s}" this.OntologyId this.Id this.Name this.NameSpace
// interface INode<int>
// with member this.Id = this.Id

// /// Creates OboNode
// let createOboNode id name nameSpace ontologyId =
// {Id = id; Name = name; NameSpace = nameSpace; OntologyId = ontologyId; }

// type OboEdgeType =
// | Is_A
// | Part_Of

// [<StructuredFormatDisplay("{PrettyString}")>]
// type OboEdge = {
// Id : int
// SourceId :int
// TargetId :int }
// with
// member this.PrettyString = if this.Id = this.SourceId then
// sprintf "o---> %07i | (%i)" this.Id this.TargetId
// else
// sprintf "%07i <---o | (%i)" this.Id this.TargetId
// interface IEdge<int> with
// member this.Id = this.Id
// member this.SourceId = this.SourceId
// member this.TargetId = this.TargetId

// /// Creates OboEdge
// let createOboEdge id sourceId targetId =
// {Id = id; SourceId = sourceId; TargetId = targetId}

// type oboAdjacencyNode = AdjacencyNode<OboNode,OboEdge,int>

// /// Tries to extract the numeric part of a GO id such as "GO:0000123"; returns None when str contains no such id
// let tryIdToInt str =
// match str with
// | Regex.RegexValue @"GO:(?<goId>[\d]+)" [ goId; ] -> Some( int goId )
// | _ -> None

// let idToInt str =
// match tryIdToInt str with
// | Some v -> v
// | None -> failwithf "%s invalid GO id" str

// let private oboIdStringToInt s =
// let sa = String.split ':' s
// if sa.Length > 1 then
// sa.[1] |> int
// else
// -1

// /// Creates an OboNode and one OboEdge per IsA entry from an OboTerm; edge ids start at startIndex. Returns (node, edges, next free edge index)
// let fromOboTerm (obo: OboTerm) (startIndex: int) =
// let nodeId = oboIdStringToInt obo.Id
// let node = createOboNode nodeId obo.Name obo.Namespace
// let edges =
// obo.IsA
// |> List.mapi (fun i edId -> let edgeTargetId = oboIdStringToInt edId
// createOboEdge (i+startIndex) nodeId edgeTargetId
// )
// (node,edges,(startIndex + obo.IsA.Length))

// /// Lazily converts a sequence of OboTerms into (node, edges) pairs, numbering the edges consecutively starting at 0
// let oboTermToOboGraph (input: seq<OboTerm>) = //: seq<oboAdjacencyNode> =
// let en = input.GetEnumerator()
// let rec loop (en:System.Collections.Generic.IEnumerator<OboTerm>) acc =
// seq {
// match en.MoveNext() with
// | true -> let cNode,cEdges,cIndex = fromOboTerm en.Current acc

// yield (cNode,cEdges)
// yield! loop en cIndex
// | false -> ()
// }
// loop en 0

// /// Reads obo file
// let readFile path =
// FileIO.readFile path
// |> parseOboTerms
// |> oboTermToOboGraph
// |> Seq.toList
21 changes: 21 additions & 0 deletions src/FsOboParser/FsOboParser.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">


<Compile Include="DBXref.fs" />
<Compile Include="TermSynonym.fs" />
<Compile Include="OboTerm.fs" />
<Compile Include="OboTypeDef.fs" />
<Compile Include="OboOntology.fs" />
<Compile Include="FastOboGraph.fs" />

<PackageReference Include="ISADotNet" Version="0.6.1" />


