Skip to content

Commit

Permalink
Copy and split OboParser into parts
Browse files Browse the repository at this point in the history
  • Loading branch information
omaus committed Jul 21, 2023
1 parent a2e0ca0 commit 541fb49
Show file tree
Hide file tree
Showing 8 changed files with 1,298 additions and 0 deletions.
25 changes: 25 additions & 0 deletions FsOboParser.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "FsOboParser", "src\FsOboParser\FsOboParser.fsproj", "{7812F4E0-128E-437C-92CF-84C9F621C459}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{7812F4E0-128E-437C-92CF-84C9F621C459}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7812F4E0-128E-437C-92CF-84C9F621C459}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7812F4E0-128E-437C-92CF-84C9F621C459}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7812F4E0-128E-437C-92CF-84C9F621C459}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {54B9E6DB-7B93-4B11-AF6D-F46EF21F9781}
EndGlobalSection
EndGlobal
47 changes: 47 additions & 0 deletions src/FsOboParser/DBXref.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
namespace FsOboParser

open System


module Obo =

//Dbxref definitions take the following form:

//<dbxref name> {optional-trailing-modifier}

//or

//<dbxref name> "<dbxref description>" {optional-trailing-modifier}

//The dbxref is a colon separated key-value pair. The key should be taken from GO.xrf_abbs but this is not a requirement.
//If provided, the dbxref description is a string of zero or more characters describing the dbxref.
//DBXref descriptions are rarely used and as of obof1.4 are discouraged.

//Dbxref lists are used when a tag value must contain several dbxrefs. Dbxref lists take the following form:

//[<dbxref definition>, <dbxref definition>, ...]

//The brackets may contain zero or more comma separated dbxref definitions. An example of a dbxref list can be seen in the GO def for "ribonuclease MRP complex":

//def: "A ribonucleoprotein complex that contains an RNA molecule of the snoRNA family, and cleaves the rRNA precursor as part of rRNA transcript processing. It also has other roles: In S. cerevisiae it is involved in cell cycle-regulated degradation of daughter cell-specific mRNAs, while in mammalian cells it also enters the mitochondria and processes RNAs to create RNA primers for DNA replication." [GOC:sgd_curators, PMID:10690410, PMID:14729943, PMID:7510714]

//Note that the trailing modifiers (like all trailing modifiers) do not need to be decoded or round-tripped by parsers; trailing modifiers can always be optionally ignored. However, all parsers must be able to gracefully ignore trailing modifiers. It is important to recognize that lines which accept a dbxref list may have a trailing modifier for each dbxref in the list, and another trailing modifier for the line itself.
/// A single database cross-reference (dbxref) definition, split into its three parts.
type DBXref =
    {
        /// The dbxref name, e.g. a colon separated key-value pair such as "PMID:10690410".
        Name        : string
        /// The optional dbxref description; empty when the definition carries none.
        Description : string
        /// The optional trailing modifier of the dbxref; empty when the definition carries none.
        Modifiers   : string
    }

/// Removes a trailing comment from a line: everything from the first '!' onwards
/// is dropped, and the remaining text is returned with surrounding whitespace trimmed.
/// A line without '!' is returned trimmed but otherwise unchanged.
let trimComment (line : string) =
    let bangIndex = line.IndexOf('!')
    let content =
        if bangIndex < 0 then line
        else line.Substring(0, bangIndex)
    content.Trim()

/// Regular expression that splits one dbxref definition into three named groups:
///   xrefName        - everything from the start of the input up to the first '"' or '{'
///   xrefDescription - an optional double-quoted description (quotes are part of the group)
///   xrefModifiers   - an optional '{...}' block that must end the input (braces are part of the group)
/// Every part is optional, so the expression matches any input.
let private xrefRegex =
    let pattern =
        """(?<xrefName>^([^"{])*)(\s?)(?<xrefDescription>\"(.*?)\")?(\s?)(?<xrefModifiers>\{(.*?)}$)?"""
    new Text.RegularExpressions.Regex(pattern)

/// Parses a single dbxref definition of the form
///   <dbxref name> ["<dbxref description>"] [{optional-trailing-modifier}]
/// into a DBXref record.
///
/// The input is trimmed before matching. Description keeps its surrounding double
/// quotes and Modifiers keeps its surrounding braces, exactly as matched; both are
/// empty strings when the corresponding part is absent.
let parseDBXref (v:string) =
    let groups = xrefRegex.Match(v.Trim()).Groups
    {
        // The name group is a greedy [^"{]* and therefore also swallows the whitespace
        // that separates the name from a following description or modifier block.
        // Strip it so that e.g. "GOC:mah {m}" yields the name "GOC:mah", not "GOC:mah ".
        Name = groups.Item("xrefName").Value.TrimEnd()
        Description = groups.Item("xrefDescription").Value
        Modifiers = groups.Item("xrefModifiers").Value
    }
111 changes: 111 additions & 0 deletions src/FsOboParser/FastOboGraph.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
namespace FsOboParser

open Obo


////########################################
//// Definition of OboGraph


//module FastOboGraph =

// /// Obo Term as node
// [<StructuredFormatDisplay("{PrettyString}")>]
// type OboNode = {
// Id : int
// Name : string
// NameSpace : string
// OntologyId : string // GO:
// }
// with
// member this.PrettyString = sprintf "%s:%07i | %s {%s}" this.OntologyId this.Id this.Name this.NameSpace
// interface INode<int>
// with member this.Id = this.Id


// /// Creates OboNode
// let createOboNode id name nameSpace ontologyId =
// {Id = id; Name = name; NameSpace = nameSpace; OntologyId = ontologyId; }



// type OboEdgeType =
// | Is_A
// | Part_Of

// [<StructuredFormatDisplay("{PrettyString}")>]
// type OboEdge = {
// Id : int
// SourceId :int
// TargetId :int }
// with
// member this.PrettyString = if this.Id = this.SourceId then
// sprintf "o---> %07i | (%i)" this.Id this.TargetId
// else
// sprintf "%07i <---o | (%i)" this.Id this.TargetId
// interface IEdge<int> with
// member this.Id = this.Id
// member this.SourceId = this.SourceId
// member this.TargetId = this.TargetId


// /// Creates OboEdge
// let createOboEdge id sourceId targetId =
// {Id = id; SourceId = sourceId; TargetId = targetId}


// type oboAdjacencyNode = AdjacencyNode<OboNode,OboEdge,int>



// /// Tries to read the digits following "GO:" in str as an int; returns None if str does not match
// let tryIdToInt str =
// match str with
// | Regex.RegexValue @"GO:(?<goId>[\d]+)" [ goId; ] -> Some( int goId )
// | _ -> None

// let idToInt str =
// match tryIdToInt str with
// | Some v -> v
// | None -> failwithf "%s invalid GO id" str

// let private oboIdStringToInt s =
// let sa = String.split ':' s
// if sa.Length > 1 then
// sa.[1] |> int
// else
// -1

// /// Creates fromOboTerm from oboTerm startIndex
// let fromOboTerm (obo: OboTerm) (startIndex: int) =
// let nodeId = oboIdStringToInt obo.Id
// let node = createOboNode nodeId obo.Name obo.Namespace
// let edges =
// obo.IsA
// |> List.mapi (fun i edId -> let edgeTargetId = oboIdStringToInt edId
// createOboEdge (i+startIndex) nodeId edgeTargetId
// )
// (node,edges,(startIndex + obo.IsA.Length))


// /// Creates OboEnumerator from oboNode oboEdge
// let oboTermToOboGraph (input: seq<OboTerm>) = //: seq<oboAdjacencyNode> =
// let en = input.GetEnumerator()
// let rec loop (en:System.Collections.Generic.IEnumerator<OboTerm>) acc =
// seq {
// match en.MoveNext() with
// | true -> let cNode,cEdges,cIndex = fromOboTerm en.Current acc

// yield (cNode,cEdges)
// yield! loop en cIndex
// | false -> ()
// }
// loop en 0


// /// Reads obo file
// let readFile path =
// FileIO.readFile path
// |> parseOboTerms
// |> oboTermToOboGraph
// |> Seq.toList
21 changes: 21 additions & 0 deletions src/FsOboParser/FsOboParser.fsproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
</PropertyGroup>

<ItemGroup>
<Compile Include="DBXref.fs" />
<Compile Include="TermSynonym.fs" />
<Compile Include="OboTerm.fs" />
<Compile Include="OboTypeDef.fs" />
<Compile Include="OboOntology.fs" />
<Compile Include="FastOboGraph.fs" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="ISADotNet" Version="0.6.1" />
</ItemGroup>

</Project>
Loading

0 comments on commit 541fb49

Please sign in to comment.