Skip to content

Commit

Permalink
Replace string matching with Regex
Browse files Browse the repository at this point in the history
  • Loading branch information
omaus committed May 21, 2024
1 parent d83d8f2 commit 7fa7799
Showing 1 changed file with 34 additions and 19 deletions.
53 changes: 34 additions & 19 deletions playground.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ open OBO.NET.CodeGeneration
#r "nuget: ARCTokenization"

open FSharpAux
open FSharpAux.Regex
open ARCTokenization.Terms

open System
Expand Down Expand Up @@ -100,44 +101,58 @@ type OboOntology =
RelaxUniqueLabelAssumptionForNamespaces = defaultArg RelaxUniqueLabelAssumptionForNamespaces []
}

//let res = (createRegex RegexOptions.None """format-version\s:\s*(?<formatVersion>.+)""").Match "format-version : 17.5.1"
//let res = (createRegex RegexOptions.None """format-version\s:\s*(?<formatVersion>.+)""").Match "format-verson: 17.5.1"
//res.Groups["formatVersion"].Value
//DateTime.ParseExact("31:12:2000 23:59", "dd:MM:yyyy HH:mm", Globalization.CultureInfo.InvariantCulture)

/// Reads an OBO ontology, including its [Term] and [Typedef] stanzas, from a sequence of lines.
static member fromLines verbose (input : seq<string>) =

let skipColonSpace s = String.skipWhile ((<>) ':') s |> String.skipWhile ((=) ' ')
let rxFormatVersion = createRegex RegexOptions.None @"format-version\s*:\s*(?<formatVersion>.+)"
let rxDataVersion = createRegex RegexOptions.None @"(?:data-version|version)\s*:\s*(?<dataVersion>.+)"
let rxOntology = createRegex RegexOptions.None @"ontology\s*:\s*(?<ontology>.+)"
let rxDate = createRegex RegexOptions.None @"date\s*:\s*(?<date>\d{2}:\d{2}:\d{4} \d{2}:\d{2})"
let rxSavedBy = createRegex RegexOptions.None @"saved-by\s*:\s*(?<savedBy>.+)"
let rxAutoGeneratedBy = createRegex RegexOptions.None @"auto-generated-by\s*:\s*(?<autoGeneratedBy>.+)"

let en = input.GetEnumerator()
let rec loop (en : System.Collections.Generic.IEnumerator<string>) terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber =

let rec loop (en : System.Collections.Generic.IEnumerator<string>) terms typedefs formatVersion (dataVersion : string option) ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber =

match en.MoveNext() with
| true ->
let enTrimmed = trimComment en.Current
match enTrimmed with
match trimComment en.Current with
| "[Term]" ->
let lineNumber,parsedTerm = OboTerm.fromLines verbose en lineNumber "" "" false [] "" "" [] [] [] [] [] [] [] [] false [] [] [] false "" ""
loop en (parsedTerm :: terms) typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber
| "[Typedef]" ->
let lineNumber,parsedTypeDef = OboTypeDef.fromLines verbose en lineNumber "" "" "" "" [] [] false false false false false false false
loop en terms (parsedTypeDef :: typedefs) formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces lineNumber
| "format-version" -> loop en terms typedefs (skipColonSpace enTrimmed) dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| "data-version"
| "version" ->
loop en terms typedefs formatVersion (skipColonSpace enTrimmed |> Some) ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| "ontology" ->
loop en terms typedefs formatVersion dataVersion (skipColonSpace enTrimmed |> Some) date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| "date" ->
| x when (rxFormatVersion.Match x).Success ->
if formatVersion <> "" then printfn "WARN: Duplicate format-version!"
loop en terms typedefs (rxFormatVersion.Match x).Groups["formatVersion"].Value dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| x when (rxDataVersion.Match x).Success ->
if dataVersion.IsSome then printfn "WARN: Duplicate data-version!"
loop en terms typedefs formatVersion (Some (rxDataVersion.Match x).Groups["dataVersion"].Value) ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| x when (rxOntology.Match x).Success ->
loop en terms typedefs formatVersion dataVersion (rxOntology.Match x).Groups["ontology"] date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| x when (rxDate.Match x).Success ->
let parsedDate =
try DateTime.ParseExact(skipColonSpace enTrimmed, "dd:MM:yyyy HH:mm", Globalization.CultureInfo.InvariantCulture) |> Some with
_ -> None
try DateTime.ParseExact((rxDate.Match x).Groups["date"].Value, "dd:MM:yyyy HH:mm", Globalization.CultureInfo.InvariantCulture) |> Some with
_ ->
printfn "ERROR: Inproper date given!"
None
loop en terms typedefs formatVersion dataVersion ontology parsedDate savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| "saved-by" ->
| x when String.startsWith "saved-by" x ->
loop en terms typedefs formatVersion dataVersion ontology date (skipColonSpace enTrimmed |> Some) autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| "auto-generated-by" ->
| x when String.startsWith "auto-generated-by" x ->
loop en terms typedefs formatVersion dataVersion ontology date savedBy (skipColonSpace enTrimmed |> Some) subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| "subsetdef" ->
| x when String.startsWith "subsetdef" x ->
loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy (skipColonSpace enTrimmed :: subsetdefs) imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| _ ->
loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| _ ->
| x when String.startsWith "import" x ->
loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs (enTrimmed :: imports) synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| x when String.startsWith "synonymtypedef" x ->
loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
| _ ->
loop en terms typedefs formatVersion dataVersion ontology date savedBy autoGeneratedBy subsetdefs imports synonymtypedefs idSpaces defaultRelationshipIdPrefix idMappings remarks treatXrefsAsEquivalents treatXrefsAsIsAs relaxUniqueIdentifierAssumptionForNamespaces relaxUniqueLabelAssumptionForNamespaces (lineNumber + 1)
Expand Down

0 comments on commit 7fa7799

Please sign in to comment.