Skip to content

Commit

Permalink
implement stringtable in arctable json compression
Browse files Browse the repository at this point in the history
  • Loading branch information
HLWeil committed Feb 6, 2024
1 parent ee67104 commit 8b36c55
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 53 deletions.
6 changes: 3 additions & 3 deletions src/ISA/ISA.Json/ARCtrl.ISA.Json.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<Compile Include="Decode.fs" />
<Compile Include="GEncode.fs" />
<Compile Include="Comment.fs" />
<Compile Include="StringTable.fs" />
<Compile Include="Ontology.fs" />
<Compile Include="Factor.fs" />
<Compile Include="Protocol.fs" />
Expand All @@ -26,7 +27,9 @@
<Compile Include="Assay.fs" />
<Compile Include="Study.fs" />
<Compile Include="Investigation.fs" />
<Compile Include="ArcTypes\OATable.fs" />
<Compile Include="ArcTypes\CompositeCell.fs" />
<Compile Include="ArcTypes\CellTable.fs" />
<Compile Include="ArcTypes\IOType.fs" />
<Compile Include="ArcTypes\CompositeHeader.fs" />
<Compile Include="ArcTypes\ArcTable.fs" />
Expand All @@ -48,9 +51,6 @@
<NpmPackage Name="jsonschema" Version="gte 1.1.0 lt 2.0.0" ResolutionStrategy="Max" />
</NpmDependencies>
</PropertyGroup>
<ItemGroup>
<Content Include="*.fsproj; **\*.fs; **\*.fsi" PackagePath="fable\" />
</ItemGroup>
<PropertyGroup>
<Authors>nfdi4plants, Lukas Weil, Florian Wetzels, Kevin Frey</Authors>
<Description>ARC and ISA json compliant parser for experimental metadata toolkit in F#. This project is meant as an easy means to open, manipulate and save ISA (Investigation,Study,Assay) metadata files in isa-json format.</Description>
Expand Down
65 changes: 40 additions & 25 deletions src/ISA/ISA.Json/ArcTypes/ArcTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ open Thoth.Json
open Thoth.Json.Net
#endif
open ARCtrl.ISA
open System.Collections.Generic

module ArcTable =
let encoder (table: ArcTable) =
Expand Down Expand Up @@ -34,41 +35,26 @@ module ArcTable =
)
)

/// Encodes an ArcTable in the compressed layout.
/// "n" holds the table name as a string-table index, "h" the headers encoded in full
/// (omitted when there are none), and "c" the cells keyed by an (int * int) tuple with each
/// cell written as an index into `cellTable` (omitted when there are none).
/// Strings and cells not yet known are registered in `stringTable` / `cellTable` as a side effect.
/// `oaTable` is not used by this function itself.
let compressedEncoder (stringTable : StringTableMap) (oaTable : OATableMap) (cellTable : CellTableMap) (table: ArcTable) =
    let keyEncoder : Encoder<int*int> = Encode.tuple2 Encode.int Encode.int
    Encode.object [
        yield "n", StringTable.encodeString stringTable table.Name
        if table.Headers.Count <> 0 then
            let headerJson = [ for header in table.Headers -> CompositeHeader.encoder header ]
            yield "h", Encode.list headerJson
        if table.Values.Count <> 0 then
            let cells = table.Values |> Seq.map (fun kv -> kv.Key, kv.Value) |> Map.ofSeq
            yield "c", Encode.map keyEncoder (CellTable.encodeCell cellTable) cells
    ]

/// Decodes an ArcTable from the compressed layout produced by `compressedEncoder`.
/// "n" is resolved through `stringTable`, "h" holds the fully encoded headers and "c" holds
/// the cells as indices into `cellTable`. Missing "h" / "c" fields decode to empty collections.
/// `oaTable` is not used by this function itself.
let compressedDecoder (stringTable : StringTableArray) (oaTable : OATableArray) (cellTable : CellTableArray) : Decoder<ArcTable> =
    Decode.object(fun get ->
        let decodedHeader = get.Optional.Field "h" (Decode.list CompositeHeader.decoder) |> Option.defaultValue List.empty |> ResizeArray
        let keyDecoder : Decoder<int*int> = Decode.tuple2 Decode.int Decode.int
        let valueDecoder = CellTable.decodeCell cellTable
        // The encoder writes the cells under "c". Reading any other key makes the optional
        // field come back as None, so the table would silently decode without values.
        let decodedValues = get.Optional.Field "c" (Decode.map' keyDecoder valueDecoder) |> Option.defaultValue Map.empty |> System.Collections.Generic.Dictionary
        ArcTable.create(
            get.Required.Field "n" (StringTable.decodeString stringTable),
            decodedHeader,
            decodedValues
        )
    )
Expand All @@ -87,4 +73,33 @@ module ArcTableExtensions =
let spaces = defaultArg spaces 0
Encode.toString spaces (ArcTable.encoder this)

static member toJsonString(a:ArcTable) = a.ToJsonString()
static member toJsonString(a:ArcTable) = a.ToJsonString()

/// Parses the compressed json produced by `ToCompressedJsonString` back into an ArcTable.
/// The lookup tables are decoded first, each one resolving its entries through the tables
/// decoded before it: strings, then ontology annotations, then cells, then the table itself.
/// Raises a failure exception carrying the decoder error when the json cannot be parsed.
static member fromCompressedJsonString (jsonString: string) : ArcTable =
    let decoder =
        Decode.object(fun get ->
            let stringTable = get.Required.Field "stringTable" (StringTable.decoder)
            // Must read the "oaTable" field: the "stringTable" field holds plain strings,
            // not encoded ontology annotations.
            let oaTable = get.Required.Field "oaTable" (OATable.decoder stringTable)
            let cellTable = get.Required.Field "cellTable" (CellTable.decoder stringTable oaTable)
            get.Required.Field "table" (ArcTable.compressedDecoder stringTable oaTable cellTable)
        )
    match Decode.fromString decoder jsonString with
    | Ok r -> r
    | Error e -> failwithf "Error. Unable to parse json string to ArcTable: %s" e

/// Serialises this table to compressed json: an object with the fields "cellTable",
/// "oaTable", "stringTable" and "table". `spaces` is the indentation passed to
/// `Encode.toString` and defaults to 0.
member this.ToCompressedJsonString(?spaces) : string =
    let indent = defaultArg spaces 0
    let strings = Dictionary()
    let annotations = Dictionary()
    let cells = Dictionary()
    // Each step fills the lookup tables consumed by the steps after it, so the order of
    // these bindings must not change: table -> cells -> ontology annotations -> strings.
    let tableJson = ArcTable.compressedEncoder strings annotations cells this
    let cellsJson = CellTable.arrayFromMap cells |> CellTable.encoder strings annotations
    let annotationsJson = OATable.arrayFromMap annotations |> OATable.encoder strings
    let stringsJson = StringTable.arrayFromMap strings |> StringTable.encoder
    [
        "cellTable", cellsJson
        "oaTable", annotationsJson
        "stringTable", stringsJson
        "table", tableJson
    ]
    |> Encode.object
    |> Encode.toString indent

/// Static counterpart of `ToCompressedJsonString`: serialises `a` to compressed json
/// with the default indentation.
static member toCompressedJsonString(a:ArcTable) = a.ToCompressedJsonString()
47 changes: 47 additions & 0 deletions src/ISA/ISA.Json/ArcTypes/CellTable.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
namespace rec ARCtrl.ISA.Json

#if FABLE_COMPILER
open Thoth.Json
#else
open Thoth.Json.Net
#endif
open ARCtrl.ISA

open ARCtrl.ISA.Aux

type CellTableMap = System.Collections.Generic.Dictionary<CompositeCell,int>

type CellTableArray = array<CompositeCell>

/// Lookup table that lets every distinct CompositeCell be written once and referenced by index.
module CellTable =

    let [<Literal>] CellType = "celltype"
    let [<Literal>] CellValues = "values"

    /// Returns the cells of the map as an array ordered by their stored index.
    let arrayFromMap (otm : CellTableMap) : CellTableArray=
        otm
        |> Seq.sortBy (fun kv -> kv.Value)
        |> Seq.map (fun kv -> kv.Key)
        |> Seq.toArray

    /// Encodes the cell array as a json array of compressed cells. Encoding a cell may
    /// register new entries in `stringTable` and `oaTable`.
    let encoder (stringTable : StringTableMap) (oaTable : OATableMap) (ot: CellTableArray) =
        ot
        |> Array.map (CompositeCell.compressedEncoder stringTable oaTable)
        |> Encode.array

    /// Decodes a json array of compressed cells, resolving their contents through the given tables.
    let decoder (stringTable : StringTableArray) (oaTable : OATableArray) : Decoder<CellTableArray> =
        Decode.array (CompositeCell.compressedDecoder stringTable oaTable)

    /// Encodes a cell as its index in the map. A cell that is not in the map yet is added
    /// with the next free index (the current entry count).
    let encodeCell (otm : CellTableMap) (cc : CompositeCell) =
        match Dict.tryFind cc otm with
        | Some i -> Encode.int i
        | None ->
            let i = otm.Count
            otm.Add(cc,i)
            Encode.int i

    /// Decodes an integer and returns the cell stored at that index.
    /// An index outside the table is reported as a decoder error rather than by indexing
    /// past the end of the array.
    let decodeCell (ot : CellTableArray) : Decoder<CompositeCell> =
        Decode.int
        |> Decode.andThen (fun i ->
            if i >= 0 && i < ot.Length then
                Decode.succeed ot.[i]
            else
                Decode.fail (sprintf "Cell table index %i is out of range (table has %i entries)" i ot.Length)
        )
57 changes: 32 additions & 25 deletions src/ISA/ISA.Json/ArcTypes/CompositeCell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@ open Thoth.Json.Net
#endif
open ARCtrl.ISA

open ARCtrl.ISA.Aux

module CompositeCell =

let [<Literal>] CellType = "celltype"
let [<Literal>] CellValues = "values"

let [<Literal>] CompressedCellType = "t"
let [<Literal>] CompressedCellValues = "v"

let encoder (cc: CompositeCell) =
let oaToJsonString (oa:OntologyAnnotation) = OntologyAnnotation.encoder (ConverterOptions()) oa
let t, v =
Expand Down Expand Up @@ -40,7 +45,34 @@ module CompositeCell =
| anyelse -> failwithf "Error reading CompositeCell from json string: %A" anyelse
)

/// Encodes a cell as an object with the cell kind under "t" (as a string-table index) and
/// its payload under "v" (a list of string-table / ontology-annotation-table indices).
/// Unknown strings and annotations are registered in the given tables as a side effect.
let compressedEncoder (stringTable : StringTableMap) (oaTable : OATableMap) (cc: CompositeCell) =
    // The payload is encoded before the kind tag: table indices are handed out in call
    // order, so swapping the two would change the produced json.
    let tag, payload =
        match cc with
        | CompositeCell.FreeText text ->
            "FreeText", [ StringTable.encodeString stringTable text ]
        | CompositeCell.Term term ->
            "Term", [ OATable.encodeOA oaTable term ]
        | CompositeCell.Unitized (value, unitTerm) ->
            let valueJson = StringTable.encodeString stringTable value
            let unitJson = OATable.encodeOA oaTable unitTerm
            "Unitized", [ valueJson; unitJson ]
    let tagJson = StringTable.encodeString stringTable tag
    Encode.object [
        CompressedCellType, tagJson
        CompressedCellValues, Encode.list payload
    ]

/// Decodes a cell from its compressed form: the kind is read from "t" through the string
/// table, the payload from the positions of the "v" array that the kind requires.
/// Raises a failure exception for a kind other than "FreeText", "Term" or "Unitized".
let compressedDecoder (stringTable : StringTableArray) (oaTable : OATableArray) : Decoder<CompositeCell> =
    let stringAt position = Decode.index position (StringTable.decodeString stringTable)
    let termAt position = Decode.index position (OATable.decodeOA oaTable)
    Decode.object (fun get ->
        let cellKind = get.Required.Field CompressedCellType (StringTable.decodeString stringTable)
        match cellKind with
        | "FreeText" ->
            CompositeCell.FreeText (get.Required.Field CompressedCellValues (stringAt 0))
        | "Term" ->
            CompositeCell.Term (get.Required.Field CompressedCellValues (termAt 0))
        | "Unitized" ->
            let value = get.Required.Field CompressedCellValues (stringAt 0)
            let unitTerm = get.Required.Field CompressedCellValues (termAt 1)
            CompositeCell.Unitized (value, unitTerm)
        | anyelse -> failwithf "Error reading CompositeCell from json string: %A" anyelse
    )

[<AutoOpen>]
module CompositeCellExtensions =
Expand All @@ -56,29 +88,4 @@ module CompositeCellExtensions =
Encode.toString spaces (CompositeCell.encoder this)

static member toJsonString(a:CompositeCell) = a.ToJsonString()



type ObjectTableMap = System.Collections.Generic.Dictionary<CompositeCell,int>

type ObjectTableArray = array<CompositeCell>

/// Lookup table of CompositeCells, written as an array of fully encoded cells.
module ObjectTable =

    let [<Literal>] CellType = "celltype"
    let [<Literal>] CellValues = "values"

    /// Returns the cells of the map as an array ordered by their stored index.
    let arrayFromMap (otm : ObjectTableMap) : ObjectTableArray=
        [| for entry in otm |> Seq.sortBy (fun kv -> kv.Value) -> entry.Key |]

    /// Encodes every cell with `CompositeCell.encoder` and wraps the results in a json array.
    let encoder (ot: ObjectTableArray) =
        Encode.array (Array.map CompositeCell.encoder ot)

    /// Decodes a json array of cells encoded with `CompositeCell.encoder`.
    let decoder : Decoder<ObjectTableArray> =
        CompositeCell.decoder |> Decode.array

44 changes: 44 additions & 0 deletions src/ISA/ISA.Json/ArcTypes/OATable.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
namespace rec ARCtrl.ISA.Json

#if FABLE_COMPILER
open Thoth.Json
#else
open Thoth.Json.Net
#endif
open ARCtrl.ISA

open ARCtrl.ISA.Aux

type OATableMap = System.Collections.Generic.Dictionary<OntologyAnnotation,int>

type OATableArray = array<OntologyAnnotation>

/// Lookup table that lets every distinct OntologyAnnotation be written once and referenced by index.
module OATable =

    /// Returns the annotations of the map as an array ordered by their stored index.
    let arrayFromMap (otm : OATableMap) : OATableArray=
        otm
        |> Seq.sortBy (fun kv -> kv.Value)
        |> Seq.map (fun kv -> kv.Key)
        |> Seq.toArray

    /// Encodes the annotation array as a json array of compressed annotations, using
    /// default converter options. Encoding may register new entries in `stringTable`.
    let encoder (stringTable : StringTableMap) (ot: OATableArray) =
        ot
        |> Array.map (OntologyAnnotation.compressedEncoder stringTable (ConverterOptions()))
        |> Encode.array

    /// Decodes a json array of compressed annotations, resolving strings through `stringTable`.
    let decoder stringTable : Decoder<OATableArray> =
        Decode.array (OntologyAnnotation.compressedDecoder stringTable (ConverterOptions()))

    /// Encodes an annotation as its index in the map. An annotation that is not in the map
    /// yet is added with the next free index (the current entry count).
    let encodeOA (otm : OATableMap) (oa : OntologyAnnotation) =
        match Dict.tryFind oa otm with
        | Some i -> Encode.int i
        | None ->
            let i = otm.Count
            otm.Add(oa,i)
            Encode.int i

    /// Decodes an integer and returns the annotation stored at that index.
    /// An index outside the table is reported as a decoder error rather than by indexing
    /// past the end of the array.
    let decodeOA (ot : OATableArray) : Decoder<OntologyAnnotation> =
        Decode.int
        |> Decode.andThen (fun i ->
            if i >= 0 && i < ot.Length then
                Decode.succeed ot.[i]
            else
                Decode.fail (sprintf "Ontology annotation table index %i is out of range (table has %i entries)" i ot.Length)
        )
26 changes: 26 additions & 0 deletions src/ISA/ISA.Json/Ontology.fs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,32 @@ module OntologyAnnotation =
)
)


/// Encodes an ontology annotation in the compressed layout. The "Name", "TermSourceREF"
/// and "TermAccessionNumber" properties are written under the short keys "a", "ts" and "ta"
/// as string-table indices, registering unknown strings in `stringTable` as a side effect.
/// `oa` is passed as `obj`; its properties are read by name through `GEncode.tryGetPropertyValue`.
let compressedEncoder (stringTable : StringTableMap) (options : ConverterOptions) (oa : obj) =
[
// "@id": generated with genID when options.SetID is set, otherwise taken from the "ID" property.
if options.SetID then "@id", GEncode.toJsonString (oa :?> OntologyAnnotation |> genID)
else GEncode.tryInclude "@id" GEncode.toJsonString (oa |> GEncode.tryGetPropertyValue "ID")
// "@type" is only written when options.IncludeType is set.
if options.IncludeType then "@type", GEncode.toJsonString "OntologyAnnotation"
GEncode.tryInclude "a" (StringTable.encodeString stringTable) (oa |> GEncode.tryGetPropertyValue "Name")
GEncode.tryInclude "ts" (StringTable.encodeString stringTable) (oa |> GEncode.tryGetPropertyValue "TermSourceREF")
GEncode.tryInclude "ta" (StringTable.encodeString stringTable) (oa |> GEncode.tryGetPropertyValue "TermAccessionNumber")
// Comments are encoded with the regular comment encoder, not through the string table.
GEncode.tryInclude "comments" (Comment.encoder options) (oa |> GEncode.tryGetPropertyValue "Comments")
]
// NOTE(review): GEncode.choose presumably drops the entries whose value is missing - confirm against GEncode.
|> GEncode.choose
|> Encode.object


/// Decodes an ontology annotation from the compressed layout: "a", "ts" and "ta" are
/// string-table indices for the name, term source reference and term accession number;
/// "@id" and "comments" are read directly. Every field is optional.
let compressedDecoder (stringTable : StringTableArray) (options : ConverterOptions) : Decoder<OntologyAnnotation> =
    let tableString = StringTable.decodeString stringTable
    Decode.object (fun get ->
        let id = get.Optional.Field "@id" GDecode.uri
        let name = get.Optional.Field "a" tableString
        let termSourceRef = get.Optional.Field "ts" tableString
        let termAccession = get.Optional.Field "ta" tableString
        let comments = get.Optional.Field "comments" (Decode.array (Comment.decoder options))
        OntologyAnnotation.create(
            ?Id = id,
            ?Name = name,
            ?TermSourceREF = termSourceRef,
            ?TermAccessionNumber = termAccession,
            ?Comments = comments
        )
    )

let fromJsonString (s:string) =
GDecode.fromJsonString (decoder (ConverterOptions())) s

Expand Down
45 changes: 45 additions & 0 deletions src/ISA/ISA.Json/StringTable.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
namespace rec ARCtrl.ISA.Json

#if FABLE_COMPILER
open Thoth.Json
#else
open Thoth.Json.Net
#endif
open ARCtrl.ISA

open ARCtrl.ISA.Aux

type StringTableMap = System.Collections.Generic.Dictionary<string,int>

type StringTableArray = array<string>

/// Lookup table that lets every distinct string be written once and referenced by index.
module StringTable =

    /// Returns the strings of the map as an array ordered by their stored index.
    let arrayFromMap (otm : StringTableMap) : StringTableArray=
        otm
        |> Seq.sortBy (fun kv -> kv.Value)
        |> Seq.map (fun kv -> kv.Key)
        |> Seq.toArray

    /// Encodes the string array as a json array of strings.
    let encoder (ot: StringTableArray) =
        ot
        |> Array.map Encode.string
        |> Encode.array

    /// Decodes a json array of strings.
    let decoder : Decoder<StringTableArray> =
        Decode.array Decode.string

    /// Encodes a string as its index in the map. A string that is not in the map yet is
    /// added with the next free index (the current entry count).
    /// `s` is typed `obj` and cast to string; a value of another type raises an InvalidCastException.
    let encodeString (otm : StringTableMap) (s : obj) =
        let s = s :?> string
        match Dict.tryFind s otm with
        | Some i -> Encode.int i
        | None ->
            let i = otm.Count
            otm.Add(s,i)
            Encode.int i

    /// Decodes an integer and returns the string stored at that index.
    /// An index outside the table is reported as a decoder error rather than by indexing
    /// past the end of the array.
    let decodeString (ot : StringTableArray) : Decoder<string> =
        Decode.int
        |> Decode.andThen (fun i ->
            if i >= 0 && i < ot.Length then
                Decode.succeed ot.[i]
            else
                Decode.fail (sprintf "String table index %i is out of range (table has %i entries)" i ot.Length)
        )

0 comments on commit 8b36c55

Please sign in to comment.