Skip to content

Commit

Permalink
prepare assay xlsx file io for FABLE compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
HLWeil committed Oct 9, 2021
1 parent 78a667c commit 2147cb9
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 51 deletions.
105 changes: 64 additions & 41 deletions src/ISADotNet.XLSX/AssayFile/Assay.fs
Original file line number Diff line number Diff line change
Expand Up @@ -5,40 +5,76 @@ open FSharpSpreadsheetML

open ISADotNet

/// Functions for parsing an ISAXLSX Assay File
///
/// This is based on the ISA.Tab Format: https://isa-specs.readthedocs.io/en/latest/isatab.html#assay-table-file
///
/// But with the table being modified according to the SWATE tool: https://github.com/nfdi4plants/Swate
///
/// Additionally, the file can contain several sheets containing parameter tables and a sheet containing additional assay metadata
module AssayFile =

/// Create a new ISADotNet.XLSX assay file consisting of two sheets. The first has the name of the assayIdentifier and is meant to store parameters used in the assay. The second stores additional assay metadata
let init metadataSheetName assayIdentifier path =
Spreadsheet.initWithSST assayIdentifier path
|> MetaData.init metadataSheetName
|> Spreadsheet.close
module Process =

/// Returns processes and other additional information from a sparseMatrix representation of an assay.xlsx sheet
///
/// processNameRoot is the sheetName (or the protocol name you want to use)
///
/// matrixHeaders are the column headers of the table
///
/// matrixLength is the number of rows in the sheet table (only taken by the signature that has this parameter; the signature without it derives the row count from the highest row index found in sparseMatrix)
///
/// sparseMatrix is a sparse representation of the sheet table, with the first part of the key being the column header and the second part being a zero based row index
let fromSparseMatrix (processNameRoot:string) matrixHeaders (matrixLength:int) (sparseMatrix : Dictionary<string*int,string>) =
let fromSparseMatrix (processNameRoot:string) matrixHeaders (sparseMatrix : Dictionary<string*int,string>) =
// Row count = highest zero based row index among all keys, plus one.
// NOTE(review): i starts at 0, so an empty sparseMatrix still yields a length of 1 - confirm this is intended.
let len =
let mutable i = 0
for kv in sparseMatrix do
let j = kv.Key |> snd
if j > i then i <- j
i + 1
// Split the headers into annotation nodes and build a per-row process getter, seeded with a protocol named after processNameRoot
let characteristic,factors,protocol,processGetter =
AnnotationNode.splitIntoNodes matrixHeaders
|> AnnotationTable.getProcessGetter ({Protocol.empty with Name = Some processNameRoot})
characteristic,factors,protocol,

// One process is read per row index, then identical processes are merged and the result is indexed by protocol name
Seq.init matrixLength (processGetter sparseMatrix)
Seq.init len (processGetter sparseMatrix)
|> AnnotationTable.mergeIdenticalProcesses
|> AnnotationTable.indexRelatedProcessesByProtocolName

/// Functions for parsing an ISAXLSX Assay File
///
/// This is based on the ISA.Tab Format: https://isa-specs.readthedocs.io/en/latest/isatab.html#assay-table-file
///
/// But with the table being modified according to the SWATE tool: https://github.com/nfdi4plants/Swate
///
/// Additionally, the file can contain several sheets containing parameter tables and a sheet containing additional assay metadata
module Assay =

/// Builds an assay from the sparseMatrix representation of a single assay.xlsx sheet
///
/// processNameRoot is the sheetName (or the protocol name you want to use)
///
/// matrixHeaders are the column headers of the table
///
/// sparseMatrix is a sparse representation of the sheet table, with the first part of the key being the column header and the second part being a zero based row index
///
/// Returns a triple of the factors, the protocol and the assay, all obtained via Process.fromSparseMatrix
let fromSparseMatrix (processNameRoot:string) matrixHeaders (sparseMatrix : Dictionary<string*int,string>) =
    let characteristicCategories,sheetFactors,sheetProtocol,sheetProcesses =
        Process.fromSparseMatrix processNameRoot matrixHeaders sparseMatrix
    let processList = Seq.toList sheetProcesses
    sheetFactors,sheetProtocol,Assay.create(CharacteristicCategories = characteristicCategories,ProcessSequence = processList)

/// Builds one assay from several sheets, each given as (sheet name, column headers, sparseMatrix)
///
/// See "fromSparseMatrix" function for parameter documentation
///
/// Characteristics and factors of all sheets are concatenated and de-duplicated, protocols and processes are concatenated in sheet order
let fromSparseMatrices (sheets : (string*(string seq)*Dictionary<string*int,string>) seq) =
    // Folds the result of parsing one sheet into the results accumulated from the previous sheets
    let mergeSheet (accCharacteristics,accFactors,accProtocols,accProcesses) (sheetName,sheetHeaders,sheetMatrix) =
        let sheetCharacteristics,sheetFactors,sheetProtocol,sheetProcesses =
            Process.fromSparseMatrix sheetName sheetHeaders sheetMatrix
        List.distinct (List.append accCharacteristics sheetCharacteristics),
        List.distinct (List.append accFactors sheetFactors),
        Seq.append accProtocols (Seq.singleton sheetProtocol),
        Seq.append accProcesses sheetProcesses

    let characteristics,factors,protocols,mergedProcesses =
        Seq.fold mergeSheet (List.empty,List.empty,Seq.empty,Seq.empty) sheets

    // The merged processes of all sheets are passed through updateSamplesByThemselves before being stored in the assay
    let updatedProcesses = AnnotationTable.updateSamplesByThemselves mergedProcesses

    factors,protocols,Assay.create(CharacteristicCategories = characteristics,ProcessSequence = Seq.toList updatedProcesses)

/// Replace this block with JS ---->

/// Create a new ISADotNet.XLSX assay file consisting of two sheets. The first has the name of the assayIdentifier and is meant to store parameters used in the assay. The second stores additional assay metadata
///
/// metadataSheetName is the name given to the metadata sheet
///
/// assayIdentifier is the name given to the first sheet
///
/// path is the location the spreadsheet file is created at
///
/// The document is closed even if adding the metadata sheet fails, so the opened file is not leaked
let init metadataSheetName assayIdentifier path =
    let doc = Spreadsheet.initWithSST assayIdentifier path
    try
        // MetaData.init appends the metadata sheet to the document it is given and hands that document back
        MetaData.init metadataSheetName doc |> ignore
    finally
        Spreadsheet.close doc

/// Parses the assay file
let fromSpreadsheet (doc:DocumentFormat.OpenXml.Packaging.SpreadsheetDocument) =
Expand All @@ -52,9 +88,11 @@ module AssayFile =
sheet
|> SheetData.getRows
|> Seq.map (Row.mapCells (Cell.includeSharedStringValue sst.Value))
|> Seq.map (Row.getIndexedValues None >> Seq.map (fun (i,v) -> (int i) - 1, v))
|> MetaData.fromRows
|> fun (a,p) -> Option.defaultValue Assay.empty a, p
)
|> Option.defaultValue (None,[])
|> Option.defaultValue (Assay.empty,[])

let sheetNames =
Spreadsheet.getWorkbookPart doc
Expand All @@ -63,7 +101,7 @@ module AssayFile =
|> Sheet.Sheets.getSheets
|> Seq.map Sheet.getName

let characteristics,factors,protocols,processes =
let factors,protocols,assay =
sheetNames
|> Seq.collect (fun sheetName ->
match Spreadsheet.tryGetWorksheetPartBySheetName sheetName doc with
Expand All @@ -73,33 +111,16 @@ module AssayFile =
let sheet = Worksheet.getSheetData wsp.Worksheet
let headers = Table.getColumnHeaders table
let m = Table.toSparseValueMatrix sst sheet table
let length =
Table.getArea table
|> fun area -> Table.Area.lowerBoundary area - Table.Area.upperBoundary area |> int

Seq.singleton (fromSparseMatrix sheetName headers length m)

Seq.singleton (sheetName,headers,m)
| None -> Seq.empty
| None -> Seq.empty
)
|> Seq.fold (fun (characteristics',factors',protocols',processes') (characteristics,factors,protocol,processes) ->
List.append characteristics' characteristics |> List.distinct,
List.append factors' factors |> List.distinct,
Seq.append protocols' (Seq.singleton protocol),
Seq.append processes' processes
) (List.empty,List.empty,Seq.empty,Seq.empty)

let processes = AnnotationTable.updateSamplesByThemselves processes

let assay = assayMetaData |> Option.defaultValue Assay.empty
|> fromSparseMatrices

factors,
protocols |> Seq.toList,
contacts,
{assay with
ProcessSequence = Option.fromValueWithDefault [] (processes |> Seq.toList)
CharacteristicCategories = Option.fromValueWithDefault [] characteristics
}
API.Update.UpdateByExisting.updateRecordType assayMetaData assay

/// Parses the assay file
let fromFile (path:string) =
Expand All @@ -116,4 +137,6 @@ module AssayFile =
try
fromSpreadsheet doc
finally
Spreadsheet.close doc
Spreadsheet.close doc

/// ----> up to here
28 changes: 18 additions & 10 deletions src/ISADotNet.XLSX/AssayFile/MetaData.fs
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,45 @@ module MetaData =
/// Writes the assay metadata as a sequence of rows:
/// the assays label row followed by the rows of the given assay,
/// then the contacts label row followed by the rows of the given contacts
let toRows (assay:Assay) (contacts : Person list) =
seq {

// Assays section: label row, then the assay itself
yield Row.ofValues None 0u [assaysLabel]
yield! Assays.writeAssays (None) [assay]
yield SparseRow.fromValues [assaysLabel]
yield! Assays.toRows (None) [assay]

// Contacts section: label row, then one entry per person
yield Row.ofValues None 0u [contactsLabel]
yield! Contacts.writePersons (None) contacts
yield SparseRow.fromValues [contactsLabel]
yield! Contacts.toRows (None) contacts
}
// Assigns one based row indices in output order
|> Seq.mapi (fun i row -> Row.updateRowIndex (i+1 |> uint) row)


/// Read Assay Metadata from excel rows
///
/// Returns the first parsed assay (None if no assay was read) together with the parsed contacts
///
/// Fails with "emptyInvestigationFile" if rows contains no row at all
let fromRows (rows: seq<DocumentFormat.OpenXml.Spreadsheet.Row>) =
let fromRows (rows: seq<SparseRow>) =
let en = rows.GetEnumerator()
// lastLine is the first value of the most recently read row; it decides which section is parsed next
let rec loop lastLine assays contacts lineNumber =

match lastLine with

// Assays section: the section reader consumes rows from the shared enumerator and hands back the line it stopped at
| Some k when k = assaysLabel ->
let currentLine,lineNumber,_,assays = Assays.readAssays None (lineNumber + 1) en
let currentLine,lineNumber,_,assays = Assays.fromRows None (lineNumber + 1) en
loop currentLine assays contacts lineNumber

// Contacts section: handled the same way as the assays section
| Some k when k = contactsLabel ->
let currentLine,lineNumber,_,contacts = Contacts.readPersons None (lineNumber + 1) en
let currentLine,lineNumber,_,contacts = Contacts.fromRows None (lineNumber + 1) en
loop currentLine assays contacts lineNumber

// Any other value (or no value) ends parsing
| k ->
assays |> Seq.tryHead,contacts

if en.MoveNext () then
// First value of the first row: one based column 1u for Row, zero based column 0 for SparseRow
let currentLine = en.Current |> Row.tryGetValueAt None 1u
let currentLine = en.Current |> SparseRow.tryGetValueAt 0
loop currentLine [] [] 1

else
failwith "emptyInvestigationFile"


/// Converts a SparseRow into a spreadsheet Row by inserting every (index,value) pair into an initially empty row
///
/// Each index is incremented by one before insertion (presumably zero based SparseRow indices to one based column indices - confirm against SparseRow)
let rowOfSparseRow (vs : SparseRow) =
    (Row.empty(), vs)
    ||> Seq.fold (fun row (columnIndex,value) ->
        Row.insertValueAt None (uint32 (columnIndex+1)) value row)

/// Replace this block with JS ---->

/// Append an assay metadata sheet with the given sheetname to an existing assay file excel spreadsheet
let init sheetName (doc: DocumentFormat.OpenXml.Packaging.SpreadsheetDocument) =
Expand All @@ -59,6 +64,9 @@ module MetaData =
let personWithComment = Person.make None None None None None None None None None None (Some [worksheetComment])

toRows Assay.empty [personWithComment]
|> Seq.mapi (fun i row ->
rowOfSparseRow row
|> Row.updateRowIndex (i+1 |> uint))
|> Seq.fold (fun s r ->
SheetData.appendRow r s
) sheet
Expand All @@ -71,4 +79,4 @@ module MetaData =

doc


/// ----> up to here

0 comments on commit 2147cb9

Please sign in to comment.