From 9c3fe899908d85b434712c7b5b97a355b354e467 Mon Sep 17 00:00:00 2001 From: omaus Date: Sun, 16 Jul 2023 22:40:44 +0200 Subject: [PATCH 1/2] Update worksheet tokenization functions Closes #16. --- playground.fsx | 15 ++++- src/ArcGraphModel.IO/ISA/Tokenization.fs | 3 + src/ArcGraphModel.IO/ISA/Worksheet.fs | 70 +++++++++++++----------- 3 files changed, 55 insertions(+), 33 deletions(-) diff --git a/playground.fsx b/playground.fsx index 0972911..5796eaf 100644 --- a/playground.fsx +++ b/playground.fsx @@ -24,7 +24,9 @@ open System.Collections.Generic //#r "c:/repos/csbiology/fsspreadsheet/src/FsSpreadsheet/bin/Debug/netstandard2.0/FsSpreadsheet.dll" //#r "c:/repos/csbiology/fsspreadsheet/src/FsSpreadsheet.CsvIO/bin/Debug/netstandard2.0/FsSpreadsheet.CsvIO.dll" //#r "c:/repos/csbiology/fsspreadsheet/src/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO.dll" -#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel.dll" +//#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel.dll" +#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\netstandard2.0\ArcGraphModel.dll" +#r @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel.IO\bin\Debug\netstandard2.0\ArcGraphModel.IO.dll" //#r @"C:/Users/olive/.nuget/packages/fsharpaux/1.1.0/lib/net5.0/FSharpAux.dll" open FsSpreadsheet @@ -38,6 +40,17 @@ open ArcType +let inves = FsWorkbook.fromXlsxFile @"C:\Users\revil\OneDrive\CSB-Stuff\NFDI\testARC30\isa.investigation.xlsx" + +let invesWs = FsWorkbook.getWorksheets inves |> Seq.head +invesWs.RescanRows() +invesWs.CellCollection +invesWs.Rows + +let invesWsParsed = ArcGraphModel.IO.Worksheet.parseRowsAggregated invesWs +let invesWsParsed = ArcGraphModel.IO.Worksheet.parseRowsFlat invesWs + + // new CvTypes - testin' and foolin' around diff --git a/src/ArcGraphModel.IO/ISA/Tokenization.fs b/src/ArcGraphModel.IO/ISA/Tokenization.fs index 328bcca..95d8cbe 100644 --- 
a/src/ArcGraphModel.IO/ISA/Tokenization.fs +++ b/src/ArcGraphModel.IO/ISA/Tokenization.fs @@ -7,12 +7,15 @@ open KeyParser module Tokenization = let convertTokens (line : FsCell seq) = + printfn "line: %A" line match line |> Seq.toList with | [] -> failwith "Cannot convert nothin" | key :: [] -> + printfn "case key :: []" let f = parseKey [] key.Value [f (ParamValue.Value "")] | key :: cells -> + printfn "case key :: cells" let f = parseKey [] key.Value cells |> List.map (fun c -> diff --git a/src/ArcGraphModel.IO/ISA/Worksheet.fs b/src/ArcGraphModel.IO/ISA/Worksheet.fs index f4b6bfe..71e827b 100644 --- a/src/ArcGraphModel.IO/ISA/Worksheet.fs +++ b/src/ArcGraphModel.IO/ISA/Worksheet.fs @@ -7,47 +7,53 @@ open FsSpreadsheet module Worksheet = - let parseRows (worksheet : FsWorksheet) = + /// Parses a given list of FsCells of a given FsWorksheet via a given tokenization function and returns the resulting IAttributeCollection list. + let parseCells cellsList tokenizationFunction (worksheet : FsWorksheet) = let sheetName = Address.createWorksheetParam worksheet.Name - worksheet.Rows + cellsList |> List.choose (fun r -> - match r |> Tokenization.parseLine |> Seq.toList with + match tokenizationFunction r |> Seq.toList with | [] -> None | l -> Some l ) |> List.concat - |> List.map (fun token -> + |> List.map (fun token -> CvAttributeCollection.tryAddAttribute sheetName token |> ignore token ) - let parseTableColumns (worksheet : FsWorksheet) = - let sheetName = Address.createWorksheetParam worksheet.Name - worksheet.Tables.Head.Columns(worksheet.CellCollection) - |> Seq.toList - |> List.choose (fun r -> - match r |> Tokenization.parseLine |> Seq.toList with - | [] -> None - | l -> Some l - ) - |> List.concat - |> List.map (fun token -> - CvAttributeCollection.tryAddAttribute sheetName token |> ignore - token - ) + /// Parses rows of a given FsWorksheet via a given tokenization function and returns the resulting IAttributeCollection list. 
+ let parseRows tokenizationFunction (worksheet : FsWorksheet) = + parseCells (worksheet.Rows |> List.map (fun x -> x.Cells)) tokenizationFunction worksheet - let parseColumns (worksheet : FsWorksheet) = - let sheetName = Address.createWorksheetParam worksheet.Name - worksheet.Columns - |> Seq.toList - |> List.choose (fun r -> - match r |> Tokenization.parseLine |> Seq.toList with - | [] -> None - | l -> Some l - ) - |> List.concat - |> List.map (fun token -> - CvAttributeCollection.tryAddAttribute sheetName token |> ignore - token - ) + /// Parses rows of a given FsWorksheet and returns the resulting aggregated ICvBase list. + let parseRowsAggregated (worksheet : FsWorksheet) = + parseRows Tokenization.parseLine worksheet + + /// Parses rows of a given FsWorksheet and returns the resulting flat IParam list. + let parseRowsFlat (worksheet : FsWorksheet) = + parseRows Tokenization.convertTokens worksheet + + /// Parses columns of a given FsWorksheet via a given tokenization function and returns the resulting IAttributeCollection list. + let parseColumns tokenizationFunction (worksheet : FsWorksheet) = + parseCells (Seq.toList worksheet.Columns |> List.map (fun x -> x.Cells)) tokenizationFunction worksheet + + /// Parses columns of a given FsWorksheet and returns the resulting aggregated ICvBase list. + let parseColumnsAggregated (worksheet : FsWorksheet) = + parseColumns Tokenization.parseLine worksheet + + /// Parses columns of a given FsWorksheet and returns the resulting flat IParam list. + let parseColumnsFlat (worksheet : FsWorksheet) = + parseColumns Tokenization.convertTokens worksheet + + /// Parses the columns of the first FsTable in a given FsWorksheet via a given tokenization function and returns the resulting IAttributeCollection list. 
+ let parseTableColumns tokenizationFunction (worksheet : FsWorksheet) = + parseCells (worksheet.Tables.Head.Columns(worksheet.CellCollection) |> Seq.toList |> List.map (fun x -> x.Cells)) tokenizationFunction worksheet + + /// Parses the columns of the first FsTable in a given FsWorksheet and returns the resulting aggregated ICvBase list. + let parseTableColumnsAggregated (worksheet : FsWorksheet) = + parseTableColumns Tokenization.parseLine worksheet + /// Parses the columns of the first FsTable in a given FsWorksheet and returns the resulting flat IParam list. + let parseTableColumnsFlat (worksheet : FsWorksheet) = + parseTableColumns Tokenization.convertTokens worksheet \ No newline at end of file From 32ecc0751b719af30b1f8176ccb1f6031525147a Mon Sep 17 00:00:00 2001 From: omaus Date: Sun, 16 Jul 2023 23:37:05 +0200 Subject: [PATCH 2/2] Delete print-debugging --- src/ArcGraphModel.IO/ISA/Tokenization.fs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ArcGraphModel.IO/ISA/Tokenization.fs b/src/ArcGraphModel.IO/ISA/Tokenization.fs index 95d8cbe..328bcca 100644 --- a/src/ArcGraphModel.IO/ISA/Tokenization.fs +++ b/src/ArcGraphModel.IO/ISA/Tokenization.fs @@ -7,15 +7,12 @@ open KeyParser module Tokenization = let convertTokens (line : FsCell seq) = - printfn "line: %A" line match line |> Seq.toList with | [] -> failwith "Cannot convert nothin" | key :: [] -> - printfn "case key :: []" let f = parseKey [] key.Value [f (ParamValue.Value "")] | key :: cells -> - printfn "case key :: cells" let f = parseKey [] key.Value cells |> List.map (fun c ->