encoding: add csv parse (denoland/std#458)

Original: denoland/std@167f529
ry · May 30, 2019 · 2487c45 · 2487c45
1 parent a0ce25e
commit 2487c45
Show file tree

Hide file tree

Showing 4 changed files with 349 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -24,6 +24,7 @@ Here are the dedicated documentations of modules:
 
 - [colors](colors/README.md)
 - [datetime](datetime/README.md)
+- [encoding](encoding/README.md)
 - [examples](examples/README.md)
 - [flags](flags/README.md)
 - [fs](fs/README.md)
@@ -33,7 +34,6 @@ Here are the dedicated documentations of modules:
 - [prettier](prettier/README.md)
 - [strings](strings/README.md)
 - [testing](testing/README.md)
-- [toml](encoding/toml/README.md)
 - [ws](ws/README.md)
 
 ## Contributing

diff --git a/encoding/README.md b/encoding/README.md
@@ -1,11 +1,112 @@
-# TOML
+# Encoding
+
+## CSV
+
+- **`readAll(reader: BufReader, opt: ParseOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false } ): Promise<string[][]>`**:
+  Read the whole buffer and output the structured CSV data.
+- **`parse(input: string | BufReader, opt: ExtendedParseOptions = { header: false }): Promise<unknown[]>`**:
+  See [Parse](#parse).
+
+### Parse
+
+Parse the CSV string with the options provided.
+
+#### Options
+
+##### ExtendedParseOptions
+
+- **`header: boolean | string[] | HeaderOption[];`**: If `true`, the first
+  line is used as the header definitions; if `false`, each row is returned as
+  a `string[]`. If a `string[]` or `HeaderOption[]` is provided, those names
+  are used as the header definitions.
+- **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which
+  is executed after all columns have been parsed. If no header is provided,
+  the input is the raw `string[]` row; otherwise it is the mapped object.
+
+##### HeaderOption
+
+- **`name: string;`**: Name of the header to be used as property.
+- **`parse?: (input: string) => unknown;`**: Parse function for the column.
+  This is executed on each entry of the column. It can be combined with the
+  parse function for the rows.
+
+#### Usage
+
+```ts
+// input:
+// a,b,c
+// e,f,g
+
+const r = await parse(input, {
+  header: false
+});
+// output:
+// [["a", "b", "c"], ["e", "f", "g"]]
+
+const r = await parse(input, {
+  header: true
+});
+// output:
+// [{ a: "e", b: "f", c: "g" }]
+
+const r = await parse(input, {
+  header: ["this", "is", "sparta"]
+});
+// output:
+// [
+//   { this: "a", is: "b", sparta: "c" },
+//   { this: "e", is: "f", sparta: "g" }
+// ]
+
+const r = await parse(input, {
+  header: [
+    {
+      name: "this",
+      parse: (e: string): string => {
+        return `b${e}$$`;
+      }
+    },
+    {
+      name: "is",
+      parse: (e: string): number => {
+        return e.length;
+      }
+    },
+    {
+      name: "sparta",
+      parse: (e: string): unknown => {
+        return { bim: `boom-${e}` };
+      }
+    }
+  ]
+});
+// output:
+// [
+//    { this: "ba$$", is: 1, sparta: { bim: `boom-c` } },
+//    { this: "be$$", is: 1, sparta: { bim: `boom-g` } }
+// ]
+
+const r = await parse(input, {
+  header: ["this", "is", "sparta"],
+  parse: (e: Record<string, unknown>) => {
+    return { super: e.this, street: e.is, fighter: e.sparta };
+  }
+});
+// output:
+// [
+//   { super: "a", street: "b", fighter: "c" },
+//   { super: "e", street: "f", fighter: "g" }
+// ]
+```
+
+## TOML
 
 This module parses TOML files. It follows the
 [TOML specs](https://github.com/toml-lang/toml) as closely as possible. Be sure
 to read the supported types, as not every spec is supported at the moment and
 the handling on the TypeScript side is a bit different.
 
-## Supported types and handling
+### Supported types and handling
 
 - :heavy_check_mark: [Keys](https://github.com/toml-lang/toml#string)
 - :exclamation: [String](https://github.com/toml-lang/toml#string)
@@ -27,39 +128,39 @@ TypeScript side is a bit different.
 
 :exclamation: _Supported with warnings see [Warning](#Warning)._
 
-### :warning: Warning
+#### :warning: Warning
 
-#### String
+##### String
 
 - Regex : Due to the spec, there is no flag to detect regex properly
   in a TOML declaration. So the regex is stored as string.
 
-#### Integer
+##### Integer
 
 For **Binary** / **Octal** / **Hexadecimal** numbers,
 they are stored as strings so that they are not interpreted as decimal.
 
-#### Local Time
+##### Local Time
 
 Because local time does not exist in JavaScript, the local time is stored as a string.
 
-#### Inline Table
+##### Inline Table
 
 Inline tables are supported. See below:
 
 ```toml
 animal = { type = { name = "pug" } }
-# Output
+## Output
 animal = { type.name = "pug" }
-# Output { animal : { type : { name : "pug" } }
+## Output { animal : { type : { name : "pug" } }
 animal.as.leaders = "tosin"
-# Output { animal: { as: { leaders: "tosin" } } }
+## Output { animal: { as: { leaders: "tosin" } } }
 "tosin.abasi" = "guitarist"
-# Output
+## Output
 "tosin.abasi" : "guitarist"
 ```
 
-#### Array of Tables
+##### Array of Tables
 
 At the moment only simple declarations like below are supported:
 
@@ -89,9 +190,9 @@ will output:
 }
 ```
 
-## Usage
+### Usage
 
-### Parse
+#### Parse
 
 ```ts
 import { parse } from "./parser.ts";
@@ -103,7 +204,7 @@ const tomlString = 'foo.bar = "Deno"';
 const tomlObject22 = parse(tomlString);
 ```
 
-### Stringify
+#### Stringify
 
 ```ts
 import { stringify } from "./parser.ts";

diff --git a/encoding/csv.ts b/encoding/csv.ts
@@ -4,6 +4,7 @@
 
 import { BufReader, EOF } from "../io/bufio.ts";
 import { TextProtoReader } from "../textproto/mod.ts";
+import { StringReader } from "../io/readers.ts";
 
 const INVALID_RUNE = ["\r", "\n", '"'];
 
@@ -17,28 +18,39 @@ export class ParseError extends Error {
   }
 }
 
+/**
+ * @property comma - Character which separates values. Default: ','
+ * @property comment - Character to start a comment. Default: '#'
+ * @property trimLeadingSpace - Flag to trim the leading space of the value. Default: 'false'
+ * @property lazyQuotes - Allow an unquoted quote in a quoted field or a
+ *  non-doubled quote in a quoted field. Default: 'false'
+ * @property fieldsPerRecord - Enables the check of the number of fields for
+ * each row. If == 0, the first row is used as the reference for the number of fields.
+ */
 export interface ParseOptions {
-  comma: string;
+  comma?: string;
   comment?: string;
-  trimLeadingSpace: boolean;
+  trimLeadingSpace?: boolean;
   lazyQuotes?: boolean;
   fieldsPerRecord?: number;
 }
 
 function chkOptions(opt: ParseOptions): void {
+  if (!opt.comma) opt.comma = ",";
+  if (!opt.trimLeadingSpace) opt.trimLeadingSpace = false;
   if (
-    INVALID_RUNE.includes(opt.comma) ||
-    (opt.comment && INVALID_RUNE.includes(opt.comment)) ||
+    INVALID_RUNE.includes(opt.comma!) ||
+    INVALID_RUNE.includes(opt.comment!) ||
     opt.comma === opt.comment
   ) {
     throw new Error("Invalid Delimiter");
   }
 }
 
-export async function read(
+async function read(
   Startline: number,
   reader: BufReader,
-  opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false }
+  opt: ParseOptions = { comma: ",", trimLeadingSpace: false }
 ): Promise<string[] | EOF> {
   const tp = new TextProtoReader(reader);
   let line: string;
@@ -68,7 +80,7 @@ export async function read(
     return [];
   }
 
-  result = line.split(opt.comma);
+  result = line.split(opt.comma!);
 
   let quoteError = false;
   result = result.map(
@@ -138,3 +150,105 @@ export async function readAll(
   }
   return result;
 }
+
+/**
+ * HeaderOption provides the column definition
+ * and the parse function for each entry of the
+ * column.
+ */
+export interface HeaderOption {
+  name: string;
+  parse?: (input: string) => unknown;
+}
+
+export interface ExtendedParseOptions extends ParseOptions {
+  header: boolean | string[] | HeaderOption[];
+  parse?: (input: unknown) => unknown;
+}
+
+/**
+ * Csv parse helper to manipulate data.
+ * Provides an auto/custom mapper for columns and parse function
+ * for columns and rows.
+ * @param input Input to parse. Can be a string or BufReader.
+ * @param opt options of the parser.
+ * @param [opt.header=false] HeaderOptions
+ * @param [opt.parse=null] Parse function for rows.
+ * Example:
+ *     const r = await parse('a,b,c\ne,f,g\n', {
+ *       header: ["this", "is", "sparta"],
+ *       parse: (e: Record<string, unknown>) => {
+ *         return { super: e.this, street: e.is, fighter: e.sparta };
+ *       }
+ *     });
+ * // output
+ * [
+ *   { super: "a", street: "b", fighter: "c" },
+ *   { super: "e", street: "f", fighter: "g" }
+ * ]
+ */
+export async function parse(
+  input: string | BufReader,
+  opt: ExtendedParseOptions = {
+    header: false
+  }
+): Promise<unknown[]> {
+  let r: string[][];
+  if (input instanceof BufReader) {
+    r = await readAll(input, opt);
+  } else {
+    r = await readAll(new BufReader(new StringReader(input)), opt);
+  }
+  if (opt.header) {
+    let headers: HeaderOption[] = [];
+    let i = 0;
+    if (Array.isArray(opt.header)) {
+      if (typeof opt.header[0] !== "string") {
+        headers = opt.header as HeaderOption[];
+      } else {
+        const h = opt.header as string[];
+        headers = h.map(
+          (e): HeaderOption => {
+            return {
+              name: e
+            };
+          }
+        );
+      }
+    } else {
+      headers = r.shift()!.map(
+        (e): HeaderOption => {
+          return {
+            name: e
+          };
+        }
+      );
+      i++;
+    }
+    return r.map(
+      (e): unknown => {
+        if (e.length !== headers.length) {
+          throw `Error number of fields line:${i}`;
+        }
+        i++;
+        let out: Record<string, unknown> = {};
+        for (let j = 0; j < e.length; j++) {
+          const h = headers[j];
+          if (h.parse) {
+            out[h.name] = h.parse(e[j]);
+          } else {
+            out[h.name] = e[j];
+          }
+        }
+        if (opt.parse) {
+          return opt.parse(out);
+        }
+        return out;
+      }
+    );
+  }
+  if (opt.parse) {
+    return r.map((e: string[]): unknown => opt.parse!(e));
+  }
+  return r;
+}