diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 7787a607d5..85cd7ba177 100644
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -255,6 +255,7 @@
{
"name": "Utils",
"ops": [
+ "Cut",
"Diff",
"Remove whitespace",
"Remove null bytes",
diff --git a/src/core/operations/Cut.mjs b/src/core/operations/Cut.mjs
new file mode 100644
index 0000000000..d824e6958f
--- /dev/null
+++ b/src/core/operations/Cut.mjs
@@ -0,0 +1,237 @@
+/**
+ * @author emilhf [emil@cyberops.no]
+ * @copyright Crown Copyright 2020
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+import OperationError from "../errors/OperationError.mjs";
+import {SPLIT_DELIM_OPTIONS, JOIN_DELIM_OPTIONS} from "../lib/Delim.mjs";
+import XRegExp from "xregexp";
+
+/**
+ * Cut operation: extracts, reorders and combines fields of delimited or
+ * fixed width records, in the spirit of the UNIX cut and awk tools.
+ */
+class Cut extends Operation {
+
+    /**
+     * Cut constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Cut";
+        this.module = "Utils";
+        this.description = "Extract fields from records similarly to <code>awk</code> and <code>cut</code>. The expression <code>1, 3-4</code> will extract the 2nd, 4th and 5th fields. <code>3, 1 \"T\" 2</code> will extract the 4th field, then combine the 2nd and 3rd field into a new field (with the letter 'T' separating the original values).<br><br>If no input field delimiter is set, fixed width mode is enabled: Fields become the indices of the payload, and ranges will be appended to the current output field instead of creating new fields. This aids in carving e.g. CSVs from fixed width data.";
+        this.infoURL = "https://en.wikipedia.org/wiki/Cut_(Unix)";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [
+            {
+                "name": "Common input type",
+                "type": "populateOption",
+                "value": [
+                    {
+                        name: "User defined",
+                        value: ""
+                    },
+                    {
+                        name: "CSV",
+                        value: ","
+                    },
+                    {
+                        name: "TSV",
+                        value: "\\t"
+                    },
+                    {
+                        name: "PSV",
+                        value: "\\|"
+                    },
+                    {
+                        name: "Space aligned",
+                        value: "\\s+"
+                    }
+                ],
+                // args[4] is "Input field delimiter"
+                "target": 4
+            },
+            {
+                "name": "Expression",
+                "type": "text",
+                "value": "0-"
+            },
+            {
+                "name": "Input record delimiter",
+                "type": "editableOptionShort",
+                "value": SPLIT_DELIM_OPTIONS,
+                "defaultIndex": 2
+            },
+            {
+                "name": "Output record delimiter",
+                "type": "editableOptionShort",
+                "value": SPLIT_DELIM_OPTIONS,
+                "defaultIndex": 2
+            },
+            {
+                "name": "Input field delimiter",
+                "type": "shortString",
+                "value": ""
+            },
+            {
+                "name": "Output field delimiter",
+                "type": "editableOptionShort",
+                "value": JOIN_DELIM_OPTIONS,
+                "defaultIndex": 3
+            }
+        ];
+    }
+
+    /**
+     * Splits the input into records, extracts the requested fields from
+     * each record and joins the results back together.
+     *
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {string}
+     * @throws {OperationError} if the expression joins a range with other terms outside fixed width mode
+     */
+    run(input, args) {
+        const [, expr, inRecordDelim, outRecordDelim, inFieldDelim, outFieldDelim] = args;
+        // An empty field delimiter selects fixed width mode, where each
+        // record is indexed character by character instead of being split.
+        const fixedWidth = inFieldDelim === "";
+        const fieldSplitter = new XRegExp(inFieldDelim);
+
+        /**
+         * @param {string} record
+         * @returns {string}
+         */
+        const cutRecord = (record) => {
+            const data = fixedWidth ? record : record.split(fieldSplitter);
+            return this.extract(data, expr, fixedWidth).join(outFieldDelim);
+        };
+
+        return input.split(inRecordDelim).map(cutRecord).join(outRecordDelim);
+    }
+
+    /**
+     * Extracts fields as specified by the extraction expression. If fixedWidth
+     * is true, ranges do not introduce new fields, but rather append to the
+     * current field being dealt with.
+     *
+     * The extract expression is a lightweight DSL similar to the fields flag
+     * (-f) of cut in UNIX, and also incorporates elements of the awk print
+     * statement. It departs from cut in a few noteworthy ways:
+     *
+     * - Reverse ranges are supported, e.g. 4-1.
+     *
+     * - Negative field values, e.g. -1, are offsets from the end of the data.
+     *   Note that negative ranges are not supported.
+     *
+     * - Fields are numbered from 0 instead of 1.
+     *
+     * - New fields can be constructed by combining existing fields. This
+     *   operation also supports appending strings: '1 "@" 2' will join field 1
+     *   and 2 with "@" in between them.
+     *
+     * Ranges are clamped to the available data in both directions, and an
+     * expression without any recognisable term yields no fields.
+     *
+     * @param {string[]|string} data - fields of one record, or the raw record in fixed width mode
+     * @param {string} expr
+     * @param {boolean} fixedWidth
+     * @returns {string[]} the extracted output fields, in expression order
+     * @throws {OperationError} if a range is joined with other terms outside fixed width mode
+     */
+    extract(data, expr, fixedWidth) {
+        const maxOffset = data.length - 1;
+
+        /**
+         * @param {number} n - field index; negative values count from the end
+         * @returns {string|undefined}
+         */
+        const pick = (n) => n < 0 ? data[maxOffset + n + 1] : data[n];
+
+        const fields = [];
+        let currentField = [];
+        let previousToken = null;
+        // match() returns null when nothing matches (e.g. an empty expression)
+        const tokens = expr.trim().match(/((".*?")|(\d+-\d*)|(-?\d+)|(,))/g) || [];
+
+        for (const token of tokens) {
+            // Field separator
+            if (token === ",") {
+                previousToken = "delimiter";
+                if (currentField.length) {
+                    fields.push(currentField.join(""));
+                    currentField = [];
+                }
+                continue;
+            }
+
+            if (!fixedWidth && previousToken === "range") {
+                throw new OperationError(
+                    `Cannot join '${token}', as previous term was a range. Requires fixed width mode.`
+                );
+            }
+
+            // Single field, optionally counted from the end
+            if (/^-?[0-9]+$/.test(token)) {
+                previousToken = "extraction";
+                currentField.push(pick(Number(token)));
+                continue;
+            }
+
+            // Range, possibly reversed or open-ended
+            const range = token.match(/^([0-9]+)-([0-9]*)$/);
+            if (range) {
+                previousToken = "range";
+                if (!fixedWidth && currentField.length) {
+                    throw new OperationError(
+                        `Cannot join range '${token}' with rest of field: ${currentField.join("")}. Requires fixed width mode.`
+                    );
+                }
+                const first = Number(range[1]);
+                const openEnded = range[2] === "";
+                const last = openEnded ? maxOffset : Number(range[2]);
+
+                const vals = [];
+                if (openEnded || first <= last) {
+                    for (let i = first; i <= last && i <= maxOffset; i++) {
+                        vals.push(pick(i));
+                    }
+                } else {
+                    // Start from the last available offset so that a reverse
+                    // range beginning beyond the data is clamped, not dropped.
+                    for (let i = Math.min(first, maxOffset); i >= last; i--) {
+                        vals.push(pick(i));
+                    }
+                }
+
+                if (fixedWidth) {
+                    currentField.push(...vals);
+                } else {
+                    fields.push(...vals);
+                }
+                continue;
+            }
+
+            // Quoted string literal
+            const literal = token.match(/^"(.*)"$/);
+            if (literal) {
+                previousToken = "string";
+                currentField.push(literal[1]);
+            }
+        }
+
+        // Terminal condition
+        if (currentField.length) {
+            fields.push(currentField.join(""));
+        }
+        return fields;
+    }
+
+}
+
+export default Cut;
diff --git a/tests/operations/index.mjs b/tests/operations/index.mjs
index a4d14bfd3c..6ebece67b6 100644
--- a/tests/operations/index.mjs
+++ b/tests/operations/index.mjs
@@ -43,6 +43,7 @@ import "./tests/Compress.mjs";
import "./tests/ConditionalJump.mjs";
import "./tests/Crypt.mjs";
import "./tests/CSV.mjs";
+import "./tests/Cut.mjs";
import "./tests/DateTime.mjs";
import "./tests/ExtractEmailAddresses.mjs";
import "./tests/Fork.mjs";
diff --git a/tests/operations/tests/Cut.mjs b/tests/operations/tests/Cut.mjs
new file mode 100644
index 0000000000..8856587939
--- /dev/null
+++ b/tests/operations/tests/Cut.mjs
@@ -0,0 +1,45 @@
+/**
+ * Cut operation tests
+ *
+ * @author emilhf [emil@cyberops.no]
+ *
+ * @copyright Crown Copyright 2020
+ * @license Apache-2.0
+ */
+
+import TestRegister from "../../lib/TestRegister.mjs";
+
+/**
+ * Builds a test case that runs a single "Cut" operation with "\\n" as both
+ * record delimiters and the "User defined" input type.
+ *
+ * @param {string} name
+ * @param {string} input
+ * @param {string} expectedOutput
+ * @param {string} expr - extraction expression
+ * @param {string} inFieldDelim - input field delimiter ("" selects fixed width mode)
+ * @param {string} outFieldDelim
+ * @returns {Object}
+ */
+const cutTest = (name, input, expectedOutput, expr, inFieldDelim, outFieldDelim) => ({
+    name,
+    input,
+    expectedOutput,
+    recipeConfig: [
+        {
+            op: "Cut",
+            args: ["User defined", expr, "\\n", "\\n", inFieldDelim, outFieldDelim],
+        },
+    ],
+});
+
+TestRegister.addTests([
+    cutTest("Extract single field", "test1,test2,test3", "test2", "1", ",", ","),
+    cutTest("Extract range", "test1,test2,test3", "test2,test3", "1-2", ",", ","),
+    cutTest("Extract reverse range", "test1,test2,test3", "test2,test1", "1-0", ",", ","),
+    cutTest("Extract multiple ranges", "test1,test2,test3", "test2,test3,test1", "1-2,0", ",", ","),
+    cutTest("Combine two existing fields", "john.doe,CONTOSO\nadams,CONTOSO", "john.doe@CONTOSO\nadams@CONTOSO", "0 \"@\" 1", ",", ","),
+    cutTest("Fixed width to CSV", "abcdefghijklmnopqrstuvxyz", "abc,xyz", "0-2, 22-24", "", ","),
+    cutTest("Extract and convert CSV to TSV", "ITEM,VALUE\nflamingo,439\nvodka,14", "ITEM\tVALUE\nflamingo\t439\nvodka\t14", "0-", ",", "\\t"),
+    cutTest("Extract with wrong delimiter", "test1,test2", "test1,test2", "0-", "\\t", ";"),
+]);