-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ML] Process delimited files like semi-structured text (#56038)
Changes the file upload functionality to process delimited files by splitting them into messages, then sending these to the ingest pipeline as a single field for further processing in Elasticsearch. The csv_importer has been removed and the old sst_importer replaced with a similar message_importer that has been enhanced to cover the edge cases required by delimited file processing. Previously the file upload functionality parsed CSV in the browser, but by parsing CSV in the ingest pipeline it makes the Kibana file upload functionality more easily interchangeable with Filebeat such that the configurations it creates can more easily be used to import data with the same structure repeatedly in production. Companion to elastic/elasticsearch#51492
- Loading branch information
1 parent
05ed2d6
commit 9fcbeb3
Showing
6 changed files
with
102 additions
and
178 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 0 additions & 102 deletions
102
...lic/application/datavisualizer/file_based/components/import_view/importer/csv_importer.js
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
94 changes: 94 additions & 0 deletions
94
...application/datavisualizer/file_based/components/import_view/importer/message_importer.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
import { Importer } from './importer'; | ||
|
||
/**
 * Importer that splits raw file text into "messages" (one document each,
 * shaped as `{ message }`), leaving any further parsing of the message to a
 * later stage.
 *
 * Two optional regular expressions, taken from the `results` object, drive
 * the splitting:
 *  - `exclude_lines_pattern`: lines matching it are dropped entirely.
 *  - `multiline_start_pattern`: only lines matching it begin a new message;
 *    any other line is appended to the message currently being built.
 *    When absent, every line is its own message.
 */
export class MessageImporter extends Importer {
  /**
   * @param {Object} results - object carrying the optional
   *   `exclude_lines_pattern` and `multiline_start_pattern` regex sources.
   * @param {*} settings - forwarded unchanged to the `Importer` constructor.
   */
  constructor(results, settings) {
    super(settings);

    // `== null` treats both a missing property and an explicit null as
    // "no pattern". `new RegExp(null)` would otherwise build /null/, which
    // matches any line containing the text "null".
    this.excludeLinesRegex =
      results.exclude_lines_pattern == null ? null : new RegExp(results.exclude_lines_pattern);
    this.multilineStartRegex =
      results.multiline_start_pattern == null
        ? null
        : new RegExp(results.multiline_start_pattern);
  }

  /**
   * Split `text` into messages and store them on `this.data` and
   * `this.docArray` (both reference the same array).
   *
   * The text is split on newlines. Lines matching the exclude pattern are
   * ignored. Each remaining line is checked against the multiline start
   * pattern: if it matches, the previous message is complete and is pushed
   * to the list; if not, the newline must have been inside a field value,
   * so the line is appended to the message being built.
   *
   * @param {string} text - full contents to split.
   * @returns {{success: boolean, error?: *}} `success: true` on completion;
   *   on any thrown error it is logged and returned with `success: false`.
   */
  read(text) {
    try {
      const data = [];

      const lines = text.split('\n');
      // whatever follows the final newline; it only counts as a line when
      // non-empty (i.e. the text did not end with a newline)
      const trailing = lines.pop();

      let message = '';
      for (const line of lines) {
        message = this.processLine(data, message, line);
      }
      if (trailing !== '') {
        message = this.processLine(data, message, trailing);
      }

      // add the last message to the list if not already done
      if (message !== '') {
        this.addMessage(data, message);
      }

      // the first start-of-message line flushes the (still empty) initial
      // message, leaving a blank placeholder at the front; remove it
      if (data.length > 0 && data[0].message === '') {
        data.shift();
      }

      this.data = data;
      this.docArray = this.data;

      return {
        success: true,
      };
    } catch (error) {
      console.error(error);
      return {
        success: false,
        error,
      };
    }
  }

  /**
   * Fold one line into the message being built.
   *
   * @param {Array<{message: string}>} data - completed messages; appended to
   *   when `line` starts a new message.
   * @param {string} message - the message accumulated so far.
   * @param {string} line - the line to process (without its `\n`).
   * @returns {string} the updated in-progress message.
   */
  processLine(data, message, line) {
    if (this.excludeLinesRegex !== null && this.excludeLinesRegex.test(line)) {
      // excluded lines leave the in-progress message untouched
      return message;
    }

    const startsMessage =
      this.multilineStartRegex === null || this.multilineStartRegex.test(line);
    if (startsMessage) {
      // legitimate end of the previous message: flush it and start afresh
      this.addMessage(data, message);
      return line;
    }

    // Continuation line. Only insert the newline separator once there is
    // something to separate from: before any message has started (nothing
    // flushed and nothing accumulated) a blank line - e.g. one following an
    // excluded CSV header - would otherwise become a bogus "\n" document.
    if (message === '' && data.length === 0) {
      return line;
    }
    return `${message}\n${line}`;
  }

  /**
   * Append `message` to `data` as a `{ message }` document.
   *
   * @param {Array<{message: string}>} data - list to append to.
   * @param {string} message - message text.
   */
  addMessage(data, message) {
    // if the message ended \r\n (Windows line endings)
    // then omit the \r as well as the \n for consistency
    message = message.replace(/\r$/, '');
    data.push({ message });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 0 additions & 70 deletions
70
...lic/application/datavisualizer/file_based/components/import_view/importer/sst_importer.js
This file was deleted.
Oops, something went wrong.