Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Web console: adding format notice for CSV and TSV #14783

Merged
merged 5 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions licenses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5704,6 +5704,16 @@ license_file_path: licenses/bin/color-name.MIT

---

name: "commander"
license_category: binary
module: web-console
license_name: MIT License
copyright: TJ Holowaychuk
version: 2.20.0
license_file_path: licenses/bin/commander.MIT

---

name: "constant-case"
license_category: binary
module: web-console
Expand Down Expand Up @@ -5804,6 +5814,16 @@ license_file_path: licenses/bin/d3-color.BSD3

---

name: "d3-dsv"
license_category: binary
module: web-console
license_name: BSD-3-Clause License
copyright: Mike Bostock
version: 2.0.0
license_file_path: licenses/bin/d3-dsv.BSD3

---

name: "d3-format"
license_category: binary
module: web-console
Expand Down Expand Up @@ -6172,6 +6192,16 @@ license_file_path: licenses/bin/hoist-non-react-statics.BSD3

---

name: "iconv-lite"
license_category: binary
module: web-console
license_name: MIT License
copyright: Alexander Shtuchkin
version: 0.4.24
license_file_path: licenses/bin/iconv-lite.MIT

---

name: "import-fresh"
license_category: binary
module: web-console
Expand Down Expand Up @@ -6721,6 +6751,16 @@ license_file_path: licenses/bin/resolve.MIT

---

name: "rw"
license_category: binary
module: web-console
license_name: BSD-3-Clause License
copyright: Mike Bostock
version: 1.3.3
license_file_path: licenses/bin/rw.BSD3

---

name: "safe-buffer"
license_category: binary
module: web-console
Expand All @@ -6731,6 +6771,16 @@ license_file_path: licenses/bin/safe-buffer.MIT

---

name: "safer-buffer"
license_category: binary
module: web-console
license_name: MIT License
copyright: Nikita Skovoroda
version: 2.1.2
license_file_path: licenses/bin/safer-buffer.MIT

---

name: "scheduler"
license_category: binary
module: web-console
Expand Down
69 changes: 59 additions & 10 deletions web-console/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions web-console/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"core-js": "^3.10.1",
"d3-array": "^2.12.1",
"d3-axis": "^2.1.0",
"d3-dsv": "^2.0.0",
"d3-scale": "^3.3.0",
"d3-selection": "^2.0.0",
"echarts": "^5.4.1",
Expand Down Expand Up @@ -114,6 +115,7 @@
"@types/classnames": "^2.2.9",
"@types/d3-array": "^2.12.3",
"@types/d3-axis": "^2.1.3",
"@types/d3-dsv": "^2.0.0",
"@types/d3-scale": "^3.3.2",
"@types/d3-selection": "^2.0.1",
"@types/enzyme": "^3.10.3",
Expand Down
1 change: 1 addition & 0 deletions web-console/src/components/auto-form/auto-form.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ export class AutoForm<T extends Record<string, any>> extends React.PureComponent
disabled={AutoForm.evaluateFunctor(field.disabled, model, false)}
intent={required && modelValue == null ? AutoForm.REQUIRED_INTENT : undefined}
multiline={AutoForm.evaluateFunctor(field.multiline, model, false)}
height={field.height}
/>
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export const FormattedInput = React.memo(function FormattedInput(props: Formatte
intent,
placeholder,
multiline,
height,
...rest
} = props;

Expand Down Expand Up @@ -105,6 +106,7 @@ export const FormattedInput = React.memo(function FormattedInput(props: Formatte
onBlur={myOnBlur}
intent={myIntent}
placeholder={placeholder}
style={height ? { height } : undefined}
/>
) : (
<InputGroup
Expand Down
61 changes: 41 additions & 20 deletions web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import { Code } from '@blueprintjs/core';
import { range } from 'd3-array';
import { csvParseRows, tsvParseRows } from 'd3-dsv';
import type { JSX } from 'react';
import React from 'react';

Expand Down Expand Up @@ -2135,33 +2136,53 @@ export function updateIngestionType(
return newSpec;
}

/**
 * Searches a table of parsed cell values for one that contains a line feed.
 *
 * Both levels of the search are delegated to `findMap` (declared elsewhere in
 * this project); presumably it yields the first result that is not
 * `undefined` - confirm against its definition.
 *
 * @param rows - parsed rows, each an array of string cell values
 * @returns a cell value containing `'\n'`, or `undefined` when no cell has one
 */
function findValueWithNewline(rows: string[][]): string | undefined {
  // Per-row probe: hand back the offending cell itself so callers can inspect it.
  const multilineCellIn = (cells: string[]): string | undefined =>
    findMap(cells, cell => {
      if (cell.includes('\n')) return cell;
      return undefined;
    });

  return findMap(rows, cells => multilineCellIn(cells));
}

export function issueWithSampleData(
sampleData: SampleResponse,
spec: Partial<IngestionSpec>,
sampleLines: string[],
isStreaming: boolean,
): JSX.Element | undefined {
if (isStreamingSpec(spec)) return;
if (!sampleLines.length) return;

const firstData: string = findMap(sampleData.data, l => l.input?.raw);
if (firstData) return;
const firstLine = sampleLines[0];
if (!isStreaming) {
if (firstLine === '{') {
return (
<>
This data looks like a multi-line formatted JSON object. For Druid to parse a text file,
it must have one row per event. Consider reformatting your data as{' '}
<ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
</>
);
}

if (firstData === '{') {
return (
<>
This data looks like multi-line formatted JSON object. For Druid to parse a text file it
must have one row per event. Consider reformatting your data as{' '}
<ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
</>
);
if (oneOf(firstLine, '[', '[]')) {
return (
<>
This data looks like a multi-line JSON array. For Druid to parse a text file, it must have
one row per event. Consider reformatting your data as{' '}
<ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
</>
);
}
}

if (oneOf(firstData, '[', '[]')) {
return (
<>
This data looks like a multi-line JSON array. For Druid to parse a text file it must have
one row per event. Consider reformatting your data as{' '}
<ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
</>
const format = guessSimpleInputFormat(sampleLines, isStreaming);
const text = sampleLines.join('\n');
if (oneOf(format.type, 'csv', 'tsv')) {
const valueWithNewline = findValueWithNewline(
format.type === 'csv' ? csvParseRows(text) : tsvParseRows(text),
);
if (valueWithNewline) {
const formatLabel = format.type.toUpperCase();
return (
<>
{`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line, so ${formatLabel} values can not contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}
</>
);
}
}

return;
Expand Down
3 changes: 3 additions & 0 deletions web-console/src/druid-models/input-source/input-source.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import { deepGet, deepSet, nonEmptyArray, typeIsKnown } from '../../utils';

export const FILTER_SUGGESTIONS: string[] = [
'*',
'*.jsonl',
'*.jsonl.gz',
'*.json',
'*.json.gz',
'*.csv',
Expand Down Expand Up @@ -179,6 +181,7 @@ export const INPUT_SOURCE_FIELDS: Field<InputSource>[] = [
required: true,
placeholder: 'Paste your data here',
multiline: true,
height: '400px',
info: <p>Put you inline data here</p>,
},

Expand Down
Loading