Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Web console: adding format notice for CSV and TSV #14783

Merged
merged 5 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 94 additions & 4 deletions web-console/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions web-console/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"core-js": "^3.10.1",
"d3-array": "^2.12.1",
"d3-axis": "^2.1.0",
"d3-dsv": "^3.0.1",
"d3-scale": "^3.3.0",
"d3-selection": "^2.0.0",
"echarts": "^5.4.1",
Expand Down Expand Up @@ -114,6 +115,7 @@
"@types/classnames": "^2.2.9",
"@types/d3-array": "^2.12.3",
"@types/d3-axis": "^2.1.3",
"@types/d3-dsv": "^3.0.1",
"@types/d3-scale": "^3.3.2",
"@types/d3-selection": "^2.0.1",
"@types/enzyme": "^3.10.3",
Expand Down
1 change: 1 addition & 0 deletions web-console/src/components/auto-form/auto-form.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ export class AutoForm<T extends Record<string, any>> extends React.PureComponent
disabled={AutoForm.evaluateFunctor(field.disabled, model, false)}
intent={required && modelValue == null ? AutoForm.REQUIRED_INTENT : undefined}
multiline={AutoForm.evaluateFunctor(field.multiline, model, false)}
height={field.height}
/>
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export const FormattedInput = React.memo(function FormattedInput(props: Formatte
intent,
placeholder,
multiline,
height,
...rest
} = props;

Expand Down Expand Up @@ -105,6 +106,7 @@ export const FormattedInput = React.memo(function FormattedInput(props: Formatte
onBlur={myOnBlur}
intent={myIntent}
placeholder={placeholder}
style={height ? { height } : undefined}
/>
) : (
<InputGroup
Expand Down
61 changes: 41 additions & 20 deletions web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import { Code } from '@blueprintjs/core';
import { range } from 'd3-array';
import { csvParseRows, tsvParseRows } from 'd3-dsv';
import type { JSX } from 'react';
import React from 'react';

Expand Down Expand Up @@ -2135,33 +2136,53 @@ export function updateIngestionType(
return newSpec;
}

/**
 * Searches parsed rows for a cell value that contains a newline character.
 *
 * The per-cell callback yields the cell itself when it includes `'\n'` and
 * `undefined` otherwise; the nested `findMap` calls decide which result is
 * surfaced (presumably the first defined one - confirm against `findMap`).
 *
 * @param rows - rows of string cell values
 * @returns a cell value containing a newline, or `undefined` when none is reported
 */
function findValueWithNewline(rows: string[][]): string | undefined {
  const multilineCell = (cell: string): string | undefined => {
    if (cell.includes('\n')) return cell;
    return undefined;
  };

  return findMap(rows, cells => findMap(cells, multilineCell));
}

export function issueWithSampleData(
sampleData: SampleResponse,
spec: Partial<IngestionSpec>,
sampleLines: string[],
isStreaming: boolean,
): JSX.Element | undefined {
if (isStreamingSpec(spec)) return;
if (!sampleLines.length) return;

const firstData: string = findMap(sampleData.data, l => l.input?.raw);
if (firstData) return;
const firstLine = sampleLines[0];
if (!isStreaming) {
if (firstLine === '{') {
return (
<>
This data looks like multi-line formatted JSON object. For Druid to parse a text file it
vogievetsky marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
This data looks like multi-line formatted JSON object. For Druid to parse a text file it
This data looks like a multi-line formatted JSON object. For Druid to parse a text file it

must have one row per event. Consider reformatting your data as{' '}
<ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
</>
);
}

if (firstData === '{') {
return (
<>
This data looks like multi-line formatted JSON object. For Druid to parse a text file it
must have one row per event. Consider reformatting your data as{' '}
<ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
</>
);
if (oneOf(firstLine, '[', '[]')) {
return (
<>
This data looks like a multi-line JSON array. For Druid to parse a text file it must have
vogievetsky marked this conversation as resolved.
Show resolved Hide resolved
one row per event. Consider reformatting your data as{' '}
<ExternalLink href="https://jsonlines.org">JSON Lines</ExternalLink>.
</>
);
}
}

if (oneOf(firstData, '[', '[]')) {
return (
<>
This data looks like a multi-line JSON array. For Druid to parse a text file it must have
one row per event. Consider reformatting your data as{' '}
<ExternalLink href="http://ndjson.org/">newline delimited JSON</ExternalLink>.
</>
const format = guessSimpleInputFormat(sampleLines, isStreaming);
const text = sampleLines.join('\n');
if (oneOf(format.type, 'csv', 'tsv')) {
const valueWithNewline = findValueWithNewline(
format.type === 'csv' ? csvParseRows(text) : tsvParseRows(text),
);
if (valueWithNewline) {
const formatLabel = format.type.toUpperCase();
return (
<>
{`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line and thus ${formatLabel} values can not contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}
vogievetsky marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
{`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line and thus ${formatLabel} values can not contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}
{`This ${formatLabel} data has values that contain new lines. Druid requires ${formatLabel} files to have one event per line, so ${formatLabel} values cannot contain new lines. Consider encoding new lines in the values of your ${formatLabel} with some special delimiter.`}

</>
);
}
}

return;
Expand Down
3 changes: 3 additions & 0 deletions web-console/src/druid-models/input-source/input-source.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import { deepGet, deepSet, nonEmptyArray, typeIsKnown } from '../../utils';

export const FILTER_SUGGESTIONS: string[] = [
'*',
'*.jsonl',
'*.jsonl.gz',
'*.json',
'*.json.gz',
'*.csv',
Expand Down Expand Up @@ -179,6 +181,7 @@ export const INPUT_SOURCE_FIELDS: Field<InputSource>[] = [
required: true,
placeholder: 'Paste your data here',
multiline: true,
height: '400px',
info: <p>Put you inline data here</p>,
},

Expand Down
9 changes: 0 additions & 9 deletions web-console/src/utils/general.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -322,15 +322,6 @@ export function pluralIfNeeded(n: NumberLike, singular: string, plural?: string)

// ----------------------------

/**
 * Reports whether `json` can be parsed by `JSONBig.parse` without throwing.
 *
 * @param json - the text to check
 * @returns `true` when parsing succeeds, `false` when `JSONBig.parse` throws
 */
export function validJson(json: string): boolean {
  let parsedOk = true;
  try {
    // Only the success or failure of the parse matters; the parsed value is discarded.
    JSONBig.parse(json);
  } catch {
    parsedOk = false;
  }
  return parsedOk;
}

/**
 * Maps every element of `xs` through `f` and collects the results, leaving out
 * any result that is `undefined`.
 *
 * Only `undefined` is dropped: other falsy results such as `null`, `0`, `''`
 * and `false` are kept.
 *
 * @param xs - the input elements
 * @param f - mapping function, called with each element and its index
 * @returns the defined results of `f`, in input order
 */
export function filterMap<T, Q>(xs: readonly T[], f: (x: T, i: number) => Q | undefined): Q[] {
  const kept: Q[] = [];
  xs.forEach((item, index) => {
    const mapped = f(item, index);
    if (typeof mapped !== 'undefined') kept.push(mapped);
  });
  return kept;
}
Expand Down
7 changes: 5 additions & 2 deletions web-console/src/views/load-data-view/load-data-view.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1366,13 +1366,16 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat

this.updateSpec(fillDataSourceNameIfNeeded(newSpec));
} else {
const issue = issueWithSampleData(inputData, spec);
const issue = issueWithSampleData(
filterMap(inputData.data, l => l.input?.raw),
isStreamingSpec(spec),
);
if (issue) {
AppToaster.show({
icon: IconNames.WARNING_SIGN,
intent: Intent.WARNING,
message: issue,
timeout: 10000,
timeout: 30000,
});
return false;
}
Expand Down
Loading