From b98f7fe8197b2e4d46bfad160fc62603edb03a8c Mon Sep 17 00:00:00 2001
From: Gary Gregory
- * This class implements the {@link Serializable} interface with the following caveats:
+ * This class implements the {@link Serializable} interface with the following caveats:
*
- * The {@code serialVersionUID} values are:
+ * The {@code serialVersionUID} values are:
* Serialization
*
- *
*
- *
*
* Notes
@@ -193,7 +193,14 @@ public static class Builder implements Supplier
- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *
** If the comment marker is not set, then the header comments are ignored. @@ -349,13 +359,14 @@ public Builder setAutoFlush(final boolean autoFlush) { *
* For example: *
+ * *- * builder.setCommentMarker('#') - * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); + * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); **
* writes: *
+ * ** # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z @@ -379,8 +390,7 @@ public Builder setCommentMarker(final char commentMarker) { * Comments are printed first, before headers. * *- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *
** If the comment marker is not set, then the header comments are ignored. @@ -388,13 +398,14 @@ public Builder setCommentMarker(final char commentMarker) { *
* For example: *
+ * *- * builder.setCommentMarker('#') - * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); + * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); *** writes: *
+ * ** # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z @@ -447,8 +458,8 @@ public Builder setDelimiter(final String delimiter) { * @since 1.10.0 */ public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { - this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); - return this; + this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); + return this; } /** @@ -598,8 +609,7 @@ public Builder setHeader(final String... header) { * Comments are printed first, before headers. * *- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *
** If the comment marker is not set, then the header comments are ignored. @@ -607,13 +617,14 @@ public Builder setHeader(final String... header) { *
* For example: *
+ * *- * builder.setCommentMarker('#') - * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); + * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); *** writes: *
+ * ** # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z @@ -636,8 +647,7 @@ public Builder setHeaderComments(final Object... headerComments) { * Comments are printed first, before headers. * *- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *
** If the comment marker is not set, then the header comments are ignored. @@ -645,13 +655,14 @@ public Builder setHeaderComments(final Object... headerComments) { *
* For example: *
+ * *- * builder.setCommentMarker('#') - * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString()); + * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString()); *** writes: *
+ * ** # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z @@ -956,8 +967,7 @@ public CSVFormat getFormat() { } /** - * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing - * empty lines. + * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. * ** The {@link Builder} settings are: @@ -972,8 +982,7 @@ public CSVFormat getFormat() { * * @see Predefined#Default */ - public static final CSVFormat DEFAULT = new CSVFormat(Constants.COMMA, Constants.DOUBLE_QUOTE_CHAR, null, null, null, false, true, Constants.CRLF, null, - null, null, false, false, false, false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false); + public static final CSVFormat DEFAULT = new CSVFormat(Builder.create()); /** * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary @@ -1091,10 +1100,9 @@ public CSVFormat getFormat() { *
* As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states: *
- *The csv parser accepts that data that complies with RFC RFC-4180. - * As a result, backslashes are not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape - * internal double-quote marks by prepending another double-quote. - *+ *The csv parser accepts that data that complies with RFC RFC-4180. As a result, backslashes are + * not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape internal double-quote marks by prepending + * another double-quote.** The {@link Builder} settings are: *
@@ -1366,7 +1374,7 @@ static <T> T[] clone(final T... values) { /** * Returns true if the given string contains the search char. * - * @param source the string to check. + * @param source the string to check. * @param searchCh the character to search. * @return true if {@code c} contains a line break character */ @@ -1444,8 +1452,7 @@ private static boolean isTrimChar(final CharSequence charSequence, final int pos * @see #TDF */ public static CSVFormat newFormat(final char delimiter) { - return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, - DuplicateHeaderMode.ALLOW_ALL, false, false); + return new CSVFormat(new Builder().setDelimiter(delimiter)); } static String[] toStringArray(final Object[] values) { @@ -1573,74 +1580,20 @@ private CSVFormat(final Builder builder) { validate(); } - /** - * Creates a customized CSV format. - * - * @param delimiter the char used for value separation, must not be a line break character. - * @param quoteChar the Character used as value encapsulation marker, may be {@code null} to disable. - * @param quoteMode the quote mode. - * @param commentStart the Character used for comment identification, may be {@code null} to disable. - * @param escape the Character used to escape special characters in values, may be {@code null} to disable. - * @param ignoreSurroundingSpaces {@code true} when whitespaces enclosing values should be ignored. - * @param ignoreEmptyLines {@code true} when the parser should skip empty lines. - * @param recordSeparator the line separator to use for output. - * @param nullString the String to convert to and from {@code null}. - * @param headerComments the comments to be printed by the Printer before the actual CSV data. - * @param header the header. - * @param skipHeaderRecord if {@code true} the header row will be skipped. 
- * @param allowMissingColumnNames if {@code true} the missing column names are allowed when parsing the header line. - * @param ignoreHeaderCase if {@code true} header names will be accessed ignoring case when parsing input. - * @param trim if {@code true} next record value will be trimmed. - * @param trailingDelimiter if {@code true} the trailing delimiter wil be added before record separator (if set). - * @param autoFlush if {@code true} the underlying stream will be flushed before closing. - * @param duplicateHeaderMode the behavior when handling duplicate headers. - * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility. - * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. - * @throws IllegalArgumentException if the delimiter is a line break character. - */ - private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape, - final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, - final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, - final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush, - final DuplicateHeaderMode duplicateHeaderMode, final boolean trailingData, final boolean lenientEof) { - this.delimiter = delimiter; - this.quoteCharacter = quoteChar; - this.quoteMode = quoteMode; - this.commentMarker = commentStart; - this.escapeCharacter = escape; - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - this.allowMissingColumnNames = allowMissingColumnNames; - this.ignoreEmptyLines = ignoreEmptyLines; - this.recordSeparator = recordSeparator; - this.nullString = nullString; - this.headerComments = toStringArray(headerComments); - this.headers = clone(header); - 
this.skipHeaderRecord = skipHeaderRecord; - this.ignoreHeaderCase = ignoreHeaderCase; - this.lenientEof = lenientEof; - this.trailingData = trailingData; - this.trailingDelimiter = trailingDelimiter; - this.trim = trim; - this.autoFlush = autoFlush; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - this.duplicateHeaderMode = duplicateHeaderMode; - validate(); - } - private void append(final char c, final Appendable appendable) throws IOException { - //try { - appendable.append(c); - //} catch (final IOException e) { - // throw new UncheckedIOException(e); - //} + // try { + appendable.append(c); + // } catch (final IOException e) { + // throw new UncheckedIOException(e); + // } } private void append(final CharSequence csq, final Appendable appendable) throws IOException { - //try { - appendable.append(csq); - //} catch (final IOException e) { - // throw new UncheckedIOException(e); - //} + // try { + appendable.append(csq); + // } catch (final IOException e) { + // throw new UncheckedIOException(e); + // } } /** @@ -1686,7 +1639,7 @@ public boolean equals(final Object obj) { } private void escape(final char c, final Appendable appendable) throws IOException { - append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional + append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional append(c, appendable); } @@ -1750,8 +1703,8 @@ public boolean getAutoFlush() { * Comments are printed first, before headers. * * - * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment - * marker written at the start of each comment line. + * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment + * line. *
** If the comment marker is not set, then the header comments are ignored. @@ -1759,13 +1712,14 @@ public boolean getAutoFlush() { *
* For example: *
+ * *- * builder.setCommentMarker('#') - * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); + * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); *** writes: *
+ * ** # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z @@ -1823,7 +1777,7 @@ public DuplicateHeaderMode getDuplicateHeaderMode() { * @return the escape character, may be {@code 0} */ char getEscapeChar() { - return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional + return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional } /** @@ -1853,8 +1807,8 @@ public String[] getHeader() { * Comments are printed first, before headers. * *- * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment - * marker written at the start of each comment line. + * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment + * line. *
** If the comment marker is not set, then the header comments are ignored. @@ -1862,13 +1816,14 @@ public String[] getHeader() { *
* For example: *
+ * *- * builder.setCommentMarker('#') - * .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); + * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); *** writes: *
+ * ** # Generated by Apache Commons CSV. * # 1970-01-01T00:00:00Z @@ -1988,8 +1943,7 @@ public boolean getTrailingDelimiter() { } /** - * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by - * {CSVParser#addRecordValue(boolean)} + * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by {CSVParser#addRecordValue(boolean)} * * @return whether to trim leading and trailing blanks. */ @@ -2023,16 +1977,11 @@ public boolean isCommentMarkerSet() { /** * Tests whether the next characters constitute a delimiter * - * @param ch0 - * the first char (index 0). - * @param charSeq - * the match char sequence - * @param startIndex - * where start to match - * @param delimiter - * the delimiter - * @param delimiterLength - * the delimiter length + * @param ch0 the first char (index 0). + * @param charSeq the match char sequence + * @param startIndex where start to match + * @param delimiter the delimiter + * @param delimiterLength the delimiter length * @return true if the match is successful */ private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { @@ -2087,7 +2036,7 @@ public boolean isQuoteCharacterSet() { * * @param reader the input stream * @return a parser over a stream of {@link CSVRecord}s. - * @throws IOException If an I/O error occurs + * @throws IOException If an I/O error occurs * @throws CSVException Thrown on invalid input. */ public CSVParser parse(final Reader reader) throws IOException { @@ -2134,7 +2083,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo } final boolean quoteCharacterSet = isQuoteCharacterSet(); if (quoteCharacterSet) { - append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional + append(getQuoteCharacter().charValue(), out); // N.B. 
Explicit (un)boxing is intentional } // Stream the input to the output without reading or holding the whole value in memory. // AppendableOutputStream cannot "close" an Appendable. @@ -2142,7 +2091,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo IOUtils.copy(inputStream, outputStream); } if (quoteCharacterSet) { - append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional + append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional } } @@ -2203,8 +2152,7 @@ private synchronized void print(final Object object, final CharSequence value, f } /** - * Prints to the specified {@code Path} with given {@code Charset}, - * returns a {@code CSVPrinter} which the caller MUST close. + * Prints to the specified {@code Path} with given {@code Charset}, returns a {@code CSVPrinter} which the caller MUST close. * ** See also {@link CSVPrinter}. @@ -2276,8 +2224,8 @@ public synchronized void println(final Appendable appendable) throws IOException * the record, so there is no need to call {@link #println(Appendable)}. *
* - * @param appendable where to write. - * @param values values to output. + * @param appendable where to write. + * @param values values to output. * @throws IOException If an I/O error occurs. * @since 1.4 */ @@ -2392,7 +2340,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi final int len = charSeq.length(); final char[] delim = getDelimiterCharArray(); final int delimLength = delim.length; - final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional + final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional // If escape char not specified, default to the quote char // This avoids having to keep checking whether there is an escape character // at the cost of checking against quote twice @@ -2486,7 +2434,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi /** * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. * - * @param reader What to print + * @param reader What to print * @param appendable Where to print it * @throws IOException If an I/O error occurs */ @@ -2495,7 +2443,7 @@ private void printWithQuotes(final Reader reader, final Appendable appendable) t printWithEscapes(reader, appendable); return; } - final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional + final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional // (1) Append opening quote append(quote, appendable); // (2) Append Reader contents, doubling quotes @@ -2576,7 +2524,7 @@ private void validate() throws IllegalArgumentException { if (containsLineBreak(delimiter)) { throw new IllegalArgumentException("The delimiter cannot be a line break"); } - if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. 
Explicit (un)boxing is intentional + if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); } if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional @@ -2603,10 +2551,9 @@ private void validate() throws IllegalArgumentException { // Sanitize all empty headers to the empty string "" when checking duplicates final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); if (containsHeader && !(blank && emptyDuplicatesAllowed)) { - throw new IllegalArgumentException( - String.format( - "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", - header, Arrays.toString(headers))); + throw new IllegalArgumentException(String.format( + "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, + Arrays.toString(headers))); } } }