From b98f7fe8197b2e4d46bfad160fc62603edb03a8c Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 2 Jan 2025 17:23:09 -0500 Subject: [PATCH] Simplify internals --- .../org/apache/commons/csv/CSVFormat.java | 221 +++++++----------- 1 file changed, 84 insertions(+), 137 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index c83b0674f4..4990457bbe 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -156,18 +156,18 @@ * *

Serialization

*

- * This class implements the {@link Serializable} interface with the following caveats: + * This class implements the {@link Serializable} interface with the following caveats: *

* *

- * The {@code serialVersionUID} values are: + * The {@code serialVersionUID} values are: *

* * *

Notes

@@ -193,7 +193,14 @@ public static class Builder implements Supplier { * @return a copy of the builder */ public static Builder create() { - return new Builder(DEFAULT); + // @formatter:off + return new Builder() + .setDelimiter(Constants.COMMA) + .setRecordSeparator(Constants.CRLF) + .setQuote(Constants.DOUBLE_QUOTE_CHAR) + .setIgnoreEmptyLines(true) + .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL); + // @formatter:on } /** @@ -248,6 +255,10 @@ public static Builder create(final CSVFormat csvFormat) { private boolean trim; + private Builder() { + // empty + } + private Builder(final CSVFormat csvFormat) { this.delimiter = csvFormat.delimiter; this.quoteCharacter = csvFormat.quoteCharacter; @@ -340,8 +351,7 @@ public Builder setAutoFlush(final boolean autoFlush) { * Comments are printed first, before headers. *

*

- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *

*

* If the comment marker is not set, then the header comments are ignored. @@ -349,13 +359,14 @@ public Builder setAutoFlush(final boolean autoFlush) { *

* For example: *

+ * *
-         * builder.setCommentMarker('#')
-         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
          * 
*

* writes: *

+ * *
          * # Generated by Apache Commons CSV.
          * # 1970-01-01T00:00:00Z
@@ -379,8 +390,7 @@ public Builder setCommentMarker(final char commentMarker) {
          * Comments are printed first, before headers.
          * 

*

- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *

*

* If the comment marker is not set, then the header comments are ignored. @@ -388,13 +398,14 @@ public Builder setCommentMarker(final char commentMarker) { *

* For example: *

+ * *
-         * builder.setCommentMarker('#')
-         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
          * 
*

* writes: *

+ * *
          * # Generated by Apache Commons CSV.
          * # 1970-01-01T00:00:00Z
@@ -447,8 +458,8 @@ public Builder setDelimiter(final String delimiter) {
          * @since 1.10.0
          */
         public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
-          this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode");
-          return this;
+            this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode");
+            return this;
         }
 
         /**
@@ -598,8 +609,7 @@ public Builder setHeader(final String... header) {
          * Comments are printed first, before headers.
          * 

*

- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *

*

* If the comment marker is not set, then the header comments are ignored. @@ -607,13 +617,14 @@ public Builder setHeader(final String... header) { *

* For example: *

+ * *
-         * builder.setCommentMarker('#')
-         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
          * 
*

* writes: *

+ * *
          * # Generated by Apache Commons CSV.
          * # 1970-01-01T00:00:00Z
@@ -636,8 +647,7 @@ public Builder setHeaderComments(final Object... headerComments) {
          * Comments are printed first, before headers.
          * 

*

- * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of - * each comment line. + * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. *

*

* If the comment marker is not set, then the header comments are ignored. @@ -645,13 +655,14 @@ public Builder setHeaderComments(final Object... headerComments) { *

* For example: *

+ * *
-         * builder.setCommentMarker('#')
-         *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString());
+         * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString());
          * 
*

* writes: *

+ * *
          * # Generated by Apache Commons CSV.
          * # 1970-01-01T00:00:00Z
@@ -956,8 +967,7 @@ public CSVFormat getFormat() {
     }
 
     /**
-     * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
-     * empty lines.
+     * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
      *
      * 

* The {@link Builder} settings are: @@ -972,8 +982,7 @@ public CSVFormat getFormat() { * * @see Predefined#Default */ - public static final CSVFormat DEFAULT = new CSVFormat(Constants.COMMA, Constants.DOUBLE_QUOTE_CHAR, null, null, null, false, true, Constants.CRLF, null, - null, null, false, false, false, false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false); + public static final CSVFormat DEFAULT = new CSVFormat(Builder.create()); /** * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary @@ -1091,10 +1100,9 @@ public CSVFormat getFormat() { *

* As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states: *

- *
The csv parser accepts that data that complies with RFC RFC-4180. - * As a result, backslashes are not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape - * internal double-quote marks by prepending another double-quote. - *
+ *
The csv parser accepts that data that complies with RFC RFC-4180. As a result, backslashes are + * not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape internal double-quote marks by prepending + * another double-quote.
*

* The {@link Builder} settings are: *

@@ -1366,7 +1374,7 @@ static T[] clone(final T... values) { /** * Returns true if the given string contains the search char. * - * @param source the string to check. + * @param source the string to check. * @param searchCh the character to search. * @return true if {@code c} contains a line break character */ @@ -1444,8 +1452,7 @@ private static boolean isTrimChar(final CharSequence charSequence, final int pos * @see #TDF */ public static CSVFormat newFormat(final char delimiter) { - return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, - DuplicateHeaderMode.ALLOW_ALL, false, false); + return new CSVFormat(new Builder().setDelimiter(delimiter)); } static String[] toStringArray(final Object[] values) { @@ -1573,74 +1580,20 @@ private CSVFormat(final Builder builder) { validate(); } - /** - * Creates a customized CSV format. - * - * @param delimiter the char used for value separation, must not be a line break character. - * @param quoteChar the Character used as value encapsulation marker, may be {@code null} to disable. - * @param quoteMode the quote mode. - * @param commentStart the Character used for comment identification, may be {@code null} to disable. - * @param escape the Character used to escape special characters in values, may be {@code null} to disable. - * @param ignoreSurroundingSpaces {@code true} when whitespaces enclosing values should be ignored. - * @param ignoreEmptyLines {@code true} when the parser should skip empty lines. - * @param recordSeparator the line separator to use for output. - * @param nullString the String to convert to and from {@code null}. - * @param headerComments the comments to be printed by the Printer before the actual CSV data. - * @param header the header. - * @param skipHeaderRecord if {@code true} the header row will be skipped. 
- * @param allowMissingColumnNames if {@code true} the missing column names are allowed when parsing the header line. - * @param ignoreHeaderCase if {@code true} header names will be accessed ignoring case when parsing input. - * @param trim if {@code true} next record value will be trimmed. - * @param trailingDelimiter if {@code true} the trailing delimiter wil be added before record separator (if set). - * @param autoFlush if {@code true} the underlying stream will be flushed before closing. - * @param duplicateHeaderMode the behavior when handling duplicate headers. - * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility. - * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. - * @throws IllegalArgumentException if the delimiter is a line break character. - */ - private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape, - final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, - final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, - final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush, - final DuplicateHeaderMode duplicateHeaderMode, final boolean trailingData, final boolean lenientEof) { - this.delimiter = delimiter; - this.quoteCharacter = quoteChar; - this.quoteMode = quoteMode; - this.commentMarker = commentStart; - this.escapeCharacter = escape; - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - this.allowMissingColumnNames = allowMissingColumnNames; - this.ignoreEmptyLines = ignoreEmptyLines; - this.recordSeparator = recordSeparator; - this.nullString = nullString; - this.headerComments = toStringArray(headerComments); - this.headers = clone(header); - 
this.skipHeaderRecord = skipHeaderRecord; - this.ignoreHeaderCase = ignoreHeaderCase; - this.lenientEof = lenientEof; - this.trailingData = trailingData; - this.trailingDelimiter = trailingDelimiter; - this.trim = trim; - this.autoFlush = autoFlush; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - this.duplicateHeaderMode = duplicateHeaderMode; - validate(); - } - private void append(final char c, final Appendable appendable) throws IOException { - //try { - appendable.append(c); - //} catch (final IOException e) { - // throw new UncheckedIOException(e); - //} + // try { + appendable.append(c); + // } catch (final IOException e) { + // throw new UncheckedIOException(e); + // } } private void append(final CharSequence csq, final Appendable appendable) throws IOException { - //try { - appendable.append(csq); - //} catch (final IOException e) { - // throw new UncheckedIOException(e); - //} + // try { + appendable.append(csq); + // } catch (final IOException e) { + // throw new UncheckedIOException(e); + // } } /** @@ -1686,7 +1639,7 @@ public boolean equals(final Object obj) { } private void escape(final char c, final Appendable appendable) throws IOException { - append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional + append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional append(c, appendable); } @@ -1750,8 +1703,8 @@ public boolean getAutoFlush() { * Comments are printed first, before headers. *

*

- * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment - * marker written at the start of each comment line. + * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment + * line. *

*

* If the comment marker is not set, then the header comments are ignored. @@ -1759,13 +1712,14 @@ public boolean getAutoFlush() { *

* For example: *

+ * *
-     * builder.setCommentMarker('#')
-     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
      * 
*

* writes: *

+ * *
      * # Generated by Apache Commons CSV.
      * # 1970-01-01T00:00:00Z
@@ -1823,7 +1777,7 @@ public DuplicateHeaderMode getDuplicateHeaderMode() {
      * @return the escape character, may be {@code 0}
      */
     char getEscapeChar() {
-        return escapeCharacter != null ? escapeCharacter.charValue() : 0;  // N.B. Explicit (un)boxing is intentional
+        return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional
     }
 
     /**
@@ -1853,8 +1807,8 @@ public String[] getHeader() {
      * Comments are printed first, before headers.
      * 

*

- * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment - * marker written at the start of each comment line. + * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment + * line. *

*

* If the comment marker is not set, then the header comments are ignored. @@ -1862,13 +1816,14 @@ public String[] getHeader() { *

* For example: *

+ * *
-     * builder.setCommentMarker('#')
-     *        .setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
+     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
      * 
*

* writes: *

+ * *
      * # Generated by Apache Commons CSV.
      * # 1970-01-01T00:00:00Z
@@ -1988,8 +1943,7 @@ public boolean getTrailingDelimiter() {
     }
 
     /**
-     * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by
-     * {CSVParser#addRecordValue(boolean)}
+     * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} and by {@code CSVParser#addRecordValue(boolean)}.
      *
      * @return whether to trim leading and trailing blanks.
      */
@@ -2023,16 +1977,11 @@ public boolean isCommentMarkerSet() {
     /**
      * Tests whether the next characters constitute a delimiter
      *
-     * @param ch0
-     *            the first char (index 0).
-     * @param charSeq
-     *            the match char sequence
-     * @param startIndex
-     *            where start to match
-     * @param delimiter
-     *            the delimiter
-     * @param delimiterLength
-     *            the delimiter length
+     * @param ch0             the first char (index 0).
+     * @param charSeq         the match char sequence
+     * @param startIndex      where to start matching
+     * @param delimiter       the delimiter
+     * @param delimiterLength the delimiter length
      * @return true if the match is successful
      */
     private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) {
@@ -2087,7 +2036,7 @@ public boolean isQuoteCharacterSet() {
      *
      * @param reader the input stream
      * @return a parser over a stream of {@link CSVRecord}s.
-     * @throws IOException If an I/O error occurs
+     * @throws IOException  If an I/O error occurs
      * @throws CSVException Thrown on invalid input.
      */
     public CSVParser parse(final Reader reader) throws IOException {
@@ -2134,7 +2083,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo
         }
         final boolean quoteCharacterSet = isQuoteCharacterSet();
         if (quoteCharacterSet) {
-            append(getQuoteCharacter().charValue(), out);  // N.B. Explicit (un)boxing is intentional
+            append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional
         }
         // Stream the input to the output without reading or holding the whole value in memory.
         // AppendableOutputStream cannot "close" an Appendable.
@@ -2142,7 +2091,7 @@ private void print(final InputStream inputStream, final Appendable out, final bo
             IOUtils.copy(inputStream, outputStream);
         }
         if (quoteCharacterSet) {
-            append(getQuoteCharacter().charValue(), out);  // N.B. Explicit (un)boxing is intentional
+            append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional
         }
     }
 
@@ -2203,8 +2152,7 @@ private synchronized void print(final Object object, final CharSequence value, f
     }
 
     /**
-     * Prints to the specified {@code Path} with given {@code Charset},
-     * returns a {@code CSVPrinter} which the caller MUST close.
+     * Prints to the specified {@code Path} with given {@code Charset}, returns a {@code CSVPrinter} which the caller MUST close.
      *
      * 

* See also {@link CSVPrinter}. @@ -2276,8 +2224,8 @@ public synchronized void println(final Appendable appendable) throws IOException * the record, so there is no need to call {@link #println(Appendable)}. *

* - * @param appendable where to write. - * @param values values to output. + * @param appendable where to write. + * @param values values to output. * @throws IOException If an I/O error occurs. * @since 1.4 */ @@ -2392,7 +2340,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi final int len = charSeq.length(); final char[] delim = getDelimiterCharArray(); final int delimLength = delim.length; - final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional + final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional // If escape char not specified, default to the quote char // This avoids having to keep checking whether there is an escape character // at the cost of checking against quote twice @@ -2486,7 +2434,7 @@ private void printWithQuotes(final Object object, final CharSequence charSeq, fi /** * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. * - * @param reader What to print + * @param reader What to print * @param appendable Where to print it * @throws IOException If an I/O error occurs */ @@ -2495,7 +2443,7 @@ private void printWithQuotes(final Reader reader, final Appendable appendable) t printWithEscapes(reader, appendable); return; } - final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional + final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional // (1) Append opening quote append(quote, appendable); // (2) Append Reader contents, doubling quotes @@ -2576,7 +2524,7 @@ private void validate() throws IllegalArgumentException { if (containsLineBreak(delimiter)) { throw new IllegalArgumentException("The delimiter cannot be a line break"); } - if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. 
Explicit (un)boxing is intentional + if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); } if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional @@ -2603,10 +2551,9 @@ private void validate() throws IllegalArgumentException { // Sanitize all empty headers to the empty string "" when checking duplicates final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); if (containsHeader && !(blank && emptyDuplicatesAllowed)) { - throw new IllegalArgumentException( - String.format( - "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", - header, Arrays.toString(headers))); + throw new IllegalArgumentException(String.format( + "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, + Arrays.toString(headers))); } } }