Skip to content

Commit

Permalink
Update Java API with 'skipFileChecks' and 'maxCharsPerCell'
Browse files Browse the repository at this point in the history
  • Loading branch information
techncl committed Feb 4, 2025
1 parent 7f77b61 commit f5f44d9
Show file tree
Hide file tree
Showing 5 changed files with 357 additions and 35 deletions.
66 changes: 47 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,35 +41,63 @@ If you wish to use the CSV Validator from your own Java project, we provide a na
<dependency>
<groupId>uk.gov.nationalarchives</groupId>
<artifactId>csv-validator-java-api</artifactId>
<version>1.3.0</version>
<version>1.4.0</version>
</dependency>
```

The Javadoc can be found on Maven Central, or you can build it locally by executing `mvn javadoc:javadoc`.

Example Java code of using the CSV Validator through the Java API:
```java
Boolean failFast = false;
List<Substitution> pathSubstitutions = new ArrayList<Substitution>();

List<FailMessage> messages = CsvValidator.validate(
"/data/csv/data.csv",
"/data/csv/data-schema.csvs",
failFast,
pathSubstitutions,
true,
false);
Charset csvEncoding = Charset.forName("UTF-8"); // default is UTF-8
boolean validateCsvEncoding = true;
Charset csvSchemaEncoding = Charset.forName("UTF-8"); // default is UTF-8
boolean failFast = true; // default is false
List<Substitution> pathSubstitutions = new ArrayList<Substitution>(); // default is an empty ArrayList
boolean enforceCaseSensitivePathChecks = true; // default is false
boolean trace = false; // default is false
ProgressCallback progress; // default is null
boolean skipFileChecks = true; // default is false
int maxCharsPerCell = 8096; // default is 4096

// add a substitution path
pathSubstitutions.add(new Substitution("file://something", "/home/xxx"));

CsvValidator.ValidatorBuilder validateWithStringNames = new CsvValidator.ValidatorBuilder(
"/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv",
"/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs"
)

// alternatively, you can pass in Readers for each file
Reader csvReader = new Reader();
Reader csvSchemaReader = new Reader();
CsvValidator.ValidatorBuilder validateWithReaders = new CsvValidator.ValidatorBuilder(
csvReader, csvSchemaReader
)

List<FailMessage> messages = validateWithStringNames
.usingCsvEncoding(csvEncoding, validateCsvEncoding) // should only be `true` if using UTF-8 encoding, otherwise it will throw an exception
.usingCsvSchemaEncoding(csvSchemaEncoding)
.usingFailFast(failFast)
.usingPathSubstitutions(pathSubstitutions)
.usingEnforceCaseSensitivePathChecks(enforceCaseSensitivePathChecks)
.usingTrace(trace)
.usingProgress(progress)
.usingSkipFileChecks(skipFileChecks)
.usingMaxCharsPerCell(maxCharsPerCell)
.runValidation();

if(messages.isEmpty()) {
System.out.println("Completed validation OK");
System.out.println("All worked OK");
} else {
for(FailMessage message : messages) {
if(message instanceof WarningMessage) {
System.out.println("[WARN] " + message.getMessage());
} else {
System.out.println("[ERROR] " + message.getMessage());
}
}
for(FailMessage message : messages) {
if(message instanceof WarningMessage) {
System.out.println("Warning: " + message.getMessage());
} else {
System.out.println("Error: " + message.getMessage());
}
}
}
}
```

Expand Down
5 changes: 5 additions & 0 deletions csv-validator-java-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@
<artifactId>csv-validator-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,52 @@

import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import static uk.gov.nationalarchives.csv.validator.api.CsvValidator$.MODULE$;
import uk.gov.nationalarchives.csv.validator.api.java.CsvValidatorJavaBridge.*;

/**
* Validate that a CSV file matches the format specified in a CSV schema.
* This is a Java wrapper calling the main Scala application.
*
* <p> A typical invocation sequence:</p>
* <blockquote><pre>{@code
* Boolean failFast = false;
* List<Substitution> pathSubstitutions = new ArrayList<Substitution>();
* Charset csvEncoding = Charset.forName("UTF-8"); // default is UTF-8
* Charset csvSchemaEncoding = Charset.forName("UTF-8"); // default is UTF-8
* boolean failFast = true; // default is false
* List<Substitution> pathSubstitutions = new ArrayList<Substitution>(); // default is an empty ArrayList
* boolean enforceCaseSensitivePathChecks = true; // default is false
* boolean trace = false; // default is false
* ProgressCallback progress; // default is null
* boolean skipFileChecks = true; // default is false
* int maxCharsPerCell = 8096; // default is 4096
*
* //add a substitution path
* pathSubstitutions.add(new Substitution("file://something", "/home/xxx"));
*
* List<FailMessage> messages = CsvValidator.validate(
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv",
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs",
* failFast,
* pathSubstitutions,
* true);
* CsvValidator.ValidatorBuilder validateWithStringNames = new CsvValidator.ValidatorBuilder(
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data.csv",
* "/home/dev/IdeaProjects/csv/csv-validator/csv-validator-core/data-schema.csvs"
* )
*
* // alternatively, you can pass in Readers for each file
* Reader csvReader = new Reader();
* Reader csvSchemaReader = new Reader();
* CsvValidator.ValidatorBuilder validateWithReaders = new CsvValidator.ValidatorBuilder(
* csvReader, csvSchemaReader
* )
*
* List<FailMessage> messages = validateWithStringNames
* .usingCsvEncoding(csvEncoding, true) // the flag may only be true when csvEncoding is UTF-8
* .usingCsvSchemaEncoding(csvSchemaEncoding)
* .usingFailFast(failFast)
* .usingPathSubstitutions(pathSubstitutions)
* .usingEnforceCaseSensitivePathChecks(enforceCaseSensitivePathChecks)
* .usingTrace(trace)
* .usingProgress(progress)
* .usingSkipFileChecks(skipFileChecks)
* .usingMaxCharsPerCell(maxCharsPerCell)
* .runValidation();
*
* if(messages.isEmpty()) {
* System.out.println("All worked OK");
Expand Down Expand Up @@ -63,6 +88,7 @@ public class CsvValidator {
*
* @return an empty list if there are no errors, otherwise the list of failure messages.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final String csvSchemaFilename,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace) {
    // Deprecated: prefer ValidatorBuilder, which also exposes skipFileChecks and maxCharsPerCell.
    // No charsets were supplied, so both files use the library default and the
    // charset-aware overload does the actual work.
    final Charset csvCharset = MODULE$.DEFAULT_ENCODING();
    final Charset schemaCharset = MODULE$.DEFAULT_ENCODING();
    return validate(
            csvFilename, csvCharset,
            csvSchemaFilename, schemaCharset,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace);
}
Expand All @@ -82,6 +108,7 @@ public static List<FailMessage> validate(final String csvFilename, final String
*
* @return an empty list if there are no errors, otherwise the list of failure messages.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final Charset csvEncoding,
        final String csvSchemaFilename,
        final Charset csvSchemaEncoding,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace) {
    // Deprecated: prefer ValidatorBuilder, which also exposes skipFileChecks and maxCharsPerCell.
    // Pure pass-through: every argument is forwarded unchanged to the bridge.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvFilename, csvEncoding,
            csvSchemaFilename, csvSchemaEncoding,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace);
    return failures;
}
Expand All @@ -104,6 +131,7 @@ public static List<FailMessage> validate(final String csvFilename, final Charset
*
* @return an empty list if there are no errors, otherwise the list of failure messages.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final String csvSchemaFilename,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace,
        final ProgressCallback progress) {
    // Deprecated: prefer ValidatorBuilder, which also exposes skipFileChecks and maxCharsPerCell.
    // No charsets were supplied, so both files use the library default and the
    // charset-aware, progress-reporting overload does the actual work.
    final Charset csvCharset = MODULE$.DEFAULT_ENCODING();
    final Charset schemaCharset = MODULE$.DEFAULT_ENCODING();
    return validate(
            csvFilename, csvCharset,
            csvSchemaFilename, schemaCharset,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace, progress);
}
Expand All @@ -125,6 +153,7 @@ public static List<FailMessage> validate(final String csvFilename, final String
*
* @return an empty list if there are no errors, otherwise the list of failure messages.
*/
@Deprecated
public static List<FailMessage> validate(
        final String csvFilename,
        final Charset csvEncoding,
        final String csvSchemaFilename,
        final Charset csvSchemaEncoding,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace,
        final ProgressCallback progress) {
    // Deprecated: prefer ValidatorBuilder, which also exposes skipFileChecks and maxCharsPerCell.
    // Pure pass-through: every argument, including the progress callback, goes to the bridge unchanged.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvFilename, csvEncoding,
            csvSchemaFilename, csvSchemaEncoding,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace, progress);
    return failures;
}
Expand All @@ -142,6 +171,7 @@ public static List<FailMessage> validate(final String csvFilename, final Charset
*
* @return an empty list if there are no errors, otherwise the list of failure messages.
*/
@Deprecated
public static List<FailMessage> validate(
        final Reader csvData,
        final Reader csvSchema,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace) {
    // Deprecated: prefer ValidatorBuilder, which also exposes skipFileChecks and maxCharsPerCell.
    // Reader-based variant: the CSV and schema content come from the supplied Readers
    // and are forwarded to the bridge as-is.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvData, csvSchema,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace);
    return failures;
}
Expand All @@ -161,8 +191,97 @@ public static List<FailMessage> validate(final Reader csvData, final Reader csvS
*
* @return an empty list if there are no errors, otherwise the list of failure messages.
*/
@Deprecated
public static List<FailMessage> validate(
        final Reader csvData,
        final Reader csvSchema,
        final boolean failFast,
        final List<Substitution> pathSubstitutions,
        final Boolean enforceCaseSensitivePathChecks,
        final Boolean trace,
        final ProgressCallback progress) {
    // Deprecated: prefer ValidatorBuilder, which also exposes skipFileChecks and maxCharsPerCell.
    // Reader-based variant with progress reporting: everything is forwarded to the bridge as-is.
    final List<FailMessage> failures = CsvValidatorJavaBridge.validate(
            csvData, csvSchema,
            failFast, pathSubstitutions, enforceCaseSensitivePathChecks, trace, progress);
    return failures;
}

/**
 * Fluent builder that gathers the options for a single validation run and
 * then submits them to {@link CsvValidatorJavaBridge}.
 *
 * <p>A builder is created either from a pair of file names or from a pair of
 * {@link Reader}s; the constructor used decides which kind of request
 * {@link #runValidation()} sends. Every {@code usingXxx} method overrides one
 * default and returns this builder so that calls can be chained.</p>
 *
 * <p>The class is {@code public} so that client code outside this package can
 * construct it. Instances are mutable and not synchronised.</p>
 */
public static class ValidatorBuilder {
    /** Canonical charset name for which UTF-8 validation of the CSV is permitted. */
    private static final String UTF_8_NAME = "UTF-8";

    // Exactly one of the two source pairs below is non-null, depending on the constructor used.
    private final String csvFileName;
    private final String csvSchemaFilename;
    private final Reader csvReader;
    private final Reader csvSchemaReader;

    /** {@code true} when built from file names, {@code false} when built from Readers. */
    private final boolean textFileValidation;

    private Charset csvEncoding = MODULE$.DEFAULT_ENCODING();
    // Derived from the default encoding above, so this initialiser must stay after csvEncoding.
    private boolean validateUtf8Encoding = UTF_8_NAME.equals(csvEncoding.name());
    private Charset csvSchemaEncoding = MODULE$.DEFAULT_ENCODING();
    private boolean failFast = false;
    private List<Substitution> pathSubstitutions = new ArrayList<>();
    private boolean enforceCaseSensitivePathChecks = false;
    private boolean trace = false;
    private ProgressCallback progress; // null unless usingProgress(...) is called
    private boolean skipFileChecks = false;
    private int maxCharsPerCell = 4096;

    /**
     * Creates a builder that validates a CSV file against a schema file.
     *
     * @param csvFileName       name of the CSV file to validate
     * @param csvSchemaFilename name of the CSV schema file to validate against
     */
    public ValidatorBuilder(String csvFileName, String csvSchemaFilename) {
        this.csvFileName = csvFileName;
        this.csvSchemaFilename = csvSchemaFilename;
        this.csvReader = null;
        this.csvSchemaReader = null;
        this.textFileValidation = true;
    }

    /**
     * Creates a builder that validates CSV content against schema content,
     * both supplied through Readers.
     *
     * @param csvReader       source of the CSV data
     * @param csvSchemaReader source of the CSV schema
     */
    public ValidatorBuilder(Reader csvReader, Reader csvSchemaReader) {
        this.csvFileName = null;
        this.csvSchemaFilename = null;
        this.csvReader = csvReader;
        this.csvSchemaReader = csvSchemaReader;
        this.textFileValidation = false;
    }

    /**
     * Sets the encoding of the CSV file and whether its UTF-8 encoding is validated.
     * Only forwarded for file-name based validation; the Reader-based request
     * built by {@link #runValidation()} does not carry these values.
     *
     * @param encoding             charset of the CSV file; must not be {@code null}
     * @param validateUtf8Encoding whether to validate UTF-8 encoding; may only be
     *                             {@code true} when {@code encoding} is UTF-8
     * @return this builder
     * @throws IllegalArgumentException if {@code encoding} is {@code null}, or if
     *         {@code validateUtf8Encoding} is {@code true} for a non-UTF-8 charset
     * @throws Exception declared only so that existing call sites that catch or
     *         propagate {@code Exception} keep compiling; nothing checked is thrown
     */
    public ValidatorBuilder usingCsvEncoding(Charset encoding, boolean validateUtf8Encoding) throws Exception {
        if (encoding == null) {
            // Previously surfaced as a message-less NullPointerException from encoding.name().
            throw new IllegalArgumentException("'encoding' must not be null");
        }
        if (validateUtf8Encoding && !UTF_8_NAME.equals(encoding.name())) {
            throw new IllegalArgumentException("'validateUtf8Encoding' is set to 'true' but " + encoding.name() + " charset was passed in");
        }

        this.csvEncoding = encoding;
        this.validateUtf8Encoding = validateUtf8Encoding;
        return this;
    }

    /**
     * Sets the encoding of the CSV schema file (file-name based validation only).
     *
     * @param schemaEncoding charset of the schema file
     * @return this builder
     */
    public ValidatorBuilder usingCsvSchemaEncoding(Charset schemaEncoding) {
        this.csvSchemaEncoding = schemaEncoding;
        return this;
    }

    /**
     * Sets the fail-fast flag that is forwarded with the validation request.
     *
     * @param failFast value of the flag; the default is {@code false}
     * @return this builder
     */
    public ValidatorBuilder usingFailFast(boolean failFast) {
        this.failFast = failFast;
        return this;
    }

    /**
     * Sets the path substitutions that are forwarded with the validation request.
     *
     * @param pathSubstitutions substitutions to apply; {@code null} is treated as
     *                          "no substitutions" so the empty-list default is never
     *                          replaced by {@code null}
     * @return this builder
     */
    public ValidatorBuilder usingPathSubstitutions(List<Substitution> pathSubstitutions) {
        this.pathSubstitutions = (pathSubstitutions == null) ? new ArrayList<>() : pathSubstitutions;
        return this;
    }

    /**
     * Sets the case-sensitive path check flag forwarded with the validation request.
     *
     * @param enforceCaseSensitivePathChecks value of the flag; the default is {@code false}
     * @return this builder
     */
    public ValidatorBuilder usingEnforceCaseSensitivePathChecks(boolean enforceCaseSensitivePathChecks) {
        this.enforceCaseSensitivePathChecks = enforceCaseSensitivePathChecks;
        return this;
    }

    /**
     * Sets the trace flag forwarded with the validation request.
     *
     * @param trace value of the flag; the default is {@code false}
     * @return this builder
     */
    public ValidatorBuilder usingTrace(boolean trace) {
        this.trace = trace;
        return this;
    }

    /**
     * Sets the progress callback forwarded with the validation request.
     *
     * @param progress callback to use; the default is {@code null}
     * @return this builder
     */
    public ValidatorBuilder usingProgress(ProgressCallback progress) {
        this.progress = progress;
        return this;
    }

    /**
     * Sets the skip-file-checks flag forwarded with the validation request.
     *
     * @param skipFileChecks value of the flag; the default is {@code false}
     * @return this builder
     */
    public ValidatorBuilder usingSkipFileChecks(boolean skipFileChecks) {
        this.skipFileChecks = skipFileChecks;
        return this;
    }

    /**
     * Sets the maximum number of characters per cell forwarded with the validation request.
     *
     * @param maxCharsPerCell the limit; the default is 4096
     * @return this builder
     */
    public ValidatorBuilder usingMaxCharsPerCell(int maxCharsPerCell) {
        this.maxCharsPerCell = maxCharsPerCell;
        return this;
    }

    /**
     * Runs the validation with the options collected so far.
     *
     * @return the result produced by {@link CsvValidatorJavaBridge}
     */
    public Result runValidation() {
        if (textFileValidation) {
            // NOTE(review): the sixth argument is a hard-coded 'true'; its meaning is defined by
            // ValidationRequest, which is not visible from this class - confirm it is intended.
            return CsvValidatorJavaBridge.validate(new ValidationRequest(
                    this.csvFileName,
                    this.csvEncoding,
                    this.validateUtf8Encoding,
                    this.csvSchemaFilename,
                    this.csvSchemaEncoding,
                    true,
                    this.failFast,
                    this.pathSubstitutions,
                    this.enforceCaseSensitivePathChecks,
                    this.trace,
                    this.progress,
                    this.skipFileChecks,
                    this.maxCharsPerCell));
        }
        // Reader-based requests carry no encoding options: the Readers already yield characters.
        return CsvValidatorJavaBridge.validate(new ReaderValidationRequest(
                this.csvReader,
                this.csvSchemaReader,
                this.failFast,
                this.pathSubstitutions,
                this.enforceCaseSensitivePathChecks,
                this.trace,
                this.progress,
                this.skipFileChecks,
                this.maxCharsPerCell));
    }
}
}

Loading

0 comments on commit f5f44d9

Please sign in to comment.