From 865aa48c74fc07e4357fbbf793b2775cfdbc9563 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Wed, 28 Mar 2018 13:17:07 -0700 Subject: [PATCH 1/5] Add vision ocr for pdf/tiff --- vision/v1p2beta1/README.md | 60 ++++++ vision/v1p2beta1/pom.xml | 103 +++++++++ .../main/java/com/example/vision/Detect.java | 201 ++++++++++++++++++ .../java/com/example/vision/DetectIT.java | 77 +++++++ 4 files changed, 441 insertions(+) create mode 100644 vision/v1p2beta1/README.md create mode 100644 vision/v1p2beta1/pom.xml create mode 100644 vision/v1p2beta1/src/main/java/com/example/vision/Detect.java create mode 100644 vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java diff --git a/vision/v1p2beta1/README.md b/vision/v1p2beta1/README.md new file mode 100644 index 00000000000..a6547b7ba2d --- /dev/null +++ b/vision/v1p2beta1/README.md @@ -0,0 +1,60 @@ +# OCR Feature Detection Sample + + +Open in Cloud Shell + +[Google Cloud Vision API][vision] provides OCR detection for PDF/TIFF documents. +This API is part of the larger collection of Cloud Machine Learning APIs. + +This sample Java application demonstrates how to access the Cloud Vision API +using the [Google Cloud Client Library for Java][google-cloud-java]. + +[vision]: https://cloud.google.com/vision/docs/ +[google-cloud-java]: https://github.com/GoogleCloudPlatform/google-cloud-java + +## Prerequisites + +### Download Maven + +This sample uses the [Apache Maven][maven] build system. Before getting started, be +sure to [download][maven-download] and [install][maven-install] it. When you use +Maven as described here, it will automatically download the needed client +libraries. + +[maven]: https://maven.apache.org +[maven-download]: https://maven.apache.org/download.cgi +[maven-install]: https://maven.apache.org/install.html + +### Setup + +* Create a project with the [Google Cloud Console][cloud-console], and enable + the [Vision API][vision-api]. +* Set up your environment with [Application Default Credentials][adc]. 
For + example, from the Cloud Console, you might create a service account, + download its json credentials file, then set the appropriate environment + variable: + + ```bash + export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-project-credentials.json + ``` +* Build the sample + ``` + mvn clean package + ``` + +[cloud-console]: https://console.cloud.google.com +[vision-api]: https://console.cloud.google.com/apis/api/vision.googleapis.com/overview?project=_ +[adc]: https://cloud.google.com/docs/authentication#developer_workflow + +## Samples +You can then run `Detect` via: + +``` +mvn exec:java -DDetect -Dexec.args="arg1 'arg 2' arg3" +``` + +#### OCR +``` +mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \ + gs:///" +``` diff --git a/vision/v1p2beta1/pom.xml b/vision/v1p2beta1/pom.xml new file mode 100644 index 00000000000..13d1719f8cd --- /dev/null +++ b/vision/v1p2beta1/pom.xml @@ -0,0 +1,103 @@ + + + 4.0.0 + com.example.vision + vision-detect-v1p2beta1 + jar + + + + com.google.cloud.samples + shared-configuration + 1.0.8 + + + + 1.8 + 1.8 + UTF-8 + + + + + + com.google.cloud + google-cloud-vision + 1.24.0 + + + com.google.cloud + google-cloud-storage + 1.14.0 + + + com.google.protobuf + protobuf-java + 2.5.0 + + + + + + junit + junit + 4.12 + test + + + + com.google.truth + truth + 0.39 + test + + + + + + Detect + + + Detect + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + java + + + + + com.example.vision.Detect + false + + + + + + + diff --git a/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java b/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java new file mode 100644 index 00000000000..7d2e0ba8407 --- /dev/null +++ b/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java @@ -0,0 +1,201 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.vision; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; +import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse; +import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest; +import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse; +import com.google.cloud.vision.v1p2beta1.Feature; +import com.google.cloud.vision.v1p2beta1.Feature.Type; +import com.google.cloud.vision.v1p2beta1.GcsDestination; +import com.google.cloud.vision.v1p2beta1.GcsSource; +import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient; +import com.google.cloud.vision.v1p2beta1.InputConfig; +import com.google.cloud.vision.v1p2beta1.OperationMetadata; +import com.google.cloud.vision.v1p2beta1.OutputConfig; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class Detect { + + /** + * Performs OCR detection on PDF/TIFF documents on Cloud Storage using the Vision API. + * + * @throws Exception on errors while closing the client. + * @throws IOException on Input/Output errors. 
+ */ + public static void main(String[] args) throws Exception { + argsHelper(args, System.out); + } + + /** + * Helper that handles the input passed to the program. + * + * @throws Exception on errors while closing the client. + * @throws IOException on Input/Output errors. + */ + public static void argsHelper(String[] args, PrintStream out) throws Exception { + if (args.length < 1) { + out.println("Usage:"); + out.printf( + "\tmvn exec:java -DDetect -Dexec.args=\"ocr \"\n" + + "Commands:\n" + + "\tocr\n" + + "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n" + + "Path to Destination\n\tA path to the remote destination on Cloud Storage for the" + + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n"); + return; + } + String command = args[0]; + String path = args.length > 1 ? args[1] : ""; + + if (command.equals("ocr")) { + String destPath = args.length > 2 ? args[2] : ""; + detectDocumentsGcs(path, destPath); + } + } + + // [START vision_async_detect_document_ocr] + /** + * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage. + * + * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document + * text on. + * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the + * results on. + * @throws Exception on errors while closing the client. + */ + public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws + Exception { + try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) { + List requests = new ArrayList<>(); + + // Set the GCS source path for the remote file. 
+ GcsSource gcsSource = GcsSource.newBuilder() + .setUri(gcsSourcePath) + .build(); + + // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions) + // types + InputConfig inputConfig = InputConfig.newBuilder() + .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff" + .setGcsSource(gcsSource) + .build(); + + // Set the GCS destination path for where to save the results. + GcsDestination gcsDestination = GcsDestination.newBuilder() + .setUri(gcsDestinationPath) + .build(); + + // Create the configuration for the output with the batch size. + // The batch size sets how many pages should be grouped into each json output file. + OutputConfig outputConfig = OutputConfig.newBuilder() + .setBatchSize(2) + .setGcsDestination(gcsDestination) + .build(); + + // Select the Feature required by the vision API + Feature feature = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build(); + + // Build the OCR request + AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder() + .addFeatures(feature) + .setInputConfig(inputConfig) + .setOutputConfig(outputConfig) + .build(); + + requests.add(request); + + // Perform the OCR request + OperationFuture response = + client.asyncBatchAnnotateFilesAsync(requests); + + System.out.println("Waiting for the operation to finish."); + + // Wait for the request to finish. (The result is not used, since the API saves the result to + // the specified location on GCS.) + List result = response.get(90, TimeUnit.SECONDS) + .getResponsesList(); + + // Once the request has completed and the output has been + // written to GCS, we can list all the output files. 
+ Storage storage = StorageOptions.getDefaultInstance().getService(); + + // Get the destination location from the gcsDestinationPath + Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)"); + Matcher matcher = pattern.matcher(gcsDestinationPath); + + if (matcher.find()) { + String bucketName = matcher.group(1); + String prefix = matcher.group(2); + + // Get the list of objects with the given prefix from the GCS bucket + Bucket bucket = storage.get(bucketName); + com.google.api.gax.paging.Page pageList = bucket.list(BlobListOption.prefix(prefix)); + + Blob firstOutputFile = null; + + // List objects with the given prefix. + System.out.println("Output files:"); + for (Blob blob : pageList.iterateAll()) { + System.out.println(blob.getName()); + + // Process the first output file from GCS. + // Since we specified batch size = 2, the first response contains + // the first two pages of the input file. + if (firstOutputFile == null) { + firstOutputFile = blob; + } + } + + // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse + // object. If the Blob is small read all its content in one request + // (Note: the file is a .json file) + // Storage guide: https://cloud.google.com/storage/docs/downloading-objects + AnnotateFileResponse annotateFileResponse = AnnotateFileResponse.newBuilder() + .mergeFrom(firstOutputFile.getContent()) + .build(); + + // Parse through the object to get the actual response for the first page of the input file. + AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0); + + // Here we print the full text from the first page. 
+ // The response contains more information: + // annotation/pages/blocks/paragraphs/words/symbols + // including confidence score and bounding boxes + System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation()); + } else { + System.out.println("No MATCH"); + } + } + } + // [END vision_async_detect_document_ocr] +} diff --git a/vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java b/vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java new file mode 100644 index 00000000000..3cc08975d82 --- /dev/null +++ b/vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java @@ -0,0 +1,77 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.vision; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.api.gax.paging.Page; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for vision "Detect" sample. 
*/ +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class DetectIT { + private ByteArrayOutputStream bout; + private PrintStream out; + private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); + private static final String BUCKET = PROJECT_ID; + private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + } + + @After + public void tearDown() { + System.setOut(null); + } + + @Test + public void testDetectDocumentsGcs() throws Exception { + // Act + String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf", + "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"}; + Detect.argsHelper(args, out); + + // Assert + String got = bout.toString(); + assertThat(got).contains("HODGE'S GENERAL CONJECTURE"); + + Storage storage = StorageOptions.getDefaultInstance().getService(); + + Page blobs = storage.list(BUCKET, BlobListOption.currentDirectory(), + BlobListOption.prefix(OUTPUT_PREFIX + "/")); + + for (Blob blob : blobs.iterateAll()) { + blob.delete(); + } + } +} From 7b5092f6cb153fff50b9366feeb983ae4882bee3 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Mon, 2 Apr 2018 15:45:36 -0700 Subject: [PATCH 2/5] Update samples with latest library --- pom.xml | 1 + vision/v1p2beta1/pom.xml | 9 ++------ .../main/java/com/example/vision/Detect.java | 22 ++++++++++++++----- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 8711cd723ee..e962acaa8b1 100644 --- a/pom.xml +++ b/pom.xml @@ -98,6 +98,7 @@ vision/beta/cloud-client vision/cloud-client vision/face-detection + vision/v1p2beta1 vision/label vision/landmark-detection vision/text diff --git a/vision/v1p2beta1/pom.xml b/vision/v1p2beta1/pom.xml index 13d1719f8cd..6cf76499817 100644 --- a/vision/v1p2beta1/pom.xml +++ b/vision/v1p2beta1/pom.xml @@ -40,17 +40,12 @@ com.google.cloud google-cloud-vision - 
1.24.0 + 1.24.1 com.google.cloud google-cloud-storage - 1.14.0 - - - com.google.protobuf - protobuf-java - 2.5.0 + 1.24.1 diff --git a/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java b/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java index 7d2e0ba8407..30805924ede 100644 --- a/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java +++ b/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java @@ -18,11 +18,13 @@ import com.google.api.gax.longrunning.OperationFuture; import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Blob.BlobSourceOption; import com.google.cloud.storage.Bucket; import com.google.cloud.storage.Storage; import com.google.cloud.storage.Storage.BlobListOption; import com.google.cloud.storage.StorageOptions; import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder; import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse; import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest; import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse; @@ -36,9 +38,14 @@ import com.google.cloud.vision.v1p2beta1.OperationMetadata; import com.google.cloud.vision.v1p2beta1.OutputConfig; +import com.google.protobuf.util.JsonFormat; +import com.google.protobuf.util.JsonFormat.Parser; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; @@ -118,7 +125,7 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio // Create the configuration for the output with the batch size. // The batch size sets how many pages should be grouped into each json output file. 
OutputConfig outputConfig = OutputConfig.newBuilder() - .setBatchSize(2) + .setBatchSize(1) .setGcsDestination(gcsDestination) .build(); @@ -142,7 +149,7 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio // Wait for the request to finish. (The result is not used, since the API saves the result to // the specified location on GCS.) - List result = response.get(90, TimeUnit.SECONDS) + List result = response.get(180, TimeUnit.SECONDS) .getResponsesList(); // Once the request has completed and the output has been @@ -180,9 +187,12 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio // object. If the Blob is small read all its content in one request // (Note: the file is a .json file) // Storage guide: https://cloud.google.com/storage/docs/downloading-objects - AnnotateFileResponse annotateFileResponse = AnnotateFileResponse.newBuilder() - .mergeFrom(firstOutputFile.getContent()) - .build(); + String jsonContents = new String(firstOutputFile.getContent()); + Builder builder = AnnotateFileResponse.newBuilder(); + JsonFormat.parser().merge(jsonContents, builder); + + // Build the AnnotateFileResponse object + AnnotateFileResponse annotateFileResponse = builder.build(); // Parse through the object to get the actual response for the first page of the input file. 
AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0); @@ -191,7 +201,7 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio // The response contains more information: // annotation/pages/blocks/paragraphs/words/symbols // including confidence score and bounding boxes - System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation()); + System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText()); } else { System.out.println("No MATCH"); } From 57eb9588deac51da93aea64793e8cfc36a6372ef Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 3 Apr 2018 13:26:26 -0700 Subject: [PATCH 3/5] Move samples into beta directory --- vision/beta/cloud-client/README.md | 6 + vision/beta/cloud-client/pom.xml | 7 +- .../main/java/com/example/vision/Detect.java | 210 ++++++++++++++--- .../java/com/example/vision/DetectIT.java | 27 +++ vision/v1p2beta1/README.md | 60 ----- vision/v1p2beta1/pom.xml | 98 -------- .../main/java/com/example/vision/Detect.java | 211 ------------------ .../java/com/example/vision/DetectIT.java | 77 ------- 8 files changed, 217 insertions(+), 479 deletions(-) delete mode 100644 vision/v1p2beta1/README.md delete mode 100644 vision/v1p2beta1/pom.xml delete mode 100644 vision/v1p2beta1/src/main/java/com/example/vision/Detect.java delete mode 100644 vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java diff --git a/vision/beta/cloud-client/README.md b/vision/beta/cloud-client/README.md index 108cfbdff58..6b25a0cc0d6 100644 --- a/vision/beta/cloud-client/README.md +++ b/vision/beta/cloud-client/README.md @@ -88,3 +88,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar ``` mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg" ``` + +#### OCR +``` +mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \ + gs:///" +``` diff --git a/vision/beta/cloud-client/pom.xml 
b/vision/beta/cloud-client/pom.xml index 432a81a4ac5..b9424c24968 100644 --- a/vision/beta/cloud-client/pom.xml +++ b/vision/beta/cloud-client/pom.xml @@ -40,7 +40,12 @@ com.google.cloud google-cloud-vision - 1.22.0 + 1.24.1 + + + com.google.cloud + google-cloud-storage + 1.24.1 diff --git a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java index f42323a6aa2..7b928ce6d52 100644 --- a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java +++ b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java @@ -16,37 +16,53 @@ package com.example.vision; -import com.google.cloud.vision.v1p1beta1.AnnotateImageRequest; -import com.google.cloud.vision.v1p1beta1.AnnotateImageResponse; -import com.google.cloud.vision.v1p1beta1.BatchAnnotateImagesResponse; -import com.google.cloud.vision.v1p1beta1.Block; -import com.google.cloud.vision.v1p1beta1.ColorInfo; -import com.google.cloud.vision.v1p1beta1.CropHint; -import com.google.cloud.vision.v1p1beta1.CropHintsAnnotation; -import com.google.cloud.vision.v1p1beta1.DominantColorsAnnotation; -import com.google.cloud.vision.v1p1beta1.EntityAnnotation; -import com.google.cloud.vision.v1p1beta1.FaceAnnotation; -import com.google.cloud.vision.v1p1beta1.Feature; -import com.google.cloud.vision.v1p1beta1.Feature.Type; -import com.google.cloud.vision.v1p1beta1.Image; -import com.google.cloud.vision.v1p1beta1.ImageAnnotatorClient; -import com.google.cloud.vision.v1p1beta1.ImageContext; -import com.google.cloud.vision.v1p1beta1.ImageSource; -import com.google.cloud.vision.v1p1beta1.LocationInfo; -import com.google.cloud.vision.v1p1beta1.Page; -import com.google.cloud.vision.v1p1beta1.Paragraph; -import com.google.cloud.vision.v1p1beta1.SafeSearchAnnotation; -import com.google.cloud.vision.v1p1beta1.Symbol; -import com.google.cloud.vision.v1p1beta1.TextAnnotation; -import com.google.cloud.vision.v1p1beta1.WebDetection; -import 
com.google.cloud.vision.v1p1beta1.WebDetection.WebEntity; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebImage; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebLabel; -import com.google.cloud.vision.v1p1beta1.WebDetection.WebPage; -import com.google.cloud.vision.v1p1beta1.WebDetectionParams; -import com.google.cloud.vision.v1p1beta1.Word; - +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; +import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder; +import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest; +import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse; +import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest; +import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse; +import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse; +import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse; +import com.google.cloud.vision.v1p2beta1.Block; +import com.google.cloud.vision.v1p2beta1.ColorInfo; +import com.google.cloud.vision.v1p2beta1.CropHint; +import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation; +import com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation; +import com.google.cloud.vision.v1p2beta1.EntityAnnotation; +import com.google.cloud.vision.v1p2beta1.FaceAnnotation; +import com.google.cloud.vision.v1p2beta1.Feature; +import com.google.cloud.vision.v1p2beta1.Feature.Type; +import com.google.cloud.vision.v1p2beta1.GcsDestination; +import com.google.cloud.vision.v1p2beta1.GcsSource; +import com.google.cloud.vision.v1p2beta1.Image; +import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient; +import com.google.cloud.vision.v1p2beta1.ImageContext; +import 
com.google.cloud.vision.v1p2beta1.ImageSource; +import com.google.cloud.vision.v1p2beta1.InputConfig; +import com.google.cloud.vision.v1p2beta1.LocationInfo; +import com.google.cloud.vision.v1p2beta1.OperationMetadata; +import com.google.cloud.vision.v1p2beta1.OutputConfig; +import com.google.cloud.vision.v1p2beta1.Page; +import com.google.cloud.vision.v1p2beta1.Paragraph; +import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation; +import com.google.cloud.vision.v1p2beta1.Symbol; +import com.google.cloud.vision.v1p2beta1.TextAnnotation; +import com.google.cloud.vision.v1p2beta1.WebDetection; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel; +import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage; +import com.google.cloud.vision.v1p2beta1.WebDetectionParams; +import com.google.cloud.vision.v1p2beta1.Word; import com.google.protobuf.ByteString; +import com.google.protobuf.util.JsonFormat; import java.io.FileInputStream; import java.io.IOException; @@ -54,6 +70,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class Detect { @@ -78,11 +97,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception, out.println("Usage:"); out.printf( "\tmvn exec:java -DDetect -Dexec.args=\" \"\n" + + "\tmvn exec:java -DDetect -Dexec.args=\"ocr \"" + + "\n" + "Commands:\n" + "\tfaces | labels | landmarks | logos | text | safe-search | properties" - + "| web | web-entities | web-entities-include-geo | crop \n" + + "| web | web-entities | web-entities-include-geo | crop | ocr \n" + "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage " - + "resource (gs://...)\n"); + + "resource (gs://...)\n" + + "Path to File:\n\tA path to the remote file 
on Cloud Storage (gs://...)\n" + + "Path to Destination\n\tA path to the remote destination on Cloud Storage for the" + + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n"); return; } String command = args[0]; @@ -162,6 +186,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception, } else { detectDocumentText(path, out); } + } else if (command.equals("ocr")) { + String destPath = args.length > 2 ? args[2] : ""; + detectDocumentsGcs(path, destPath); } } @@ -1277,4 +1304,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws } } // [END vision_detect_document_uri] + + // [START vision_async_detect_document_ocr] + /** + * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage. + * + * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document + * text on. + * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the + * results on. + * @throws Exception on errors while closing the client. + */ + public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws + Exception { + try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) { + List requests = new ArrayList<>(); + + // Set the GCS source path for the remote file. + GcsSource gcsSource = GcsSource.newBuilder() + .setUri(gcsSourcePath) + .build(); + + // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions) + // types + InputConfig inputConfig = InputConfig.newBuilder() + .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff" + .setGcsSource(gcsSource) + .build(); + + // Set the GCS destination path for where to save the results. + GcsDestination gcsDestination = GcsDestination.newBuilder() + .setUri(gcsDestinationPath) + .build(); + + // Create the configuration for the output with the batch size. 
+ // The batch size sets how many pages should be grouped into each json output file. + OutputConfig outputConfig = OutputConfig.newBuilder() + .setBatchSize(1) + .setGcsDestination(gcsDestination) + .build(); + + // Select the Feature required by the vision API + Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build(); + + // Build the OCR request + AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder() + .addFeatures(feature) + .setInputConfig(inputConfig) + .setOutputConfig(outputConfig) + .build(); + + requests.add(request); + + // Perform the OCR request + OperationFuture response = + client.asyncBatchAnnotateFilesAsync(requests); + + System.out.println("Waiting for the operation to finish."); + + // Wait for the request to finish. (The result is not used, since the API saves the result to + // the specified location on GCS.) + List result = response.get(180, TimeUnit.SECONDS) + .getResponsesList(); + + // Once the request has completed and the output has been + // written to GCS, we can list all the output files. + Storage storage = StorageOptions.getDefaultInstance().getService(); + + // Get the destination location from the gcsDestinationPath + Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)"); + Matcher matcher = pattern.matcher(gcsDestinationPath); + + if (matcher.find()) { + String bucketName = matcher.group(1); + String prefix = matcher.group(2); + + // Get the list of objects with the given prefix from the GCS bucket + Bucket bucket = storage.get(bucketName); + com.google.api.gax.paging.Page pageList = bucket.list(BlobListOption.prefix(prefix)); + + Blob firstOutputFile = null; + + // List objects with the given prefix. + System.out.println("Output files:"); + for (Blob blob : pageList.iterateAll()) { + System.out.println(blob.getName()); + + // Process the first output file from GCS. 
+ // Since we specified batch size = 1, the first response contains + // the first page of the input file. + if (firstOutputFile == null) { + firstOutputFile = blob; + } + } + + // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse + // object. If the Blob is small read all its content in one request + // (Note: the file is a .json file) + // Storage guide: https://cloud.google.com/storage/docs/downloading-objects + String jsonContents = new String(firstOutputFile.getContent()); + Builder builder = AnnotateFileResponse.newBuilder(); + JsonFormat.parser().merge(jsonContents, builder); + + // Build the AnnotateFileResponse object + AnnotateFileResponse annotateFileResponse = builder.build(); + + // Parse through the object to get the actual response for the first page of the input file. + AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0); + + // Here we print the full text from the first page. + // The response contains more information: + // annotation/pages/blocks/paragraphs/words/symbols + // including confidence score and bounding boxes + System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText()); + } else { + System.out.println("No MATCH"); + } + } + } + // [END vision_async_detect_document_ocr] } diff --git a/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java b/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java index 72f52d848ae..cab7a9e3aef 100644 --- a/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java +++ b/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java @@ -18,6 +18,11 @@ import static com.google.common.truth.Truth.assertThat; +import com.google.api.gax.paging.Page; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.Storage.BlobListOption; +import com.google.cloud.storage.StorageOptions; import
java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; @@ -37,6 +42,7 @@ public class DetectIT { private Detect app; private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); private static final String BUCKET = PROJECT_ID; + private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT"; @Before public void setUp() throws IOException { @@ -348,4 +354,25 @@ public void testDocumentTextGcs() throws Exception { assertThat(got).contains("37%"); assertThat(got).contains("Word text: class (confidence:"); } + + @Test + public void testDetectDocumentsGcs() throws Exception { + // Act + String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf", + "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"}; + Detect.argsHelper(args, out); + + // Assert + String got = bout.toString(); + assertThat(got).contains("HODGE'S GENERAL CONJECTURE"); + + Storage storage = StorageOptions.getDefaultInstance().getService(); + + Page blobs = storage.list(BUCKET, BlobListOption.currentDirectory(), + BlobListOption.prefix(OUTPUT_PREFIX + "/")); + + for (Blob blob : blobs.iterateAll()) { + blob.delete(); + } + } } diff --git a/vision/v1p2beta1/README.md b/vision/v1p2beta1/README.md deleted file mode 100644 index a6547b7ba2d..00000000000 --- a/vision/v1p2beta1/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# OCR Feature Detection Sample - - -Open in Cloud Shell - -[Google Cloud Vision API][vision] provides OCR detection for PDF/TIFF documents. -This API is part of the larger collection of Cloud Machine Learning APIs. - -This sample Java application demonstrates how to access the Cloud Vision API -using the [Google Cloud Client Library for Java][google-cloud-java]. - -[vision]: https://cloud.google.com/vision/docs/ -[google-cloud-java]: https://github.com/GoogleCloudPlatform/google-cloud-java - -## Prerequisites - -### Download Maven - -This sample uses the [Apache Maven][maven] build system. 
Before getting started, be -sure to [download][maven-download] and [install][maven-install] it. When you use -Maven as described here, it will automatically download the needed client -libraries. - -[maven]: https://maven.apache.org -[maven-download]: https://maven.apache.org/download.cgi -[maven-install]: https://maven.apache.org/install.html - -### Setup - -* Create a project with the [Google Cloud Console][cloud-console], and enable - the [Vision API][vision-api]. -* Set up your environment with [Application Default Credentials][adc]. For - example, from the Cloud Console, you might create a service account, - download its json credentials file, then set the appropriate environment - variable: - - ```bash - export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your-project-credentials.json - ``` -* Build the sample - ``` - mvn clean package - ``` - -[cloud-console]: https://console.cloud.google.com -[vision-api]: https://console.cloud.google.com/apis/api/vision.googleapis.com/overview?project=_ -[adc]: https://cloud.google.com/docs/authentication#developer_workflow - -## Samples -You can then run `Detect` via: - -``` -mvn exec:java -DDetect -Dexec.args="arg1 'arg 2' arg3" -``` - -#### OCR -``` -mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \ - gs:///" -``` diff --git a/vision/v1p2beta1/pom.xml b/vision/v1p2beta1/pom.xml deleted file mode 100644 index 6cf76499817..00000000000 --- a/vision/v1p2beta1/pom.xml +++ /dev/null @@ -1,98 +0,0 @@ - - - 4.0.0 - com.example.vision - vision-detect-v1p2beta1 - jar - - - - com.google.cloud.samples - shared-configuration - 1.0.8 - - - - 1.8 - 1.8 - UTF-8 - - - - - - com.google.cloud - google-cloud-vision - 1.24.1 - - - com.google.cloud - google-cloud-storage - 1.24.1 - - - - - - junit - junit - 4.12 - test - - - - com.google.truth - truth - 0.39 - test - - - - - - Detect - - - Detect - - - - - - org.codehaus.mojo - exec-maven-plugin - 1.6.0 - - - - java - - - - - com.example.vision.Detect - 
false - - - - - - - diff --git a/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java b/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java deleted file mode 100644 index 30805924ede..00000000000 --- a/vision/v1p2beta1/src/main/java/com/example/vision/Detect.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright 2018 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.example.vision; - -import com.google.api.gax.longrunning.OperationFuture; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Blob.BlobSourceOption; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.Storage.BlobListOption; -import com.google.cloud.storage.StorageOptions; -import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse; -import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder; -import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse; -import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest; -import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse; -import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse; -import com.google.cloud.vision.v1p2beta1.Feature; -import com.google.cloud.vision.v1p2beta1.Feature.Type; -import com.google.cloud.vision.v1p2beta1.GcsDestination; -import com.google.cloud.vision.v1p2beta1.GcsSource; -import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient; -import 
com.google.cloud.vision.v1p2beta1.InputConfig; -import com.google.cloud.vision.v1p2beta1.OperationMetadata; -import com.google.cloud.vision.v1p2beta1.OutputConfig; - -import com.google.protobuf.util.JsonFormat; -import com.google.protobuf.util.JsonFormat.Parser; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class Detect { - - /** - * Performs OCR detection on PDF/TIFF documents on Cloud Storage using the Vision API. - * - * @throws Exception on errors while closing the client. - * @throws IOException on Input/Output errors. - */ - public static void main(String[] args) throws Exception { - argsHelper(args, System.out); - } - - /** - * Helper that handles the input passed to the program. - * - * @throws Exception on errors while closing the client. - * @throws IOException on Input/Output errors. - */ - public static void argsHelper(String[] args, PrintStream out) throws Exception { - if (args.length < 1) { - out.println("Usage:"); - out.printf( - "\tmvn exec:java -DDetect -Dexec.args=\"ocr \"\n" - + "Commands:\n" - + "\tocr\n" - + "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n" - + "Path to Destination\n\tA path to the remote destination on Cloud Storage for the" - + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n"); - return; - } - String command = args[0]; - String path = args.length > 1 ? args[1] : ""; - - if (command.equals("ocr")) { - String destPath = args.length > 2 ? args[2] : ""; - detectDocumentsGcs(path, destPath); - } - } - - // [START vision_async_detect_document_ocr] - /** - * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage. 
- * - * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document - * text on. - * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the - * results on. - * @throws Exception on errors while closing the client. - */ - public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws - Exception { - try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) { - List requests = new ArrayList<>(); - - // Set the GCS source path for the remote file. - GcsSource gcsSource = GcsSource.newBuilder() - .setUri(gcsSourcePath) - .build(); - - // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions) - // types - InputConfig inputConfig = InputConfig.newBuilder() - .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff" - .setGcsSource(gcsSource) - .build(); - - // Set the GCS destination path for where to save the results. - GcsDestination gcsDestination = GcsDestination.newBuilder() - .setUri(gcsDestinationPath) - .build(); - - // Create the configuration for the output with the batch size. - // The batch size sets how many pages should be grouped into each json output file. - OutputConfig outputConfig = OutputConfig.newBuilder() - .setBatchSize(1) - .setGcsDestination(gcsDestination) - .build(); - - // Select the Feature required by the vision API - Feature feature = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build(); - - // Build the OCR request - AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder() - .addFeatures(feature) - .setInputConfig(inputConfig) - .setOutputConfig(outputConfig) - .build(); - - requests.add(request); - - // Perform the OCR request - OperationFuture response = - client.asyncBatchAnnotateFilesAsync(requests); - - System.out.println("Waiting for the operation to finish."); - - // Wait for the request to finish. 
(The result is not used, since the API saves the result to - // the specified location on GCS.) - List result = response.get(180, TimeUnit.SECONDS) - .getResponsesList(); - - // Once the request has completed and the output has been - // written to GCS, we can list all the output files. - Storage storage = StorageOptions.getDefaultInstance().getService(); - - // Get the destination location from the gcsDestinationPath - Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)"); - Matcher matcher = pattern.matcher(gcsDestinationPath); - - if (matcher.find()) { - String bucketName = matcher.group(1); - String prefix = matcher.group(2); - - // Get the list of objects with the given prefix from the GCS bucket - Bucket bucket = storage.get(bucketName); - com.google.api.gax.paging.Page pageList = bucket.list(BlobListOption.prefix(prefix)); - - Blob firstOutputFile = null; - - // List objects with the given prefix. - System.out.println("Output files:"); - for (Blob blob : pageList.iterateAll()) { - System.out.println(blob.getName()); - - // Process the first output file from GCS. - // Since we specified batch size = 2, the first response contains - // the first two pages of the input file. - if (firstOutputFile == null) { - firstOutputFile = blob; - } - } - - // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse - // object. If the Blob is small read all its content in one request - // (Note: the file is a .json file) - // Storage guide: https://cloud.google.com/storage/docs/downloading-objects - String jsonContents = new String(firstOutputFile.getContent()); - Builder builder = AnnotateFileResponse.newBuilder(); - JsonFormat.parser().merge(jsonContents, builder); - - // Build the AnnotateFileResponse object - AnnotateFileResponse annotateFileResponse = builder.build(); - - // Parse through the object to get the actual response for the first page of the input file. 
- AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0); - - // Here we print the full text from the first page. - // The response contains more information: - // annotation/pages/blocks/paragraphs/words/symbols - // including confidence score and bounding boxes - System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText()); - } else { - System.out.println("No MATCH"); - } - } - } - // [END vision_async_detect_document_ocr] -} diff --git a/vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java b/vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java deleted file mode 100644 index 3cc08975d82..00000000000 --- a/vision/v1p2beta1/src/test/java/com/example/vision/DetectIT.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2018 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.example.vision; - -import static com.google.common.truth.Truth.assertThat; - -import com.google.api.gax.paging.Page; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.Storage.BlobListOption; -import com.google.cloud.storage.StorageOptions; -import java.io.ByteArrayOutputStream; -import java.io.PrintStream; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests for vision "Detect" sample. 
*/ -@RunWith(JUnit4.class) -@SuppressWarnings("checkstyle:abbreviationaswordinname") -public class DetectIT { - private ByteArrayOutputStream bout; - private PrintStream out; - private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); - private static final String BUCKET = PROJECT_ID; - private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT"; - - @Before - public void setUp() { - bout = new ByteArrayOutputStream(); - out = new PrintStream(bout); - System.setOut(out); - } - - @After - public void tearDown() { - System.setOut(null); - } - - @Test - public void testDetectDocumentsGcs() throws Exception { - // Act - String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf", - "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"}; - Detect.argsHelper(args, out); - - // Assert - String got = bout.toString(); - assertThat(got).contains("HODGE'S GENERAL CONJECTURE"); - - Storage storage = StorageOptions.getDefaultInstance().getService(); - - Page blobs = storage.list(BUCKET, BlobListOption.currentDirectory(), - BlobListOption.prefix(OUTPUT_PREFIX + "/")); - - for (Blob blob : blobs.iterateAll()) { - blob.delete(); - } - } -} From 7062004eb0c05098b650c4b6883b17588e328f2c Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 3 Apr 2018 13:28:00 -0700 Subject: [PATCH 4/5] Update project pom --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index e962acaa8b1..8711cd723ee 100644 --- a/pom.xml +++ b/pom.xml @@ -98,7 +98,6 @@ vision/beta/cloud-client vision/cloud-client vision/face-detection - vision/v1p2beta1 vision/label vision/landmark-detection vision/text From 00713709cef1d53fa13eeac7ed906d296d757d08 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 3 Apr 2018 13:30:17 -0700 Subject: [PATCH 5/5] Update batch size to 2 --- .../cloud-client/src/main/java/com/example/vision/Detect.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java 
b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java index 7b928ce6d52..d506d25ba9d 100644 --- a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java +++ b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java @@ -1340,7 +1340,7 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio // Create the configuration for the output with the batch size. // The batch size sets how many pages should be grouped into each json output file. OutputConfig outputConfig = OutputConfig.newBuilder() - .setBatchSize(1) + .setBatchSize(2) .setGcsDestination(gcsDestination) .build();