Skip to content

Commit

Permalink
chore: Enable requesting numeric enums in "transport=rest" responses …
Browse files Browse the repository at this point in the history
…for services supporting this (Java, Go, Python, PHP, TypeScript, C#, and Ruby), even if they do not yet turn on REST transport (#3500)

feat: added font_family to document.proto
feat: added ImageQualityScores message to document.proto
feat: added PropertyMetadata and EntityTypeMetadata to document_schema.proto
PiperOrigin-RevId: 486975621
Source-Link: googleapis/googleapis@398c9f9
Source-Link: googleapis/googleapis-gen@7cd1f5f
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWNvbnRlbnR3YXJlaG91c2UvLk93bEJvdC55YW1sIiwiaCI6IjdjZDFmNWY0ZTQzNTc3N2NiODI0YWYyNjhkYzhkMzcxMzQ2MTNlNmEifQ==
See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
PiperOrigin-RevId: 487258346
Source-Link: googleapis/googleapis@6e1a5a0
Source-Link: googleapis/googleapis-gen@966c9cd
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWNvbnRlbnR3YXJlaG91c2UvLk93bEJvdC55YW1sIiwiaCI6Ijk2NmM5Y2Q4OWI2NWU4MTMyYzA4NzM0NDg4M2QwMmI3ODhjMTM3YTAifQ==
See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
chore: Enable requesting numeric enums in "transport=rest" responses for services supporting this (Java, Go, Python, PHP, TypeScript, C#, and Ruby), even if they do not yet turn on REST transport
chore: disallow "transport=rest" for services where numeric enums are not confirmed to be supported (except in PHP and Java)
PiperOrigin-RevId: 493113566

Source-Link: googleapis/googleapis@758f0d1
Source-Link: googleapis/googleapis-gen@78bd8f0
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWNvbnRlbnR3YXJlaG91c2UvLk93bEJvdC55YW1sIiwiaCI6Ijc4YmQ4ZjA1ZTEyNzYzNjNlYjE0ZWFlNzBlOTFmZTRiYzIwNzAzYWIifQ==
See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: sofisl <[email protected]>
Co-authored-by: Benjamin E. Coe <[email protected]>
  • Loading branch information
4 people authored Dec 7, 2022
1 parent ddbc5a0 commit 5d84b5c
Show file tree
Hide file tree
Showing 17 changed files with 976 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ message SearchDocumentsRequest {
//
// * Histogram facet (aka filterable properties): Facet names with format
// &lt;schema id&gt;.&lt;facet&gt;. Facets will have the
// format of: [a-zA-Z][a-zA-Z0-9_:/-.]. If the facet is a child
// format of: `[a-zA-Z][a-zA-Z0-9_:/.-]`. If the facet is a child
// facet, then the parent hierarchy needs to be specified separated by
// dots in the prefix after the schema id. Thus, the format for a multi-
// level facet is: &lt;schema id&gt;.&lt;parent facet name&gt;.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,40 +28,42 @@ option ruby_package = "Google::Cloud::DocumentAI::V1";
message Barcode {
// Format of a barcode.
// The supported formats are:
// CODE_128: Code 128 type.
// CODE_39: Code 39 type.
// CODE_93: Code 93 type.
// CODABAR: Codabar type.
// DATA_MATRIX: 2D Data Matrix type.
// ITF: ITF type.
// EAN_13: EAN-13 type.
// EAN_8: EAN-8 type.
// QR_CODE: 2D QR code type.
// UPC_A: UPC-A type.
// UPC_E: UPC-E type.
// PDF417: PDF417 type.
// AZTEC: 2D Aztec code type.
// DATABAR: GS1 DataBar code type.
//
// - `CODE_128`: Code 128 type.
// - `CODE_39`: Code 39 type.
// - `CODE_93`: Code 93 type.
// - `CODABAR`: Codabar type.
// - `DATA_MATRIX`: 2D Data Matrix type.
// - `ITF`: ITF type.
// - `EAN_13`: EAN-13 type.
// - `EAN_8`: EAN-8 type.
// - `QR_CODE`: 2D QR code type.
// - `UPC_A`: UPC-A type.
// - `UPC_E`: UPC-E type.
// - `PDF417`: PDF417 type.
// - `AZTEC`: 2D Aztec code type.
// - `DATABAR`: GS1 DataBar code type.
string format = 1;

// Value format describes the format of the value that a barcode
// encodes.
// The supported formats are:
// CONTACT_INFO: Contact information.
// EMAIL: Email address.
// ISBN: ISBN identifier.
// PHONE: Phone number.
// PRODUCT: Product.
// SMS: SMS message.
// TEXT: Text string.
// URL: URL address.
// WIFI: Wifi information.
// GEO: Geo-localization.
// CALENDAR_EVENT: Calendar event.
// DRIVER_LICENSE: Driver's license.
//
// - `CONTACT_INFO`: Contact information.
// - `EMAIL`: Email address.
// - `ISBN`: ISBN identifier.
// - `PHONE`: Phone number.
// - `PRODUCT`: Product.
// - `SMS`: SMS message.
// - `TEXT`: Text string.
// - `URL`: URL address.
// - `WIFI`: Wifi information.
// - `GEO`: Geo-localization.
// - `CALENDAR_EVENT`: Calendar event.
// - `DRIVER_LICENSE`: Driver's license.
string value_format = 2;

// Raw value encoded in the barcode.
// For example, 'MEBKM:TITLE:Google;URL:https://www.google.com;;'.
// For example: `'MEBKM:TITLE:Google;URL:https://www.google.com;;'`.
string raw_value = 3;
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ message Document {

// Font size.
FontSize font_size = 7;

// Font family such as `Arial`, `Times New Roman`.
// https://www.w3schools.com/cssref/pr_font_font-family.asp
string font_family = 8;
}

// A page in a [Document][google.cloud.documentai.v1.Document].
Expand Down Expand Up @@ -169,7 +173,7 @@ message Document {

// Confidence of the current [Layout][google.cloud.documentai.v1.Document.Page.Layout] within context of the object this
// layout is for. e.g. confidence can be for a single token, a table,
// a visual element, etc. depending on context. Range [0, 1].
// a visual element, etc. depending on context. Range `[0, 1]`.
float confidence = 2;

// The bounding polygon for the [Layout][google.cloud.documentai.v1.Document.Page.Layout].
Expand All @@ -189,7 +193,7 @@ message Document {
repeated DetectedLanguage detected_languages = 2;

// The history of this annotation.
Provenance provenance = 3;
Provenance provenance = 3 [deprecated = true];
}

// A collection of lines that a human would perceive as a paragraph.
Expand All @@ -201,7 +205,7 @@ message Document {
repeated DetectedLanguage detected_languages = 2;

// The history of this annotation.
Provenance provenance = 3;
Provenance provenance = 3 [deprecated = true];
}

// A collection of tokens that a human would perceive as a line.
Expand All @@ -214,7 +218,7 @@ message Document {
repeated DetectedLanguage detected_languages = 2;

// The history of this annotation.
Provenance provenance = 3;
Provenance provenance = 3 [deprecated = true];
}

// A detected token.
Expand Down Expand Up @@ -249,8 +253,8 @@ message Document {
// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 3;

// The history of this annotation.
Provenance provenance = 4;
// The history of this annotation.
Provenance provenance = 4 [deprecated = true];
}

// A detected symbol.
Expand Down Expand Up @@ -309,6 +313,9 @@ message Document {

// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 4;

// The history of this table.
Provenance provenance = 5;
}

// A form field detected on the page.
Expand Down Expand Up @@ -358,15 +365,43 @@ message Document {

// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
// The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
// information, see
// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 1;

// Confidence of detected language. Range [0, 1].
// Confidence of detected language. Range `[0, 1]`.
float confidence = 2;
}

// Image quality scores for the page image.
message ImageQualityScores {
// An image quality defect detected on the page image.
message DetectedDefect {
// Name of the defect type. Supported values are:
//
// - `quality/defect_blurry`
// - `quality/defect_noisy`
// - `quality/defect_dark`
// - `quality/defect_faint`
// - `quality/defect_text_too_small`
// - `quality/defect_document_cutoff`
// - `quality/defect_text_cutoff`
// - `quality/defect_glare`
string type = 1;

// Confidence of the detected defect. Range `[0, 1]`, where 1 indicates
// strong confidence that the defect exists.
float confidence = 2;
}

// The overall quality score. Range `[0, 1]`, where 1 is perfect quality.
float quality_score = 1;

// A list of detected defects.
repeated DetectedDefect detected_defects = 2;
}

// 1-based index for current [Page][google.cloud.documentai.v1.Document.Page] in a parent [Document][google.cloud.documentai.v1.Document].
// Useful when a page is taken out of a [Document][google.cloud.documentai.v1.Document] for individual
// processing.
Expand Down Expand Up @@ -422,8 +457,11 @@ message Document {
// A list of detected barcodes.
repeated DetectedBarcode detected_barcodes = 15;

// Image Quality Scores.
ImageQualityScores image_quality_scores = 17;

// The history of this page.
Provenance provenance = 16;
Provenance provenance = 16 [deprecated = true];
}

// An entity that could be a phrase in the text or a property that belongs to
Expand Down Expand Up @@ -471,6 +509,7 @@ message Document {
// or int normalized text by default.
//
// Below are sample formats mapped to structured values.
//
// - Money/Currency type (`money_value`) is in the ISO 4217 text format.
// - Date type (`date_value`) is in the ISO 8601 text format.
// - Datetime type (`datetime_value`) is in the ISO 8601 text format.
Expand All @@ -484,14 +523,13 @@ message Document {
// Required. Entity type from a schema e.g. `Address`.
string type = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Text value in the document e.g. `1600 Amphitheatre Pkwy`. If the entity
// is not present in the document, this field will be empty.
// Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];

// Optional. Deprecated. Use `id` field instead.
string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. Confidence of detected Schema entity. Range [0, 1].
// Optional. Confidence of detected Schema entity. Range `[0, 1]`.
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

// Optional. Represents the provenance of this entity wrt. the location on the
Expand Down Expand Up @@ -605,7 +643,7 @@ message Document {
// Optional. Identifies the bounding polygon of a layout element on the page.
BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. Confidence of detected page element, if applicable. Range [0, 1].
// Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
}

Expand Down Expand Up @@ -726,7 +764,7 @@ message Document {
string changed_text = 2;

// The history of this annotation.
repeated Provenance provenance = 3;
repeated Provenance provenance = 3 [deprecated = true];
}

// Original source document from the user.
Expand Down Expand Up @@ -765,9 +803,9 @@ message Document {
// Placeholder. Relationship among [Document.entities][google.cloud.documentai.v1.Document.entities].
repeated EntityRelation entity_relations = 8;

// Placeholder. A list of text corrections made to [Document.text]. This is
// usually used for annotating corrections to OCR mistakes. Text changes for
// a given revision may not overlap with each other.
// Placeholder. A list of text corrections made to [Document.text][google.cloud.documentai.v1.Document.text]. This
// is usually used for annotating corrections to OCR mistakes. Text changes
// for a given revision may not overlap with each other.
repeated TextChange text_changes = 14;

// Information about the sharding if this document is a sharded part of a larger
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ syntax = "proto3";

package google.cloud.documentai.v1;

import "google/protobuf/field_mask.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1;documentai";
option java_multiple_files = true;
Expand All @@ -30,7 +32,7 @@ message RawDocument {
bytes content = 1;

// An IANA MIME type (RFC6838) indicating the nature and format of the
// [content].
// [content][google.cloud.documentai.v1.RawDocument.content].
string mime_type = 2;
}

Expand Down Expand Up @@ -59,7 +61,7 @@ message GcsPrefix {
message BatchDocumentsInputConfig {
// The source.
oneof source {
// The set of documents that match the specified Cloud Storage [gcs_prefix].
// The set of documents that match the specified Cloud Storage `gcs_prefix`.
GcsPrefix gcs_prefix = 1;

// The set of documents individually specified on Cloud Storage.
Expand All @@ -74,6 +76,11 @@ message DocumentOutputConfig {
message GcsOutputConfig {
// The Cloud Storage URI (a directory) of the output.
string gcs_uri = 1;

// Specifies which fields to include in the output documents.
// Only supports top-level document and pages fields, so it must be in the
// form of `{document_field_name}` or `pages.{page_field_name}`.
google.protobuf.FieldMask field_mask = 2;
}

// The destination of the results.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ message ProcessRequest {
bool skip_human_review = 3;

// Specifies which fields to include in ProcessResponse's document.
// Only supports top-level document and pages fields, so it must be in the form
// of `{document_field_name}` or `pages.{page_field_name}`.
google.protobuf.FieldMask field_mask = 6;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,16 @@ message DocumentSchema {
// Name of the type. It must be unique within the schema file and
// cannot be a 'Common Type'. Besides that we use the following naming
// conventions:
// - *use snake_casing*
//
// - *use `snake_casing`*
// - name matching is case-insensitive
// - Maximum 64 characters.
// - Must start with a letter.
// - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
// compatibility, internal infrastructure and tooling can handle any ASCII
// character.)
// - The '/' is sometimes used to denote a property of a type. For example
// line_item/amount. This convention is deprecated, but will still be
// - The `/` is sometimes used to denote a property of a type. For example
// `line_item/amount`. This convention is deprecated, but will still be
// honored for backward compatibility.
string name = 1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package google.cloud.documentai.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1/document_schema.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
Expand Down Expand Up @@ -85,6 +86,9 @@ message ProcessorVersion {
// The display name of the processor version.
string display_name = 2;

// The schema of the processor version. Describes the output.
DocumentSchema document_schema = 12;

// The state of the processor version.
State state = 6;

Expand Down Expand Up @@ -125,14 +129,14 @@ message Processor {
// The processor is disabled.
DISABLED = 2;

// The processor is being enabled, will become ENABLED if successful.
// The processor is being enabled, will become `ENABLED` if successful.
ENABLING = 3;

// The processor is being disabled, will become DISABLED if successful.
// The processor is being disabled, will become `DISABLED` if successful.
DISABLING = 4;

// The processor is being created, will become either ENABLED (for
// successful creation) or FAILED (for failed ones).
// The processor is being created, will become either `ENABLED` (for
// successful creation) or `FAILED` (for failed ones).
// Once a processor is in this state, it can then be used for document
// processing, but the feature dependencies of the processor might not be
// fully created yet.
Expand All @@ -154,7 +158,7 @@ message Processor {
(google.api.field_behavior) = OUTPUT_ONLY
];

// The processor type, e.g., OCR_PROCESSOR, INVOICE_PROCESSOR, etc.
// The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc.
// To get a list of processors types, see
// [FetchProcessorTypes][google.cloud.documentai.v1.DocumentProcessorService.FetchProcessorTypes].
string type = 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ message ProcessorType {
}

// The resource name of the processor type.
// Format: projects/{project}/processorTypes/{processor_type}
// Format: `projects/{project}/processorTypes/{processor_type}`
string name = 1;

// The type of the processor, e.g., "invoice_parsing".
// The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc.
string type = 2;

// The processor category, used by UI to group processor types.
Expand Down
Loading

0 comments on commit 5d84b5c

Please sign in to comment.