Skip to content

Commit

Permalink
[DI] Enable to run sphinx in pipeline (#35078)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yalin Li authored Apr 9, 2024
1 parent dba02d4 commit f07513c
Show file tree
Hide file tree
Showing 10 changed files with 146 additions and 50 deletions.
89 changes: 87 additions & 2 deletions sdk/documentintelligence/azure-ai-documentintelligence/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,91 @@ print("----------------------------------------")

<!-- END SNIPPET -->

### Using the General Document Model

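Analyze key-value pairs, tables, styles, and selection marks from documents using the general document analysis capabilities of the Document Intelligence service.
In this version of the client library, the functionality of the former `prebuilt-document` model is selected by passing `"prebuilt-layout"` as the model ID together with `features=[DocumentAnalysisFeature.KEY_VALUE_PAIRS]` into the `begin_analyze_document` method: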

<!-- SNIPPET:sample_analyze_general_documents.analyze_general_documents -->

```python
import os

from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult

# NOTE: `path_to_sample_documents` and `get_words` are not defined in this
# snippet; presumably they come from the full sample file this snippet is
# extracted from -- define them before running this code on its own.

# The service endpoint and API key are read from the environment.
endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]

document_intelligence_client = DocumentIntelligenceClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# Submit the document as a raw byte stream and request the key-value pairs
# add-on feature on top of the layout model.
with open(path_to_sample_documents, "rb") as f:
    poller = document_intelligence_client.begin_analyze_document(
        "prebuilt-layout",
        analyze_request=f,
        features=[DocumentAnalysisFeature.KEY_VALUE_PAIRS],
        content_type="application/octet-stream",
    )
result: AnalyzeResult = poller.result()

# Report handwritten content: each style span is an (offset, length) window
# into the full document text in `result.content`.
if result.styles:
    for style in result.styles:
        if style.is_handwritten:
            print("Document contains handwritten content: ")
            print(",".join([result.content[span.offset : span.offset + span.length] for span in style.spans]))

print("----Key-value pairs found in document----")
if result.key_value_pairs:
    for kv_pair in result.key_value_pairs:
        # Key and value are checked independently; either may be absent.
        if kv_pair.key:
            print(f"Key '{kv_pair.key.content}' found within " f"'{kv_pair.key.bounding_regions}' bounding regions")
        if kv_pair.value:
            print(
                f"Value '{kv_pair.value.content}' found within "
                f"'{kv_pair.value.bounding_regions}' bounding regions\n"
            )

# Per-page details: dimensions, text lines with their words, selection marks.
for page in result.pages:
    print(f"----Analyzing document from page #{page.page_number}----")
    print(f"Page has width: {page.width} and height: {page.height}, measured with unit: {page.unit}")

    if page.lines:
        for line_idx, line in enumerate(page.lines):
            # `get_words` is expected to return the page words belonging to this line.
            words = get_words(page.words, line)
            print(
                f"...Line #{line_idx} has {len(words)} words and text '{line.content}' within "
                f"bounding polygon '{line.polygon}'"
            )

            for word in words:
                print(f"......Word '{word.content}' has a confidence of {word.confidence}")

    if page.selection_marks:
        for selection_mark in page.selection_marks:
            print(
                f"Selection mark is '{selection_mark.state}' within bounding polygon "
                f"'{selection_mark.polygon}' and has a confidence of "
                f"{selection_mark.confidence}"
            )

# Tables are reported at document level, with the location of the table and
# of each cell printed per bounding region.
if result.tables:
    for table_idx, table in enumerate(result.tables):
        print(f"Table # {table_idx} has {table.row_count} rows and {table.column_count} columns")
        if table.bounding_regions:
            for region in table.bounding_regions:
                print(f"Table # {table_idx} location on page: {region.page_number} is {region.polygon}")
        for cell in table.cells:
            print(f"...Cell[{cell.row_index}][{cell.column_index}] has text '{cell.content}'")
            if cell.bounding_regions:
                for region in cell.bounding_regions:
                    print(
                        f"...content on page {region.page_number} is within bounding polygon '{region.polygon}'\n"
                    )
print("----------------------------------------")
```

<!-- END SNIPPET -->

- Read more about the features provided by the `prebuilt-document` model [here][service_prebuilt_document].

### Using Prebuilt Models

Extract fields from select document types such as receipts, invoices, business cards, identity documents, and U.S. W-2 tax documents using prebuilt models provided by the Document Intelligence service.
Expand Down Expand Up @@ -467,8 +552,7 @@ if result.documents:
value_obj = obj[KEY_OF_VALUE_OBJECT]
extract_value_by_col_name = lambda key: (
value_obj[key].get(KEY_OF_CELL_CONTENT)
if key in value_obj
and KEY_OF_CELL_CONTENT in value_obj[key]
if key in value_obj and KEY_OF_CELL_CONTENT in value_obj[key]
else "None"
)
row_data = list(map(extract_value_by_col_name, col_names))
Expand Down Expand Up @@ -720,3 +804,4 @@ additional questions or comments.
[addon_languages_sample]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_languages.py
[query_fields_sample]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/documentintelligence/azure-ai-documentintelligence/samples/sample_analyze_addon_query_fields.py
[service-rename]: https://techcommunity.microsoft.com/t5/azure-ai-services-blog/azure-form-recognizer-is-now-azure-ai-document-intelligence-with/ba-p/3875765
[service_prebuilt_document]: https://docs.microsoft.com/azure/ai-services/document-intelligence/concept-general-document#general-document-features
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from io import IOBase
import json
import sys
from typing import Any, Callable, Dict, IO, Iterable, List, Optional, TypeVar, Union, cast, overload
from typing import Any, Callable, Dict, IO, Iterable, List, Optional, Type, TypeVar, Union, cast, overload
import urllib.parse

from azure.core.exceptions import (
Expand Down Expand Up @@ -460,7 +460,7 @@ def _analyze_document_initial( # pylint: disable=inconsistent-return-statements
output_content_format: Optional[Union[str, _models.ContentFormat]] = None,
**kwargs: Any,
) -> None:
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -3686,7 +3686,7 @@ def _classify_document_initial( # pylint: disable=inconsistent-return-statement
split: Optional[Union[str, _models.SplitMode]] = None,
**kwargs: Any,
) -> None:
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -6831,7 +6831,7 @@ class DocumentIntelligenceAdministrationClientOperationsMixin( # pylint: disabl
def _build_document_model_initial( # pylint: disable=inconsistent-return-statements
self, build_request: Union[_models.BuildDocumentModelRequest, JSON, IO[bytes]], **kwargs: Any
) -> None:
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -7331,7 +7331,7 @@ def get_long_running_output(pipeline_response):
def _compose_model_initial( # pylint: disable=inconsistent-return-statements
self, compose_request: Union[_models.ComposeDocumentModelRequest, JSON, IO[bytes]], **kwargs: Any
) -> None:
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -7982,7 +7982,7 @@ def authorize_model_copy(
the document model should be copied to. Required.
}
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -8042,7 +8042,7 @@ def authorize_model_copy(
def _copy_model_to_initial( # pylint: disable=inconsistent-return-statements
self, model_id: str, copy_to_request: Union[_models.CopyAuthorization, JSON, IO[bytes]], **kwargs: Any
) -> None:
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -8619,7 +8619,7 @@ def get_model(self, model_id: str, **kwargs: Any) -> _models.DocumentModelDetail
]
}
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -8755,7 +8755,7 @@ def list_models(self, **kwargs: Any) -> Iterable["_models.DocumentModelDetails"]

cls: ClsType[List[_models.DocumentModelDetails]] = kwargs.pop("cls", None)

error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -8837,7 +8837,7 @@ def delete_model(self, model_id: str, **kwargs: Any) -> None: # pylint: disable
:rtype: None
:raises ~azure.core.exceptions.HttpResponseError:
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -8911,7 +8911,7 @@ def get_resource_info(self, **kwargs: Any) -> _models.ResourceDetails:
}
}
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -8971,6 +8971,7 @@ def get_operation(self, operation_id: str, **kwargs: Any) -> _models.OperationDe

Example:
.. code-block:: python

# The response is polymorphic. The following are possible polymorphic responses based
off discriminator "kind":

Expand Down Expand Up @@ -9263,7 +9264,7 @@ def get_operation(self, operation_id: str, **kwargs: Any) -> _models.OperationDe
# response body for status code(s): 200
response == operation_details
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -9327,6 +9328,7 @@ def list_operations(self, **kwargs: Any) -> Iterable["_models.OperationDetails"]

Example:
.. code-block:: python

# The response is polymorphic. The following are possible polymorphic responses based
off discriminator "kind":

Expand Down Expand Up @@ -9624,7 +9626,7 @@ def list_operations(self, **kwargs: Any) -> Iterable["_models.OperationDetails"]

cls: ClsType[List[_models.OperationDetails]] = kwargs.pop("cls", None)

error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -9699,7 +9701,7 @@ def get_next(next_link=None):
def _build_classifier_initial( # pylint: disable=inconsistent-return-statements
self, build_request: Union[_models.BuildDocumentClassifierRequest, JSON, IO[bytes]], **kwargs: Any
) -> None:
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -10170,7 +10172,7 @@ def get_classifier(self, classifier_id: str, **kwargs: Any) -> _models.DocumentC
]
}
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -10282,7 +10284,7 @@ def list_classifiers(self, **kwargs: Any) -> Iterable["_models.DocumentClassifie

cls: ClsType[List[_models.DocumentClassifierDetails]] = kwargs.pop("cls", None)

error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down Expand Up @@ -10366,7 +10368,7 @@ def delete_classifier( # pylint: disable=inconsistent-return-statements
:rtype: None
:raises ~azure.core.exceptions.HttpResponseError:
"""
error_map = {
error_map: MutableMapping[int, Type[HttpResponseError]] = {
401: ClientAuthenticationError,
404: ResourceNotFoundError,
409: ResourceExistsError,
Expand Down
Loading

0 comments on commit f07513c

Please sign in to comment.