Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Scan Planning Endpoints to open api spec #9695

Merged
merged 12 commits into from
Sep 10, 2024
229 changes: 226 additions & 3 deletions open-api/rest-catalog-open-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class Namespace(BaseModel):
class PageToken(BaseModel):
    # Root model wrapping an optional, opaque pagination token string.
    # Field() may receive `description` only once (a repeated keyword argument
    # is a syntax error), so only the wording that also covers the
    # scan-planning APIs is kept.
    __root__: Optional[str] = Field(
        None,
        description='An opaque token that allows clients to make use of pagination for list APIs (e.g. ListTables) as well as for scan-planning APIs (e.g PlanTable). Clients may initiate the first paginated request by sending an empty query parameter `pageToken` to the server.\nServers that support pagination should identify the `pageToken` parameter and return a `next-page-token` in the response if there are more results available. After the initial request, the value of `next-page-token` from each response must be used as the `pageToken` parameter value for the next request. The server must return `null` value for the `next-page-token` in the last response.\nServers that support pagination must return all results in a single response with the value of `next-page-token` set to `null` if the query parameter `pageToken` is not set in the request.\nServers that do not support pagination should ignore the `pageToken` parameter and return all results in a single response. The `next-page-token` must be omitted from the response.\nClients must interpret either `null` or missing response value of `next-page-token` as the end of the listing results.',
    )


Expand All @@ -97,6 +97,8 @@ class ExpressionType(BaseModel):
__root__: str = Field(
...,
example=[
'true',
'false',
'eq',
'and',
'or',
Expand All @@ -118,6 +120,14 @@ class ExpressionType(BaseModel):
)


class TrueExpression(BaseModel):
    # Single required field carrying the expression type tag.
    # NOTE(review): nothing in this class restricts `type` to 'true'; it has
    # the same shape as FalseExpression - confirm how callers tell them apart.
    type: ExpressionType


class FalseExpression(BaseModel):
    # Single required field carrying the expression type tag.
    # NOTE(review): nothing in this class restricts `type` to 'false'; it has
    # the same shape as TrueExpression - confirm how callers tell them apart.
    type: ExpressionType


class Reference(BaseModel):
__root__: str = Field(..., example=['column-name'])

Expand Down Expand Up @@ -777,8 +787,8 @@ class ContentFile(BaseModel):
file_path: str = Field(..., alias='file-path')
file_format: FileFormat = Field(..., alias='file-format')
spec_id: int = Field(..., alias='spec-id')
partition: Optional[List[PrimitiveTypeValue]] = Field(
None,
partition: List[PrimitiveTypeValue] = Field(
...,
description='A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`',
example=[1, 'bar'],
)
Expand Down Expand Up @@ -808,6 +818,23 @@ class EqualityDeleteFile(ContentFile):
)


class DeleteFile(BaseModel):
    # Root model: a delete file is either a position delete file or an
    # equality delete file.
    __root__: Union[PositionDeleteFile, EqualityDeleteFile]


class FieldName(BaseModel):
    # Root model wrapping a required string; the naming rules for nested
    # fields (struct / map key / map value / list element) are spelled out in
    # the field description below.
    __root__: str = Field(
        ...,
        description='A field name that follows the Iceberg naming standard, and can be used in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name',
    )


class PlanTask(BaseModel):
    """
    An opaque JSON object that contains information provided by the REST server to be utilized by clients for distributed table scan planning; should be supplied as input in `PlanTable` operation.
    """

    # Intentionally declares no fields: the class body is the docstring only.


class CreateNamespaceRequest(BaseModel):
namespace: Namespace
properties: Optional[Dict[str, str]] = Field(
Expand Down Expand Up @@ -852,6 +879,11 @@ class ViewRequirement(BaseModel):
__root__: AssertViewUUID = Field(..., discriminator='type')


class PreplanTableResult(BaseModel):
    # Required list of plan tasks, serialized under the key `plan-tasks`.
    plan_tasks: List[PlanTask] = Field(..., alias='plan-tasks')
    # Optional pagination token, serialized as `next-page-token`.
    next_page_token: Optional[PageToken] = Field(None, alias='next-page-token')


class ReportMetricsRequest2(CommitReport):
report_type: str = Field(..., alias='report-type')

Expand Down Expand Up @@ -968,6 +1000,8 @@ class Type(BaseModel):

class Expression(BaseModel):
__root__: Union[
TrueExpression,
FalseExpression,
AndOrExpression,
NotExpression,
SetExpression,
Expand Down Expand Up @@ -1111,6 +1145,16 @@ class LoadTableResult(BaseModel):
config: Optional[Dict[str, str]] = None


class PlanTableResult(BaseModel):
    # Required list of file scan tasks, serialized as `file-scan-tasks`.
    file_scan_tasks: List[FileScanTask] = Field(..., alias='file-scan-tasks')
    # Optional list of delete files, serialized as `delete-files`.
    delete_files: Optional[List[DeleteFile]] = Field(
        None,
        alias='delete-files',
        description='A list of delete files that can be either positional or equality. If the client does not recognize the type of delete file being returned by the service it should immediately throw an exception that it does not support this type.',
    )
    # Optional pagination token, serialized as `next-page-token`.
    next_page_token: Optional[PageToken] = Field(None, alias='next-page-token')


class CommitTableRequest(BaseModel):
identifier: Optional[TableIdentifier] = Field(
None,
Expand Down Expand Up @@ -1197,6 +1241,184 @@ class CommitTableResponse(BaseModel):
metadata: TableMetadata


class PreplanTableRequest1(BaseModel):
    # Request variant in which `snapshot-id` is required and the incremental
    # range (`start-snapshot-id` / `end-snapshot-id`) is optional.
    snapshot_id: int = Field(
        ...,
        alias='snapshot-id',
        description='The ID of the snapshot to use for the table scan.',
    )

    # Optional projection and filter.
    select: Optional[List[FieldName]] = Field(
        None,
        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
    )
    filter: Optional[Expression] = Field(
        None,
        description='an unbounded expression to describe the filters to apply to a table scan,',
    )

    # Boolean switches: case sensitivity defaults to True, snapshot-schema
    # usage defaults to False.
    case_sensitive: Optional[bool] = Field(
        True,
        alias='case-sensitive',
        description='If field selection and filtering should be case sensitive',
    )
    use_snapshot_schema: Optional[bool] = Field(
        False,
        alias='use-snapshot-schema',
        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
    )

    # Incremental-scan bounds; both optional in this variant.
    start_snapshot_id: Optional[int] = Field(
        None,
        alias='start-snapshot-id',
        description='The ID of the starting snapshot of the incremental scan',
    )
    end_snapshot_id: Optional[int] = Field(
        None,
        alias='end-snapshot-id',
        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
    )


class PreplanTableRequest2(BaseModel):
    # Request variant in which the incremental range (`start-snapshot-id` /
    # `end-snapshot-id`) is required and `snapshot-id` is optional.
    snapshot_id: Optional[int] = Field(
        None,
        alias='snapshot-id',
        description='The ID of the snapshot to use for the table scan.',
    )

    # Optional projection and filter.
    select: Optional[List[FieldName]] = Field(
        None,
        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
    )
    filter: Optional[Expression] = Field(
        None,
        description='an unbounded expression to describe the filters to apply to a table scan,',
    )

    # Boolean switches: case sensitivity defaults to True, snapshot-schema
    # usage defaults to False.
    case_sensitive: Optional[bool] = Field(
        True,
        alias='case-sensitive',
        description='If field selection and filtering should be case sensitive',
    )
    use_snapshot_schema: Optional[bool] = Field(
        False,
        alias='use-snapshot-schema',
        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
    )

    # Incremental-scan bounds; both required in this variant.
    start_snapshot_id: int = Field(
        ...,
        alias='start-snapshot-id',
        description='The ID of the starting snapshot of the incremental scan',
    )
    # NOTE(review): this field is required here, yet its description still
    # explains what happens "if not specified" - confirm which is intended.
    end_snapshot_id: int = Field(
        ...,
        alias='end-snapshot-id',
        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
    )


class PreplanTableRequest(BaseModel):
    # Root model accepting either request variant; the variant that requires
    # `snapshot-id` is listed first in the union.
    __root__: Union[PreplanTableRequest1, PreplanTableRequest2]


class PlanTableRequest1(BaseModel):
    # Request variant in which `snapshot-id` is required and the incremental
    # range (`start-snapshot-id` / `end-snapshot-id`) is optional.

    # Optional plan task, serialized as `plan-task`.
    plan_task: Optional[PlanTask] = Field(None, alias='plan-task')
    snapshot_id: int = Field(
        ...,
        alias='snapshot-id',
        description='The ID of the snapshot to use for the table scan.',
    )

    # Optional projection and filter.
    select: Optional[List[FieldName]] = Field(
        None,
        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
    )
    filter: Optional[Expression] = Field(
        None,
        description='an unbounded expression to describe the filters to apply to a table scan,',
    )

    # Boolean switches: case sensitivity defaults to True, snapshot-schema
    # usage defaults to False.
    case_sensitive: Optional[bool] = Field(
        True,
        alias='case-sensitive',
        description='If field selection and filtering should be case sensitive',
    )
    use_snapshot_schema: Optional[bool] = Field(
        False,
        alias='use-snapshot-schema',
        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
    )

    # Incremental-scan bounds; both optional in this variant.
    start_snapshot_id: Optional[int] = Field(
        None,
        alias='start-snapshot-id',
        description='The ID of the starting snapshot of the incremental scan',
    )
    end_snapshot_id: Optional[int] = Field(
        None,
        alias='end-snapshot-id',
        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
    )

    # Optional list of fields for which per-task statistics are requested.
    stats_fields: Optional[List[FieldName]] = Field(
        None,
        alias='stats-fields',
        description='A list of fields that the client requests the server to send statistics in each `FileScanTask` returned in the response',
    )


class PlanTableRequest2(BaseModel):
    # Request variant in which the incremental range (`start-snapshot-id` /
    # `end-snapshot-id`) is required and `snapshot-id` is optional.

    # Optional plan task, serialized as `plan-task`.
    plan_task: Optional[PlanTask] = Field(None, alias='plan-task')
    snapshot_id: Optional[int] = Field(
        None,
        alias='snapshot-id',
        description='The ID of the snapshot to use for the table scan.',
    )

    # Optional projection and filter.
    select: Optional[List[FieldName]] = Field(
        None,
        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
    )
    filter: Optional[Expression] = Field(
        None,
        description='an unbounded expression to describe the filters to apply to a table scan,',
    )

    # Boolean switches: case sensitivity defaults to True, snapshot-schema
    # usage defaults to False.
    case_sensitive: Optional[bool] = Field(
        True,
        alias='case-sensitive',
        description='If field selection and filtering should be case sensitive',
    )
    use_snapshot_schema: Optional[bool] = Field(
        False,
        alias='use-snapshot-schema',
        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
    )

    # Incremental-scan bounds; both required in this variant.
    start_snapshot_id: int = Field(
        ...,
        alias='start-snapshot-id',
        description='The ID of the starting snapshot of the incremental scan',
    )
    # NOTE(review): this field is required here, yet its description still
    # explains what happens "if not specified" - confirm which is intended.
    end_snapshot_id: int = Field(
        ...,
        alias='end-snapshot-id',
        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
    )

    # Optional list of fields for which per-task statistics are requested.
    stats_fields: Optional[List[FieldName]] = Field(
        None,
        alias='stats-fields',
        description='A list of fields that the client requests the server to send statistics in each `FileScanTask` returned in the response',
    )


class PlanTableRequest(BaseModel):
    # Root model accepting either request variant; the variant that requires
    # `snapshot-id` is listed first in the union.
    __root__: Union[PlanTableRequest1, PlanTableRequest2]


class FileScanTask(BaseModel):
    # Required data file for this task, serialized as `data-file`.
    data_file: DataFile = Field(..., alias='data-file')
    # Optional integer indices, serialized as `delete-files-references`.
    delete_files_references: Optional[List[int]] = Field(
        None,
        alias='delete-files-references',
        description='A list of positional indices that correspond to a delete files array.',
    )
    # Optional residual expression, serialized as `residual-filter`.
    residual_filter: Optional[Expression] = Field(
        None,
        alias='residual-filter',
        description='An optional filter to be applied to rows in this file scan task. If the residual is not present, the client should calculate this or the original filter should be used.',
    )


class Schema(StructType):
schema_id: Optional[int] = Field(None, alias='schema-id')
identifier_field_ids: Optional[List[int]] = Field(
Expand All @@ -1215,6 +1437,7 @@ class ReportMetricsRequest1(ScanReport):
# Module-level calls asking each listed model to resolve its forward-referenced
# field types once every class in the file has been declared.
TableMetadata.update_forward_refs()
ViewMetadata.update_forward_refs()
AddSchemaUpdate.update_forward_refs()
# PlanTableResult refers to FileScanTask, which is declared further down the
# file than PlanTableResult itself.
PlanTableResult.update_forward_refs()
CreateTableRequest.update_forward_refs()
CreateViewRequest.update_forward_refs()
ReportMetricsRequest.update_forward_refs()
Loading