Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Scan Planning Endpoints to open api spec #9695

Merged
merged 12 commits into from
Sep 10, 2024
154 changes: 152 additions & 2 deletions open-api/rest-catalog-open-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class ExpressionType(BaseModel):
__root__: str = Field(
...,
example=[
'true',
'false',
'eq',
'and',
'or',
Expand All @@ -118,6 +120,14 @@ class ExpressionType(BaseModel):
)


class TrueExpression(BaseModel):
type: ExpressionType


class FalseExpression(BaseModel):
type: ExpressionType


class Reference(BaseModel):
__root__: str = Field(..., example=['column-name'])

Expand Down Expand Up @@ -777,8 +787,8 @@ class ContentFile(BaseModel):
file_path: str = Field(..., alias='file-path')
file_format: FileFormat = Field(..., alias='file-format')
spec_id: int = Field(..., alias='spec-id')
partition: Optional[List[PrimitiveTypeValue]] = Field(
None,
partition: List[PrimitiveTypeValue] = Field(
...,
description='A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`',
example=[1, 'bar'],
)
Expand Down Expand Up @@ -808,6 +818,37 @@ class EqualityDeleteFile(ContentFile):
)


class GetTasksStatusRequest(BaseModel):
plan_id: str = Field(
..., alias='plan-id', description='id used to track status of `planTable`'
)


class CancelPlanRequest(BaseModel):
plan_id: str = Field(
..., alias='plan-id', description='id used to cancel `planTable` operation'
)


class FieldName(BaseModel):
__root__: str = Field(
...,
description='A field name that follows the Iceberg naming standard, and can be used in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name',
)


class PlanTask(BaseModel):
"""
An opaque JSON object that contains information provided by the REST server to be utilized by clients for distributed table scan planning; should be supplied as input in `RetrieveTasks` operation.
"""


class PlanStatus(BaseModel):
__root__: Literal['started', 'cancelled', 'failed'] = Field(
..., description='Represents the current status of the `planTable` operation.'
)


class CreateNamespaceRequest(BaseModel):
namespace: Namespace
properties: Optional[Dict[str, str]] = Field(
Expand Down Expand Up @@ -852,6 +893,14 @@ class ViewRequirement(BaseModel):
__root__: AssertViewUUID = Field(..., discriminator='type')


class CancelPlanResult(BaseModel):
"""
Used to indicate state of cancellation. If successful should return "cancelled" state.
"""

cancel_status: Optional[PlanStatus] = Field(None, alias='cancel-status')


class ReportMetricsRequest2(CommitReport):
report_type: str = Field(..., alias='report-type')

Expand Down Expand Up @@ -905,6 +954,16 @@ class DataFile(ContentFile):
)


class DeleteFile(BaseModel):
__root__: Union[PositionDeleteFile, EqualityDeleteFile] = Field(
..., discriminator='content'
)


class RetrieveTasksRequest(BaseModel):
plan_task: PlanTask = Field(..., alias='plan-task')


class Term(BaseModel):
__root__: Union[Reference, TransformTerm]

Expand Down Expand Up @@ -968,6 +1027,8 @@ class Type(BaseModel):

class Expression(BaseModel):
__root__: Union[
TrueExpression,
FalseExpression,
AndOrExpression,
NotExpression,
SetExpression,
Expand Down Expand Up @@ -1111,6 +1172,37 @@ class LoadTableResult(BaseModel):
config: Optional[Dict[str, str]] = None


class PlanTableResult(BaseModel):
"""
If the plan has not finished return a `plan-id`. If finished, the response will contain a list of `FileScanTask`, a list of `PlanTask`, or both.
"""

file_scan_tasks: Optional[List[FileScanTask]] = Field(None, alias='file-scan-tasks')
plan_tasks: Optional[List[PlanTask]] = Field(None, alias='plan-tasks')
plan_id: Optional[str] = Field(
None, alias='plan-id', description='id used to track progress of the plan'
)


class GetTasksStatusResult(BaseModel):
"""
If the plan has not finished return a `plan-status`. If the plan has finished can return a list of `FileScanTask`, a list of `PlanTask`, or both.
"""

file_scan_tasks: Optional[List[FileScanTask]] = Field(None, alias='file-scan-tasks')
plan_tasks: Optional[List[PlanTask]] = Field(None, alias='plan-tasks')
plan_status: Optional[PlanStatus] = Field(None, alias='plan-status')


class RetrieveTasksResult(BaseModel):
"""
Used to fetch file scan tasks for a given `planTask`. Can also return additional plan-tasks.
"""

file_scan_tasks: Optional[List[FileScanTask]] = Field(None, alias='file-scan-tasks')
plan_tasks: Optional[List[PlanTask]] = Field(None, alias='plan-tasks')


class CommitTableRequest(BaseModel):
identifier: Optional[TableIdentifier] = Field(
None,
Expand Down Expand Up @@ -1197,6 +1289,61 @@ class CommitTableResponse(BaseModel):
metadata: TableMetadata


class PlanTableRequest(BaseModel):
snapshot_id: Optional[int] = Field(
None,
alias='snapshot-id',
description='The ID of the snapshot to use for the table scan.',
)
select: Optional[List[FieldName]] = Field(
None,
description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
)
filter: Optional[Expression] = Field(
None,
description='an unbounded expression to describe the filters to apply to a table scan,',
)
case_sensitive: Optional[bool] = Field(
True,
alias='case-sensitive',
description='If field selection and filtering should be case sensitive',
)
use_snapshot_schema: Optional[bool] = Field(
False,
alias='use-snapshot-schema',
description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
)
start_snapshot_id: Optional[int] = Field(
None,
alias='start-snapshot-id',
description='The ID of the starting snapshot of the incremental scan',
)
end_snapshot_id: Optional[int] = Field(
None,
alias='end-snapshot-id',
description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
)
stats_fields: Optional[List[FieldName]] = Field(
None,
alias='stats-fields',
description='A list of fields that the client requests the server to send statistics in each `FileScanTask` returned in the response',
)


class FileScanTask(BaseModel):
data_file: DataFile = Field(..., alias='data-file')
delete_files_references: Optional[List[int]] = Field(
None,
alias='delete-files-references',
description='A list of positional indices that correspond to a delete files array.',
)
residual_filter: Optional[Expression] = Field(
None,
alias='residual-filter',
description='An optional filter to be applied to rows in this file scan task. If the residual is not present, the client should calculate this or the original filter should be used.',
)


class Schema(StructType):
schema_id: Optional[int] = Field(None, alias='schema-id')
identifier_field_ids: Optional[List[int]] = Field(
Expand All @@ -1215,6 +1362,9 @@ class ReportMetricsRequest1(ScanReport):
TableMetadata.update_forward_refs()
ViewMetadata.update_forward_refs()
AddSchemaUpdate.update_forward_refs()
PlanTableResult.update_forward_refs()
GetTasksStatusResult.update_forward_refs()
RetrieveTasksResult.update_forward_refs()
CreateTableRequest.update_forward_refs()
CreateViewRequest.update_forward_refs()
ReportMetricsRequest.update_forward_refs()
Loading
Loading