Skip to content

Commit

Permalink
OpenAPI: Add Scan Planning Endpoints to REST spec (apache#9695)
Browse files Browse the repository at this point in the history
  • Loading branch information
rahil-c authored and zachdisc committed Dec 12, 2024
1 parent 2d35932 commit 6fcb18b
Show file tree
Hide file tree
Showing 2 changed files with 767 additions and 5 deletions.
195 changes: 193 additions & 2 deletions open-api/rest-catalog-open-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ class ExpressionType(BaseModel):
__root__: str = Field(
...,
example=[
'true',
'false',
'eq',
'and',
'or',
Expand All @@ -129,6 +131,14 @@ class ExpressionType(BaseModel):
)


class TrueExpression(BaseModel):
type: ExpressionType


class FalseExpression(BaseModel):
type: ExpressionType


class Reference(BaseModel):
__root__: str = Field(..., example=['column-name'])

Expand Down Expand Up @@ -457,6 +467,12 @@ class AssertViewUUID(BaseModel):
uuid: str


class PlanStatus(BaseModel):
__root__: Literal['completed', 'submitted', 'cancelled', 'failed'] = Field(
..., description='Status of a server-side planning operation'
)


class RegisterTableRequest(BaseModel):
name: str
metadata_location: str = Field(..., alias='metadata-location')
Expand Down Expand Up @@ -808,8 +824,8 @@ class ContentFile(BaseModel):
file_path: str = Field(..., alias='file-path')
file_format: FileFormat = Field(..., alias='file-format')
spec_id: int = Field(..., alias='spec-id')
partition: Optional[List[PrimitiveTypeValue]] = Field(
None,
partition: List[PrimitiveTypeValue] = Field(
...,
description='A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`',
example=[1, 'bar'],
)
Expand Down Expand Up @@ -839,6 +855,20 @@ class EqualityDeleteFile(ContentFile):
)


class FieldName(BaseModel):
__root__: str = Field(
...,
description='A full field name (including parent field names), such as those passed in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - Nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - Nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - Nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - Nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name',
)


class PlanTask(BaseModel):
__root__: str = Field(
...,
description='An opaque string provided by the REST server that represents a unit of work to produce file scan tasks for scan planning. This allows clients to fetch tasks across multiple requests to accommodate large result sets.',
)


class CreateNamespaceRequest(BaseModel):
namespace: Namespace
properties: Optional[Dict[str, str]] = Field(
Expand Down Expand Up @@ -883,6 +913,29 @@ class ViewRequirement(BaseModel):
__root__: AssertViewUUID = Field(..., discriminator='type')


class FailedPlanningResult(IcebergErrorResponse):
"""
Failed server-side planning result
"""

status: Literal['failed']


class AsyncPlanningResult(BaseModel):
status: Literal['submitted']
plan_id: Optional[str] = Field(
None, alias='plan-id', description='ID used to track a planning request'
)


class EmptyPlanningResult(BaseModel):
"""
Empty server-side planning result
"""

status: Literal['cancelled']


class ReportMetricsRequest2(CommitReport):
report_type: str = Field(..., alias='report-type')

Expand Down Expand Up @@ -936,6 +989,16 @@ class DataFile(ContentFile):
)


class DeleteFile(BaseModel):
__root__: Union[PositionDeleteFile, EqualityDeleteFile] = Field(
..., discriminator='content'
)


class FetchScanTasksRequest(BaseModel):
plan_task: PlanTask = Field(..., alias='plan-task')


class Term(BaseModel):
__root__: Union[Reference, TransformTerm]

Expand Down Expand Up @@ -999,6 +1062,8 @@ class Type(BaseModel):

class Expression(BaseModel):
__root__: Union[
TrueExpression,
FalseExpression,
AndOrExpression,
NotExpression,
SetExpression,
Expand Down Expand Up @@ -1141,6 +1206,52 @@ class LoadTableResult(BaseModel):
config: Optional[Dict[str, str]] = None


class ScanTasks(BaseModel):
"""
Scan and planning tasks for server-side scan planning
- `plan-tasks` contains opaque units of planning work
- `file-scan-tasks` contains a partial or complete list of table scan tasks
- `delete-files` contains delete files referenced by file scan tasks
Each plan task must be passed to the fetchScanTasks endpoint to fetch the file scan tasks for the plan task.
The list of delete files must contain all delete files referenced by the file scan tasks.
"""

delete_files: Optional[List[DeleteFile]] = Field(
None,
alias='delete-files',
description='Delete files referenced by file scan tasks',
)
file_scan_tasks: Optional[List[FileScanTask]] = Field(None, alias='file-scan-tasks')
plan_tasks: Optional[List[PlanTask]] = Field(None, alias='plan-tasks')


class FetchPlanningResult(BaseModel):
__root__: Union[
CompletedPlanningResult, FailedPlanningResult, EmptyPlanningResult
] = Field(
...,
description='Result of server-side scan planning for fetchPlanningResult',
discriminator='status',
)


class PlanTableScanResult(BaseModel):
__root__: Union[
CompletedPlanningWithIDResult,
FailedPlanningResult,
AsyncPlanningResult,
EmptyPlanningResult,
] = Field(
...,
description='Result of server-side scan planning for planTableScan',
discriminator='status',
)


class CommitTableRequest(BaseModel):
identifier: Optional[TableIdentifier] = Field(
None,
Expand Down Expand Up @@ -1227,24 +1338,104 @@ class CommitTableResponse(BaseModel):
metadata: TableMetadata


class PlanTableScanRequest(BaseModel):
snapshot_id: Optional[int] = Field(
None,
alias='snapshot-id',
description='Identifier for the snapshot to scan in a point-in-time scan',
)
select: Optional[List[FieldName]] = Field(
None, description='List of selected schema fields'
)
filter: Optional[Expression] = Field(
None, description='Expression used to filter the table data'
)
case_sensitive: Optional[bool] = Field(
True,
alias='case-sensitive',
description='Enables case sensitive field matching for filter and select',
)
use_snapshot_schema: Optional[bool] = Field(
False,
alias='use-snapshot-schema',
description='Whether to use the schema at the time the snapshot was written.\nWhen time travelling, the snapshot schema should be used (true). When scanning a branch, the table schema should be used (false).',
)
start_snapshot_id: Optional[int] = Field(
None,
alias='start-snapshot-id',
description='Starting snapshot ID for an incremental scan (exclusive)',
)
end_snapshot_id: Optional[int] = Field(
None,
alias='end-snapshot-id',
description='Ending snapshot ID for an incremental scan (inclusive).\nRequired when start-snapshot-id is specified.',
)
stats_fields: Optional[List[FieldName]] = Field(
None,
alias='stats-fields',
description='List of fields for which the service should send column stats.',
)


class FileScanTask(BaseModel):
data_file: DataFile = Field(..., alias='data-file')
delete_file_references: Optional[List[int]] = Field(
None,
alias='delete-file-references',
description='A list of indices in the delete files array (0-based)',
)
residual_filter: Optional[Expression] = Field(
None,
alias='residual-filter',
description='An optional filter to be applied to rows in this file scan task.\nIf the residual is not present, the client must produce the residual or use the original filter.',
)


class Schema(StructType):
schema_id: Optional[int] = Field(None, alias='schema-id')
identifier_field_ids: Optional[List[int]] = Field(
None, alias='identifier-field-ids'
)


class CompletedPlanningResult(ScanTasks):
"""
Completed server-side planning result
"""

status: Literal['completed']


class FetchScanTasksResult(ScanTasks):
"""
Response schema for fetchScanTasks
"""


class ReportMetricsRequest1(ScanReport):
report_type: str = Field(..., alias='report-type')


class CompletedPlanningWithIDResult(CompletedPlanningResult):
plan_id: Optional[str] = Field(
None, alias='plan-id', description='ID used to track a planning request'
)
status: Literal['completed']


StructField.update_forward_refs()
ListType.update_forward_refs()
MapType.update_forward_refs()
Expression.update_forward_refs()
TableMetadata.update_forward_refs()
ViewMetadata.update_forward_refs()
AddSchemaUpdate.update_forward_refs()
ScanTasks.update_forward_refs()
FetchPlanningResult.update_forward_refs()
PlanTableScanResult.update_forward_refs()
CreateTableRequest.update_forward_refs()
CreateViewRequest.update_forward_refs()
ReportMetricsRequest.update_forward_refs()
CompletedPlanningResult.update_forward_refs()
FetchScanTasksResult.update_forward_refs()
CompletedPlanningWithIDResult.update_forward_refs()
Loading

0 comments on commit 6fcb18b

Please sign in to comment.