apache · amogh-jahagirdar · Sep 10, 2024 · Feb 9, 2024 · Jul 25, 2024 · Jul 26, 2024
diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py
@@ -80,7 +80,7 @@ class Namespace(BaseModel):
 class PageToken(BaseModel):
     __root__: Optional[str] = Field(
         None,
-        description='An opaque token that allows clients to make use of pagination for list APIs (e.g. ListTables). Clients may initiate the first paginated request by sending an empty query parameter `pageToken` to the server.\nServers that support pagination should identify the `pageToken` parameter and return a `next-page-token` in the response if there are more results available.  After the initial request, the value of `next-page-token` from each response must be used as the `pageToken` parameter value for the next request. The server must return `null` value for the `next-page-token` in the last response.\nServers that support pagination must return all results in a single response with the value of `next-page-token` set to `null` if the query parameter `pageToken` is not set in the request.\nServers that do not support pagination should ignore the `pageToken` parameter and return all results in a single response. The `next-page-token` must be omitted from the response.\nClients must interpret either `null` or missing response value of `next-page-token` as the end of the listing results.',
+        description='An opaque token that allows clients to make use of pagination for list APIs (e.g. ListTables) as well as for scan-planning APIs (e.g PlanTable). Clients may initiate the first paginated request by sending an empty query parameter `pageToken` to the server.\nServers that support pagination should identify the `pageToken` parameter and return a `next-page-token` in the response if there are more results available.  After the initial request, the value of `next-page-token` from each response must be used as the `pageToken` parameter value for the next request. The server must return `null` value for the `next-page-token` in the last response.\nServers that support pagination must return all results in a single response with the value of `next-page-token` set to `null` if the query parameter `pageToken` is not set in the request.\nServers that do not support pagination should ignore the `pageToken` parameter and return all results in a single response. The `next-page-token` must be omitted from the response.\nClients must interpret either `null` or missing response value of `next-page-token` as the end of the listing results.',
     )
 
 
@@ -97,6 +97,8 @@ class ExpressionType(BaseModel):
     __root__: str = Field(
         ...,
         example=[
+            'true',
+            'false',
             'eq',
             'and',
             'or',
@@ -118,6 +120,14 @@ class ExpressionType(BaseModel):
     )
 
 
+class TrueExpression(BaseModel):
+    type: ExpressionType
+
+
+class FalseExpression(BaseModel):
+    type: ExpressionType
+
+
 class Reference(BaseModel):
     __root__: str = Field(..., example=['column-name'])
 
@@ -777,8 +787,8 @@ class ContentFile(BaseModel):
     file_path: str = Field(..., alias='file-path')
     file_format: FileFormat = Field(..., alias='file-format')
     spec_id: int = Field(..., alias='spec-id')
-    partition: Optional[List[PrimitiveTypeValue]] = Field(
-        None,
+    partition: List[PrimitiveTypeValue] = Field(
+        ...,
         description='A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`',
         example=[1, 'bar'],
     )
@@ -808,6 +818,23 @@ class EqualityDeleteFile(ContentFile):
     )
 
 
+class DeleteFile(BaseModel):
+    __root__: Union[PositionDeleteFile, EqualityDeleteFile]
+
+
+class FieldName(BaseModel):
+    __root__: str = Field(
+        ...,
+        description='A field name that follows the Iceberg naming standard, and can be used in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name',
+    )
+
+
+class PlanTask(BaseModel):
+    """
+    An opaque JSON object that contains information provided by the REST server to be utilized by clients for distributed table scan planning; should be supplied as input in `PlanTable` operation.
+    """
+
+
 class CreateNamespaceRequest(BaseModel):
     namespace: Namespace
     properties: Optional[Dict[str, str]] = Field(
@@ -852,6 +879,11 @@ class ViewRequirement(BaseModel):
     __root__: AssertViewUUID = Field(..., discriminator='type')
 
 
+class PreplanTableResult(BaseModel):
+    plan_tasks: List[PlanTask] = Field(..., alias='plan-tasks')
+    next_page_token: Optional[PageToken] = Field(None, alias='next-page-token')
+
+
 class ReportMetricsRequest2(CommitReport):
     report_type: str = Field(..., alias='report-type')
 
@@ -968,6 +1000,8 @@ class Type(BaseModel):
 
 class Expression(BaseModel):
     __root__: Union[
+        TrueExpression,
+        FalseExpression,
         AndOrExpression,
         NotExpression,
         SetExpression,
@@ -1111,6 +1145,16 @@ class LoadTableResult(BaseModel):
     config: Optional[Dict[str, str]] = None
 
 
+class PlanTableResult(BaseModel):
+    file_scan_tasks: List[FileScanTask] = Field(..., alias='file-scan-tasks')
+    delete_files: Optional[List[DeleteFile]] = Field(
+        None,
+        alias='delete-files',
+        description='A list of delete files that can be either positional or equality. If the client does not recognize the type of delete file being returned by the service it should immediately throw an exception that it does not support this type.',
+    )
+    next_page_token: Optional[PageToken] = Field(None, alias='next-page-token')
+
+
 class CommitTableRequest(BaseModel):
     identifier: Optional[TableIdentifier] = Field(
         None,
@@ -1197,6 +1241,184 @@ class CommitTableResponse(BaseModel):
     metadata: TableMetadata
 
 
+class PreplanTableRequest1(BaseModel):
+    snapshot_id: int = Field(
+        ...,
+        alias='snapshot-id',
+        description='The ID of the snapshot to use for the table scan.',
+    )
+    select: Optional[List[FieldName]] = Field(
+        None,
+        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
+    )
+    filter: Optional[Expression] = Field(
+        None,
+        description='an unbounded expression to describe the filters to apply to a table scan,',
+    )
+    case_sensitive: Optional[bool] = Field(
+        True,
+        alias='case-sensitive',
+        description='If field selection and filtering should be case sensitive',
+    )
+    use_snapshot_schema: Optional[bool] = Field(
+        False,
+        alias='use-snapshot-schema',
+        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
+    )
+    start_snapshot_id: Optional[int] = Field(
+        None,
+        alias='start-snapshot-id',
+        description='The ID of the starting snapshot of the incremental scan',
+    )
+    end_snapshot_id: Optional[int] = Field(
+        None,
+        alias='end-snapshot-id',
+        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
+    )
+
+
+class PreplanTableRequest2(BaseModel):
+    snapshot_id: Optional[int] = Field(
+        None,
+        alias='snapshot-id',
+        description='The ID of the snapshot to use for the table scan.',
+    )
+    select: Optional[List[FieldName]] = Field(
+        None,
+        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
+    )
+    filter: Optional[Expression] = Field(
+        None,
+        description='an unbounded expression to describe the filters to apply to a table scan,',
+    )
+    case_sensitive: Optional[bool] = Field(
+        True,
+        alias='case-sensitive',
+        description='If field selection and filtering should be case sensitive',
+    )
+    use_snapshot_schema: Optional[bool] = Field(
+        False,
+        alias='use-snapshot-schema',
+        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
+    )
+    start_snapshot_id: int = Field(
+        ...,
+        alias='start-snapshot-id',
+        description='The ID of the starting snapshot of the incremental scan',
+    )
+    end_snapshot_id: int = Field(
+        ...,
+        alias='end-snapshot-id',
+        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
+    )
+
+
+class PreplanTableRequest(BaseModel):
+    __root__: Union[PreplanTableRequest1, PreplanTableRequest2]
+
+
+class PlanTableRequest1(BaseModel):
+    plan_task: Optional[PlanTask] = Field(None, alias='plan-task')
+    snapshot_id: int = Field(
+        ...,
+        alias='snapshot-id',
+        description='The ID of the snapshot to use for the table scan.',
+    )
+    select: Optional[List[FieldName]] = Field(
+        None,
+        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
+    )
+    filter: Optional[Expression] = Field(
+        None,
+        description='an unbounded expression to describe the filters to apply to a table scan,',
+    )
+    case_sensitive: Optional[bool] = Field(
+        True,
+        alias='case-sensitive',
+        description='If field selection and filtering should be case sensitive',
+    )
+    use_snapshot_schema: Optional[bool] = Field(
+        False,
+        alias='use-snapshot-schema',
+        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
+    )
+    start_snapshot_id: Optional[int] = Field(
+        None,
+        alias='start-snapshot-id',
+        description='The ID of the starting snapshot of the incremental scan',
+    )
+    end_snapshot_id: Optional[int] = Field(
+        None,
+        alias='end-snapshot-id',
+        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
+    )
+    stats_fields: Optional[List[FieldName]] = Field(
+        None,
+        alias='stats-fields',
+        description='A list of fields that the client requests the server to send statistics in each `FileScanTask` returned in the response',
+    )
+
+
+class PlanTableRequest2(BaseModel):
+    plan_task: Optional[PlanTask] = Field(None, alias='plan-task')
+    snapshot_id: Optional[int] = Field(
+        None,
+        alias='snapshot-id',
+        description='The ID of the snapshot to use for the table scan.',
+    )
+    select: Optional[List[FieldName]] = Field(
+        None,
+        description='A list of fields in schema that are selected in a table scan. When not specified, all columns in the requested schema should be selected.',
+    )
+    filter: Optional[Expression] = Field(
+        None,
+        description='an unbounded expression to describe the filters to apply to a table scan,',
+    )
+    case_sensitive: Optional[bool] = Field(
+        True,
+        alias='case-sensitive',
+        description='If field selection and filtering should be case sensitive',
+    )
+    use_snapshot_schema: Optional[bool] = Field(
+        False,
+        alias='use-snapshot-schema',
+        description='If the client is performing time travel, the snapshot schema should be used. For clients performing a plan for a branch, should default to using the table schema.',
+    )
+    start_snapshot_id: int = Field(
+        ...,
+        alias='start-snapshot-id',
+        description='The ID of the starting snapshot of the incremental scan',
+    )
+    end_snapshot_id: int = Field(
+        ...,
+        alias='end-snapshot-id',
+        description='The ID of the inclusive ending snapshot of the incremental scan. If not specified, the snapshot at the main branch head will be used as the end snapshot.',
+    )
+    stats_fields: Optional[List[FieldName]] = Field(
+        None,
+        alias='stats-fields',
+        description='A list of fields that the client requests the server to send statistics in each `FileScanTask` returned in the response',
+    )
+
+
+class PlanTableRequest(BaseModel):
+    __root__: Union[PlanTableRequest1, PlanTableRequest2]
+
+
+class FileScanTask(BaseModel):
+    data_file: DataFile = Field(..., alias='data-file')
+    delete_files_references: Optional[List[int]] = Field(
+        None,
+        alias='delete-files-references',
+        description='A list of positional indices that correspond to a delete files array.',
+    )
+    residual_filter: Optional[Expression] = Field(
+        None,
+        alias='residual-filter',
+        description='An optional filter to be applied to rows in this file scan task. If the residual is not present, the client should calculate this or the original filter should be used.',
+    )
+
+
 class Schema(StructType):
     schema_id: Optional[int] = Field(None, alias='schema-id')
     identifier_field_ids: Optional[List[int]] = Field(
@@ -1215,6 +1437,7 @@ class ReportMetricsRequest1(ScanReport):
 TableMetadata.update_forward_refs()
 ViewMetadata.update_forward_refs()
 AddSchemaUpdate.update_forward_refs()
+PlanTableResult.update_forward_refs()
 CreateTableRequest.update_forward_refs()
 CreateViewRequest.update_forward_refs()
 ReportMetricsRequest.update_forward_refs()