Skip to content

Commit

Permalink
fix(python): correct type hint for write_fragments() (#3373)
Browse files Browse the repository at this point in the history
Documentation for the new arguments of the write_fragments method was missing,
and the return type of write_fragments now depends on the value of
return_transaction. I added two overload signatures to make the type checker happy.

---------

Co-authored-by: Will Jones <[email protected]>
  • Loading branch information
chenkovsky and wjones127 authored Jan 13, 2025
1 parent 47b0b6c commit cfeece4
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 10 deletions.
66 changes: 60 additions & 6 deletions python/python/lance/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
Dict,
Iterator,
List,
Literal,
Optional,
Tuple,
Union,
overload,
)

import pyarrow as pa
Expand Down Expand Up @@ -672,12 +674,51 @@ def metadata(self) -> FragmentMetadata:
return self._fragment.metadata()


if TYPE_CHECKING:
    # Type-checker-only overloads for ``write_fragments``. The return type
    # depends on ``return_transaction``:
    #   * Literal[True]            -> Transaction
    #   * Literal[False] / omitted -> List[FragmentMetadata]
    #   * a plain ``bool`` value   -> the union of both
    # The last overload is a fallback for callers that pass a ``bool`` whose
    # value is not statically known; without it such calls match neither of
    # the ``Literal`` overloads and are rejected by the type checker.

    @overload
    def write_fragments(
        data: ReaderLike,
        dataset_uri: Union[str, Path, LanceDataset],
        schema: Optional[pa.Schema] = None,
        *,
        return_transaction: Literal[True],
        mode: str = "append",
        max_rows_per_file: int = 1024 * 1024,
        max_rows_per_group: int = 1024,
        max_bytes_per_file: int = DEFAULT_MAX_BYTES_PER_FILE,
        progress: Optional[FragmentWriteProgress] = None,
        data_storage_version: Optional[str] = None,
        use_legacy_format: Optional[bool] = None,
        storage_options: Optional[Dict[str, str]] = None,
        enable_move_stable_row_ids: bool = False,
    ) -> Transaction: ...

    @overload
    def write_fragments(
        data: ReaderLike,
        dataset_uri: Union[str, Path, LanceDataset],
        schema: Optional[pa.Schema] = None,
        *,
        return_transaction: Literal[False] = False,
        mode: str = "append",
        max_rows_per_file: int = 1024 * 1024,
        max_rows_per_group: int = 1024,
        max_bytes_per_file: int = DEFAULT_MAX_BYTES_PER_FILE,
        progress: Optional[FragmentWriteProgress] = None,
        data_storage_version: Optional[str] = None,
        use_legacy_format: Optional[bool] = None,
        storage_options: Optional[Dict[str, str]] = None,
        enable_move_stable_row_ids: bool = False,
    ) -> List[FragmentMetadata]: ...

    @overload
    def write_fragments(
        data: ReaderLike,
        dataset_uri: Union[str, Path, LanceDataset],
        schema: Optional[pa.Schema] = None,
        *,
        return_transaction: bool,
        mode: str = "append",
        max_rows_per_file: int = 1024 * 1024,
        max_rows_per_group: int = 1024,
        max_bytes_per_file: int = DEFAULT_MAX_BYTES_PER_FILE,
        progress: Optional[FragmentWriteProgress] = None,
        data_storage_version: Optional[str] = None,
        use_legacy_format: Optional[bool] = None,
        storage_options: Optional[Dict[str, str]] = None,
        enable_move_stable_row_ids: bool = False,
    ) -> Union[List[FragmentMetadata], Transaction]: ...


def write_fragments(
data: ReaderLike,
dataset_uri: Union[str, Path, LanceDataset],
schema: Optional[pa.Schema] = None,
return_transaction: bool = False,
*,
return_transaction: bool = False,
mode: str = "append",
max_rows_per_file: int = 1024 * 1024,
max_rows_per_group: int = 1024,
Expand Down Expand Up @@ -705,6 +746,8 @@ def write_fragments(
schema : pa.Schema, optional
The schema of the data. If not specified, the schema will be inferred
from the data.
return_transaction : bool, default False
If True, return the write transaction instead of the list of fragment
metadata.
mode : str, default "append"
The write mode. If "append" is specified, the data will be checked
against the existing dataset's schema. Otherwise, pass "create" or
Expand Down Expand Up @@ -733,13 +776,24 @@ def write_fragments(
storage_options : Optional[Dict[str, str]]
Extra options that make sense for a particular storage connection. This is
used to store connection parameters like credentials, endpoint, etc.
enable_move_stable_row_ids : bool, default False
Experimental: if set to true, the writer will use move-stable row ids.
These row ids are stable after compaction operations, but not after updates.
This makes compaction more efficient, since with stable row ids no
secondary indices need to be updated to point to new row ids.

Returns
-------
List[FragmentMetadata]
A list of :class:`FragmentMetadata` for the fragments written. The
fragment ids are left as zero meaning they are not yet specified. They
will be assigned when the fragments are committed to a dataset.
List[FragmentMetadata] | Transaction
If return_transaction is False:
A list of :class:`FragmentMetadata` for the fragments written. The
fragment ids are left as zero, meaning they are not yet specified. They
will be assigned when the fragments are committed to a dataset.
If return_transaction is True:
The write transaction. The type of transaction will correspond to
the mode parameter specified. This transaction can be passed to
:meth:`LanceDataset.commit`.
"""
from .dataset import LanceDataset

Expand Down
10 changes: 6 additions & 4 deletions python/python/lance/lance/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,9 @@ def _write_fragments(
max_rows_per_group: int,
max_bytes_per_file: int,
progress: Optional[FragmentWriteProgress],
data_storage_version=Optional[str],
storage_options=Optional[Dict[str, str]],
data_storage_version: Optional[str],
storage_options: Optional[Dict[str, str]],
enable_move_stable_row_ids: bool,
): ...
def _write_fragments_transaction(
dataset_uri: str | Path | _Dataset,
Expand All @@ -402,8 +403,9 @@ def _write_fragments_transaction(
max_rows_per_group: int,
max_bytes_per_file: int,
progress: Optional[FragmentWriteProgress],
data_storage_version=Optional[str],
storage_options=Optional[Dict[str, str]],
data_storage_version: Optional[str],
storage_options: Optional[Dict[str, str]],
enable_move_stable_row_ids: bool,
) -> Transaction: ...
def _json_to_schema(schema_json: str) -> pa.Schema: ...
def _schema_to_json(schema: pa.Schema) -> str: ...
Expand Down

0 comments on commit cfeece4

Please sign in to comment.