class ExperimentalPolarsRow(ABC, Mapping[str, Any]):
    """
    A row is a one-dimensional collection of named, heterogeneous values.

    The class implements the read-only `Mapping` protocol on top of its abstract column API:

    - `name in row` delegates to `has_column`,
    - `row[name]` delegates to `get_value`,
    - iterating a row yields its column names,
    - `len(row)` is the number of columns.

    Subclasses must implement `__eq__`, `__hash__`, `__sizeof__`, all properties, and all column operations.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __contains__(self, name: Any) -> bool:
        # Overrides the `Mapping` mixin so that membership tests use `has_column` instead of a trial `__getitem__`.
        return self.has_column(name)

    @abstractmethod
    def __eq__(self, other: object) -> bool: ...

    def __getitem__(self, name: str) -> ExperimentalPolarsCell:
        return self.get_value(name)

    @abstractmethod
    def __hash__(self) -> int: ...

    def __iter__(self) -> Iterator[str]:
        # The keys of the mapping are the column names, so the iterator yields strings.
        return iter(self.column_names)

    def __len__(self) -> int:
        return self.number_of_columns

    @abstractmethod
    def __sizeof__(self) -> int: ...

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    @abstractmethod
    def column_names(self) -> list[str]:
        """The names of the columns in the row."""

    @property
    @abstractmethod
    def number_of_columns(self) -> int:
        """The number of columns in the row."""

    @property
    @abstractmethod
    def schema(self) -> Schema:  # TODO: rethink return type
        """The schema of the row."""

    # ------------------------------------------------------------------------------------------------------------------
    # Column operations
    # ------------------------------------------------------------------------------------------------------------------

    @abstractmethod
    def get_value(self, name: str) -> ExperimentalPolarsCell:
        """
        Get the value of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        value:
            The value of the column.
        """

    @abstractmethod
    def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
        """
        Get the type of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        type:
            The type of the column.
        """

    @abstractmethod
    def has_column(self, name: str) -> bool:
        """
        Check if the row has a column with the specified name.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        has_column:
            Whether the row has a column with the specified name.
        """
| + | [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. | + | [from_parquet_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_parquet_file] | Create a table from a Parquet file. | + | [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. | + | [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. | Parameters ---------- @@ -212,7 +214,7 @@ def __str__(self) -> str: @property def column_names(self) -> list[str]: """ - Names of the columns in the table. + The names of the columns in the table. Examples -------- @@ -354,6 +356,11 @@ def remove_duplicate_rows(self) -> ExperimentalPolarsTable: """ Remove duplicate rows from the table. + Returns + ------- + filtered_table: + The table without duplicate rows. 
class _VectorizedCell(ExperimentalPolarsCell):
    """A cell that wraps an entire column."""

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, column: ExperimentalPolarsColumn) -> None:
        # The wrapped column that backs this cell.
        self._column: ExperimentalPolarsColumn = column


class _VectorizedRow(ExperimentalPolarsRow):
    """
    A row is a one-dimensional collection of named, heterogeneous values.

    Rather than storing the values of one row, this implementation wraps a whole table and treats every column of
    that table as a "cell" of the row. This greatly speeds up operations on the row.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, table: ExperimentalPolarsTable) -> None:
        # The wrapped table; every property and column operation below delegates to it.
        self._table: ExperimentalPolarsTable = table

    def __eq__(self, other: object) -> bool:
        # Identity implies equality, so the table comparison can be skipped in that case.
        if self is other:
            return True
        if isinstance(other, _VectorizedRow):
            return self._table == other._table
        # Let Python try the reflected comparison for foreign types.
        return NotImplemented

    def __hash__(self) -> int:
        wrapped = self._table
        return wrapped.__hash__()

    def __sizeof__(self) -> int:
        wrapped = self._table
        return wrapped.__sizeof__()

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def column_names(self) -> list[str]:
        """The names of the columns in the row, taken from the wrapped table."""
        return self._table.column_names

    @property
    def number_of_columns(self) -> int:
        """The number of columns in the row, taken from the wrapped table."""
        return self._table.number_of_columns

    @property
    def schema(self) -> Schema:  # TODO: rethink return type
        """The schema of the row, taken from the wrapped table."""
        return self._table.schema

    # ------------------------------------------------------------------------------------------------------------------
    # Column operations
    # ------------------------------------------------------------------------------------------------------------------

    def get_value(self, name: str) -> _VectorizedCell:
        """
        Get the value of the specified column.

        The returned cell wraps the whole column of the underlying table with the given name.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        value:
            The value of the column.
        """
        column = self._table.get_column(name)
        return _VectorizedCell(column)

    def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
        """
        Get the type of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        type:
            The type of the column.
        """
        column_type = self._table.get_column_type(name)
        return column_type

    def has_column(self, name: str) -> bool:
        """
        Check if the row has a column with the specified name.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        has_column:
            Whether the row has a column with the specified name.
        """
        is_present = self._table.has_column(name)
        return is_present