Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: polars implementation of a row #733

Merged
merged 4 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 106 additions & 3 deletions src/safeds/data/tabular/containers/_experimental_polars_row.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,110 @@
from __future__ import annotations

from abc import ABC
from abc import ABC, abstractmethod
from collections.abc import Iterator, Mapping
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from safeds.data.tabular.typing import ColumnType, Schema

class ExperimentalPolarsRow(ABC): # noqa: B024
pass
from ._experimental_polars_cell import ExperimentalPolarsCell


class ExperimentalPolarsRow(ABC, Mapping[str, Any]):
    """
    A row is a one-dimensional collection of named, heterogeneous values.

    The class implements the read-only `Mapping` protocol (`in`, `[]`, `iter`, `len`) by delegating to the abstract
    column operations and properties declared below, so concrete subclasses only have to provide those.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __contains__(self, name: Any) -> bool:
        # `in` may be called with arbitrary objects, but `has_column` is only specified for strings. Column names are
        # strings, so anything else can never be a key of the row.
        return isinstance(name, str) and self.has_column(name)

    @abstractmethod
    def __eq__(self, other: object) -> bool: ...

    def __getitem__(self, name: str) -> ExperimentalPolarsCell:
        return self.get_value(name)

    @abstractmethod
    def __hash__(self) -> int: ...

    def __iter__(self) -> Iterator[str]:
        # Iterating a mapping yields its keys, which are the column names here.
        return iter(self.column_names)

    def __len__(self) -> int:
        return self.number_of_columns

    @abstractmethod
    def __sizeof__(self) -> int: ...

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    @abstractmethod
    def column_names(self) -> list[str]:
        """The names of the columns in the row."""

    @property
    @abstractmethod
    def number_of_columns(self) -> int:
        """The number of columns in the row."""

    @property
    @abstractmethod
    def schema(self) -> Schema:  # TODO: rethink return type
        """The schema of the row."""

    # ------------------------------------------------------------------------------------------------------------------
    # Column operations
    # ------------------------------------------------------------------------------------------------------------------

    @abstractmethod
    def get_value(self, name: str) -> ExperimentalPolarsCell:
        """
        Get the value of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        value:
            The value of the column.
        """

    @abstractmethod
    def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
        """
        Get the type of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        type:
            The type of the column.
        """

    @abstractmethod
    def has_column(self, name: str) -> bool:
        """
        Check if the row has a column with the specified name.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        has_column:
            Whether the row has a column with the specified name.
        """
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from typing import TYPE_CHECKING, Any, Literal

from safeds._utils import _check_and_normalize_file_path
from safeds.data.tabular.containers import Table
from safeds.exceptions import ColumnLengthMismatchError

from ._table import Table

if TYPE_CHECKING:
from collections.abc import Callable, Mapping, Sequence
from pathlib import Path
Expand All @@ -28,12 +29,13 @@ class ExperimentalPolarsTable:

To create a `Table` call the constructor or use one of the following static methods:

| Method | Description |
| ------------------------------------------------------------------------------------------------------------------ | -------------------------------------- |
| [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. |
| [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. |
| [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. |
| [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. |
| Method | Description |
| ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------- |
| [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. |
| [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. |
| [from_parquet_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_parquet_file] | Create a table from a Parquet file. |
| [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. |
| [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. |

Parameters
----------
Expand Down Expand Up @@ -212,7 +214,7 @@ def __str__(self) -> str:
@property
def column_names(self) -> list[str]:
"""
Names of the columns in the table.
The names of the columns in the table.

Examples
--------
Expand Down Expand Up @@ -354,6 +356,11 @@ def remove_duplicate_rows(self) -> ExperimentalPolarsTable:
"""
Remove duplicate rows from the table.

Returns
-------
filtered_table:
The table without duplicate rows.

Examples
--------
>>> from safeds.data.tabular.containers import ExperimentalPolarsTable
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from ._experimental_polars_cell import ExperimentalPolarsCell
from ._experimental_polars_column import ExperimentalPolarsColumn


class _VectorizedCell(ExperimentalPolarsCell):
    """A cell that is backed by an entire column instead of a single value."""

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, column: ExperimentalPolarsColumn) -> None:
        """
        Wrap the given column.

        Parameters
        ----------
        column:
            The column that this cell keeps a reference to.
        """
        self._column: ExperimentalPolarsColumn = column
111 changes: 111 additions & 0 deletions src/safeds/data/tabular/containers/_experimental_vectorized_row.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from ._experimental_polars_row import ExperimentalPolarsRow
from ._experimental_vectorized_cell import _VectorizedCell

if TYPE_CHECKING:
from safeds.data.tabular.typing import ColumnType, Schema

from ._experimental_polars_table import ExperimentalPolarsTable


class _VectorizedRow(ExperimentalPolarsRow):
    """
    A row is a one-dimensional collection of named, heterogeneous values.

    Instead of holding the values of a single row, this implementation wraps a whole table and exposes each of its
    columns as one "cell". Operations on the row therefore work on entire columns at once, which greatly speeds them
    up.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, table: ExperimentalPolarsTable) -> None:
        self._table: ExperimentalPolarsTable = table

    def __eq__(self, other: object) -> bool:
        # Identity implies equality; `self` is always a `_VectorizedRow`, so this shortcut never bypasses the type
        # check below.
        if other is self:
            return True
        if isinstance(other, _VectorizedRow):
            return self._table == other._table
        return NotImplemented

    def __hash__(self) -> int:
        # Consistent with `__eq__`: rows compare by their table, so they hash by it too.
        wrapped = self._table
        return wrapped.__hash__()

    def __sizeof__(self) -> int:
        wrapped = self._table
        return wrapped.__sizeof__()

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def column_names(self) -> list[str]:
        """The names of the columns in the row (taken from the wrapped table)."""
        names = self._table.column_names
        return names

    @property
    def number_of_columns(self) -> int:
        """The number of columns in the row (taken from the wrapped table)."""
        count = self._table.number_of_columns
        return count

    @property
    def schema(self) -> Schema:  # TODO: rethink return type
        """The schema of the row (taken from the wrapped table)."""
        table_schema = self._table.schema
        return table_schema

    # ------------------------------------------------------------------------------------------------------------------
    # Column operations
    # ------------------------------------------------------------------------------------------------------------------

    def get_value(self, name: str) -> _VectorizedCell:
        """
        Get the value of the specified column.

        The column is looked up in the wrapped table and returned wrapped in a vectorized cell.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        value:
            The value of the column.
        """
        column = self._table.get_column(name)
        return _VectorizedCell(column)

    def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
        """
        Get the type of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        type:
            The type of the column.
        """
        column_type = self._table.get_column_type(name)
        return column_type

    def has_column(self, name: str) -> bool:
        """
        Check if the row has a column with the specified name.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        has_column:
            Whether the row has a column with the specified name.
        """
        found = self._table.has_column(name)
        return found