Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Table.remove_rows #720

Merged
merged 1 commit into from
May 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,19 +1126,24 @@ def add_rows(self, rows: list[Row] | Table) -> Table:

def filter_rows(self, query: Callable[[Row], bool]) -> Table:
"""
Return a new table with rows filtered by Callable (e.g. lambda function).
Return a new table containing only the rows that satisfy the query.

The original table is not modified.

Parameters
----------
query:
A Callable that is applied to all rows.
A callable that returns True if a row should be included in the new table.

Returns
-------
table:
A table containing only the rows filtered by the query.
A table containing only the rows that satisfy the query.

See Also
--------
remove_rows:
Remove rows that satisfy a query.

Examples
--------
Expand Down Expand Up @@ -1337,6 +1342,44 @@ def remove_columns_with_non_numerical_values(self) -> Table:
"""
return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()])

def remove_rows(self, query: Callable[[Row], bool]) -> Table:
    """
    Return a new table that omits every row for which the query returns True.

    The original table is not modified.

    Parameters
    ----------
    query:
        A callable that receives a row and returns True if that row should be dropped.

    Returns
    -------
    table:
        A table holding only the rows the query did not select. If no row is left, an empty table that
        carries the schema of this table is returned.

    See Also
    --------
    filter_rows:
        Create a table containing only the rows that satisfy a query.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]})
    >>> table.remove_rows(lambda x: x["a"] < 2)
       a  b
    0  3  4
    """
    import pandas as pd

    kept: list[Row] = []
    for candidate in self.to_rows():
        if not query(candidate):
            kept.append(candidate)

    if kept:
        return self.from_rows(kept)

    # No rows are left to build a table from, so create an empty table and pass
    # along the current schema explicitly.
    return Table._from_pandas_dataframe(pd.DataFrame(), self._schema)

def remove_duplicate_rows(self) -> Table:
"""
Return a new table with every duplicate row removed.
Expand Down
21 changes: 9 additions & 12 deletions tests/safeds/data/tabular/containers/_table/test_filter_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
("table1", "filter_column", "filter_value", "table2"),
[
(
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table(),
"col1",
1,
Table({"col1": [1, 1], "col2": [1, 4]}),
Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
),
(
Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
Expand All @@ -20,22 +20,19 @@
Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
),
(
Table(),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
"col1",
1,
Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
Table({"col1": [1, 1], "col2": [1, 4]}),
),
],
ids=["filter for col1 = 1", "no finding", "empty table"],
ids=[
"empty table",
"no matches",
"matches",
],
)
def test_should_filter_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
table1 = table1.filter_rows(lambda row: row.get_value(filter_column) == filter_value)
assert table1.schema == table2.schema
assert table2 == table1


# noinspection PyTypeChecker
def test_should_raise_error_if_column_type_invalid() -> None:
    """Check that filter_rows raises a TypeError when given a column comparison instead of a callable."""
    table = Table({"col1": [1, 2, 3], "col2": [1, 1, 4]})
    first_data = table.get_column("col1")._data
    second_data = table.get_column("col2")._data

    with pytest.raises(TypeError, match=r"'Series' object is not callable"):
        # The comparison result is passed where a callable query is expected.
        table.filter_rows(first_data > second_data)
38 changes: 38 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_remove_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import ColumnType, Schema


@pytest.mark.parametrize(
    ("table1", "remove_column", "remove_value", "table2"),
    [
        pytest.param(
            Table(),
            "col1",
            1,
            Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
            id="empty table",
        ),
        pytest.param(
            Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
            "col1",
            1,
            Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
            id="no match",
        ),
        pytest.param(
            Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
            "col1",
            1,
            Table({"col1": [2], "col2": [2]}),
            id="matches",
        ),
    ],
)
def test_should_remove_rows(table1: Table, remove_column: str, remove_value: ColumnType, table2: Table) -> None:
    """Check that remove_rows drops the rows whose `remove_column` value equals `remove_value`."""
    remaining = table1.remove_rows(lambda row: row.get_value(remove_column) == remove_value)

    # Compare the schema separately from the table equality check.
    assert remaining.schema == table2.schema
    assert table2 == remaining