-
Notifications
You must be signed in to change notification settings - Fork 322
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support the ability to pass in `None` for both `get_column_plot` and `get_column_pair_plot` (#2344)
- Loading branch information
Showing 5 changed files with 203 additions and 39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import pandas as pd | ||
|
||
|
||
def _prepare_data_visualization(data, metadata, column_names, sample_size):
    """Return a plot-ready copy of ``data``.

    Requested columns whose metadata ``sdtype`` is ``'datetime'`` are parsed with
    ``pd.to_datetime`` (using the column's ``datetime_format`` entry when the
    metadata provides one), and the result is optionally subsampled. The input
    frame itself is never modified.

    Args:
        data (pd.DataFrame or None):
            The data to prepare. ``None`` is passed straight through.
        metadata (Metadata):
            Metadata object whose ``columns`` mapping describes each column.
        column_names (str or list[str]):
            The column (or columns) that will be plotted.
        sample_size (int or None):
            Number of rows to keep. Any falsy value (``None`` or ``0``), or a value
            that is not smaller than the number of rows, keeps every row.

    Returns:
        pd.DataFrame or None:
            The prepared copy, or ``None`` when ``data`` is ``None``.
    """
    if data is None:
        return None

    if isinstance(column_names, list):
        requested = column_names
    else:
        # A single column name was given; treat it as a one-element list.
        requested = [column_names]

    prepared = data.copy()
    for name in requested:
        column_meta = metadata.columns[name]
        if column_meta['sdtype'] != 'datetime':
            continue

        parse_format = column_meta.get('datetime_format')
        prepared[name] = pd.to_datetime(prepared[name], format=parse_format)

    # Only subsample when a size was requested and it actually shrinks the frame.
    if sample_size and sample_size < len(prepared):
        return prepared.sample(n=sample_size)

    return prepared
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from sdv.evaluation._utils import _prepare_data_visualization | ||
from sdv.metadata import SingleTableMetadata | ||
|
||
|
||
def test__prepare_data_visualization():
    """Check datetime parsing and subsampling in ``_prepare_data_visualization``."""
    # Setup
    np.random.seed(0)  # fixes which rows the sampling step picks
    raw_frame = pd.DataFrame({
        'col1': ['2021-01-01', '2021-02-01', '2021-03-01'],
        'col2': [4, 5, 6],
    })
    columns_spec = {
        'col1': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
        'col2': {'sdtype': 'numerical'},
    }
    table_metadata = SingleTableMetadata.load_from_dict({'columns': columns_spec})

    # Run
    prepared = _prepare_data_visualization(raw_frame, table_metadata, ['col1', 'col2'], 2)

    # Assert
    expected = pd.DataFrame(
        {
            'col1': pd.to_datetime(['2021-03-01', '2021-02-01']),
            'col2': [6, 5],
        },
        index=[2, 1],
    )
    pd.testing.assert_frame_equal(prepared, expected)
Oops, something went wrong.