diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index e4dcd87c9..7ee67c8e0 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -70,7 +70,7 @@ jobs: - name: Cache pip dependencies id: cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 4a10744fe..51bd336ff 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -41,7 +41,7 @@ jobs: with: python-version: "3.10" - - uses: actions/cache@v3 + - uses: actions/cache@v4 name: Cache pip dependencies with: path: ~/.cache/pip @@ -95,7 +95,7 @@ jobs: - name: Cache pip dependencies id: cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} diff --git a/.github/workflows/release-deprecated.yml b/.github/workflows/release-deprecated.yml index 70083b9bc..f6fe44e03 100644 --- a/.github/workflows/release-deprecated.yml +++ b/.github/workflows/release-deprecated.yml @@ -23,7 +23,7 @@ jobs: with: python-version: "3.8" - - uses: actions/cache@v3 + - uses: actions/cache@v4 name: Cache pip dependencies with: path: ~/.cache/pip diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 889d8b6bd..a19736c1e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,7 +24,7 @@ jobs: with: python-version: "3.10" - - uses: actions/cache@v3 + - uses: actions/cache@v4 name: Cache pip dependencies with: path: ~/.cache/pip diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 590552b9e..5655be5a0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -58,21 +58,21 @@ jobs: python-version: ${{ matrix.python-version }} architecture: x64 - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Linux') with: path: ~/.cache/pip key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'macOS') with: path: ~/Library/Caches/pip key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Windows') with: path: ~\AppData\Local\pip\Cache @@ -106,21 +106,21 @@ jobs: python-version: ${{ matrix.python-version }} architecture: x64 - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Linux') with: path: ~/.cache/pip key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'macOS') with: path: ~/Library/Caches/pip key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Windows') with: path: ~\AppData\Local\pip\Cache @@ -135,7 +135,7 @@ jobs: - run: make test_cov - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Windows') with: path: ~\AppData\Local\pip\Cache @@ -189,21 +189,21 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Linux') with: path: ~/.cache/pip key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-${{ matrix.pandas }}-pip-\ - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'macOS') with: path: ~/Library/Caches/pip key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-${{ matrix.pandas }}-pip- - - uses: actions/cache@v3 + - uses: actions/cache@v4 if: startsWith(runner.os, 'Windows') with: path: ~\AppData\Local\pip\Cache diff --git a/requirements.txt b/requirements.txt index 4892182b8..8310fc49f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,4 +24,4 @@ typeguard>=4.1.2, <5 imagehash==4.3.1 wordcloud>=1.9.1 dacite>=1.8 -numba>=0.56.0,<0.59.0 +numba>=0.59.0,<0.60.0 diff --git a/src/ydata_profiling/expectations_report.py b/src/ydata_profiling/expectations_report.py index 7979e510b..d14f0ec4a 100644 --- a/src/ydata_profiling/expectations_report.py +++ b/src/ydata_profiling/expectations_report.py @@ -79,9 +79,7 @@ def to_expectation_suite( if not data_context: data_context = ge.data_context.DataContext() - suite = data_context.create_expectation_suite( - suite_name, overwrite_existing=True - ) + suite = data_context.add_expectation_suite(suite_name, overwrite_existing=True) # Instantiate an in-memory pandas dataset batch = ge.dataset.PandasDataset(self.df, expectation_suite=suite) diff --git a/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py b/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py index 8bd6e2a58..5ffe99a9f 100644 --- a/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py +++ b/src/ydata_profiling/model/pandas/describe_timeseries_pandas.py @@ -177,14 +177,16 @@ def compute_gap_stats(series: pd.Series) -> pd.Series: is_datetime = isinstance(series.index, pd.DatetimeIndex) gap_stats, gaps = identify_gaps(gap, is_datetime) + has_gaps = len(gap_stats) > 0 stats = { - "min": gap_stats.min(), - "max": gap_stats.max(), - "mean": gap_stats.mean(), + "min": gap_stats.min() if has_gaps else 0, + "max": gap_stats.max() if has_gaps else 0, + "mean": gap_stats.mean() if has_gaps else 0, "std": gap_stats.std() if len(gap_stats) > 1 else 0, "series": series, "gaps": gaps, + "n_gaps": len(gaps), } return stats diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py index 79b94aa00..4825a4567 100644 --- a/src/ydata_profiling/profile_report.py +++ b/src/ydata_profiling/profile_report.py @@ -450,7 +450,9 @@ def encode_it(o: Any) -> Any: return [encode_it(v) for v in o] elif isinstance(o, set): return {encode_it(v) for v in o} - elif isinstance(o, (pd.DataFrame, pd.Series)): + elif isinstance(o, pd.Series): + return encode_it(o.to_list()) + elif isinstance(o, pd.DataFrame): return encode_it(o.to_dict(orient="records")) elif isinstance(o, np.ndarray): return encode_it(o.tolist()) diff --git a/src/ydata_profiling/report/structure/overview.py b/src/ydata_profiling/report/structure/overview.py index 47ba8c4bc..38b129934 100644 --- a/src/ydata_profiling/report/structure/overview.py +++ b/src/ydata_profiling/report/structure/overview.py @@ -273,7 +273,8 @@ def get_dataset_alerts(config: Settings, alerts: list) -> Alerts: def get_timeseries_items(config: Settings, summary: BaseDescription) -> Container: - def format_tsindex_limit(limit: Any) -> str: + @list_args + def fmt_tsindex_limit(limit: Any) -> str: if isinstance(limit, datetime): return limit.strftime("%Y-%m-%d %H:%M:%S") else: @@ -291,11 +292,11 @@ def format_tsindex_limit(limit: Any) -> str: }, { "name": "Starting point", - "value": format_tsindex_limit(summary.time_index_analysis.start), + "value": fmt_tsindex_limit(summary.time_index_analysis.start), }, { "name": "Ending point", - "value": format_tsindex_limit(summary.time_index_analysis.end), + "value": fmt_tsindex_limit(summary.time_index_analysis.end), }, { "name": "Period", diff --git a/src/ydata_profiling/report/structure/variables/render_timeseries.py b/src/ydata_profiling/report/structure/variables/render_timeseries.py index ef9b42c52..6f3bc27cd 100644 --- a/src/ydata_profiling/report/structure/variables/render_timeseries.py +++ b/src/ydata_profiling/report/structure/variables/render_timeseries.py @@ -28,7 +28,7 @@ def _render_gap_tab(config: Settings, summary: dict) -> Container: { "name": "number of gaps", "value": fmt_numeric( - len(summary["gap_stats"]["gaps"]), precision=config.report.precision + summary["gap_stats"]["n_gaps"], precision=config.report.precision ), }, { diff --git a/tests/issues/test_issue1529.py b/tests/issues/test_issue1529.py new file mode 100644 index 000000000..e43199430 --- /dev/null +++ b/tests/issues/test_issue1529.py @@ -0,0 +1,34 @@ +""" +Test for issue 1529: +https://github.com/ydataai/ydata-profiling/issues/1529 +""" +import json + +import pandas as pd + +from ydata_profiling import ProfileReport + + +def test_issue1529(): + previous_dataset = pd.DataFrame( + data=[(1000, 42), (900, 30), (1500, 40), (1800, 38)], + columns=["rent_per_month", "total_area"], + ) + current_dataset = pd.DataFrame( + data=[(5000, 350), (9000, 600), (5000, 400), (3500, 500), (6000, 600)], + columns=["rent_per_month", "total_area"], + ) + previous_dataset_report = ProfileReport( + previous_dataset, title="Previous dataset report" + ) + current_dataset_report = ProfileReport( + current_dataset, title="Current dataset report" + ) + comparison_report = previous_dataset_report.compare(current_dataset_report) + json_str = comparison_report.to_json() + compare_dict = json.loads(json_str) + assert compare_dict is not None and len(compare_dict) > 0 + assert ( + compare_dict["analysis"]["title"] + == "Comparing Previous dataset report and Current dataset report" + )