diff --git a/README.md b/README.md index f2bf68b51..914cf6521 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ pip install -e . The profiling report is written in HTML and CSS, which means a modern browser is required. -You need [Python 3](https://python3statement.org/) to run the package. Other dependencies can be found in the requirements files: +You need [Python 3](https://python3statement.github.io/) to run the package. Other dependencies can be found in the requirements files: | Filename | Requirements| |----------|-------------| diff --git a/src/ydata_profiling/model/spark/summary_spark.py b/src/ydata_profiling/model/spark/summary_spark.py index f9ce848f3..13a85f4c3 100644 --- a/src/ydata_profiling/model/spark/summary_spark.py +++ b/src/ydata_profiling/model/spark/summary_spark.py @@ -87,11 +87,17 @@ def multiprocess_1d(args: tuple) -> Tuple[str, dict]: column, df = args return column, describe_1d(config, df.select(column), summarizer, typeset) + # Rename the df column names to prevent potential conflicts + for col in df.columns: + df = df.withColumnRenamed(col, f"{col}_customer") + args = [(name, df) for name in df.columns] with multiprocessing.pool.ThreadPool(12) as executor: for i, (column, description) in enumerate( executor.imap_unordered(multiprocess_1d, args) ): + if column.endswith("_customer"): + column = column[:-9] pbar.set_postfix_str(f"Describe variable:{column}") # summary clean up for spark @@ -99,7 +105,7 @@ def multiprocess_1d(args: tuple) -> Tuple[str, dict]: series_description[column] = description pbar.update() - series_description = {k: series_description[k] for k in df.columns} + series_description = {k[:-9]: series_description[k[:-9]] for k in df.columns} # Mapping from column name to variable type series_description = sort_column_names(series_description, config.sort) diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html b/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html index cbe578dc7..2689418bd 100644 --- a/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html +++ b/src/ydata_profiling/report/presentation/flavours/html/templates/alerts/alert_constant.html @@ -1 +1 @@ -{{ alert.column_name }} has constant value "{{ alert.values['mode'] }}" +{{ alert.column_name }} has constant value "{{ alert.values['value_counts_without_nan'].index[0] }}"