Skip to content

Commit

Permalink
fix(chart-data-api): support numeric temporal columns (#13138)
Browse files Browse the repository at this point in the history
  • Loading branch information
villebro authored Feb 16, 2021
1 parent c9f76d5 commit d8c32b8
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 39 deletions.
18 changes: 6 additions & 12 deletions superset/common/query_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import copy
import logging
import math
from datetime import timedelta
from typing import Any, cast, ClassVar, Dict, List, Optional, Union

import numpy as np
Expand Down Expand Up @@ -112,17 +111,12 @@ def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
# If the datetime format is unix, the parse will use the corresponding
# parsing logic
if not df.empty:
if DTTM_ALIAS in df.columns:
if timestamp_format in ("epoch_s", "epoch_ms"):
# Column has already been formatted as a timestamp.
df[DTTM_ALIAS] = df[DTTM_ALIAS].apply(pd.Timestamp)
else:
df[DTTM_ALIAS] = pd.to_datetime(
df[DTTM_ALIAS], utc=False, format=timestamp_format
)
if self.datasource.offset:
df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
df[DTTM_ALIAS] += query_object.time_shift
df = utils.normalize_dttm_col(
df=df,
timestamp_format=timestamp_format,
offset=self.datasource.offset,
time_shift=query_object.time_shift,
)

if self.enforce_numerical_metrics:
self.df_metrics_to_num(df, query_object)
Expand Down
32 changes: 32 additions & 0 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
from flask_babel import gettext as __
from flask_babel.speaklater import LazyString
from pandas.api.types import infer_dtype
from pandas.core.dtypes.common import is_numeric_dtype
from sqlalchemy import event, exc, select, Text
from sqlalchemy.dialects.mysql import MEDIUMTEXT
from sqlalchemy.engine import Connection, Engine
Expand Down Expand Up @@ -1579,3 +1580,34 @@ def format_list(items: Sequence[str], sep: str = ", ", quote: str = '"') -> str:
def find_duplicates(items: Iterable[InputType]) -> List[InputType]:
    """Return the elements of ``items`` that occur more than once.

    Occurrences are tallied with ``collections.Counter``, so the elements
    must be hashable. Each duplicated element is listed a single time.
    """
    tally = collections.Counter(items)
    return [element for element in tally if tally[element] > 1]


def normalize_dttm_col(
    df: pd.DataFrame,
    timestamp_format: Optional[str],
    offset: int,
    time_shift: Optional[timedelta],
) -> pd.DataFrame:
    """Return ``df`` with its ``DTTM_ALIAS`` column converted to datetimes.

    A frame without a ``DTTM_ALIAS`` column is returned as-is. Otherwise the
    conversion is performed on a copy, so the frame passed in is not modified.

    :param df: frame that may contain a ``DTTM_ALIAS`` column
    :param timestamp_format: ``"epoch_s"`` or ``"epoch_ms"`` for epoch based
        columns; any other value (including ``None``) is forwarded to
        ``pd.to_datetime`` as its ``format`` argument
    :param offset: number of hours added to the column; skipped when falsy
    :param time_shift: additional delta added to the column; skipped when
        ``None``
    :returns: the original frame, or a copy holding the normalized column
    """
    if DTTM_ALIAS not in df.columns:
        return df

    normalized = df.copy()
    raw_values = normalized[DTTM_ALIAS]

    if timestamp_format not in ("epoch_s", "epoch_ms"):
        converted = pd.to_datetime(raw_values, utc=False, format=timestamp_format)
    elif is_numeric_dtype(raw_values):
        # Numeric epoch values: the unit ("s" or "ms") is whatever follows
        # the "epoch_" prefix of the format name.
        epoch_unit = timestamp_format.replace("epoch_", "")
        converted = pd.to_datetime(
            raw_values, utc=False, unit=epoch_unit, origin="unix"
        )
    else:
        # Non-numeric values are treated as already formatted timestamps and
        # are passed through ``pd.Timestamp`` one by one.
        converted = raw_values.apply(pd.Timestamp)
    normalized[DTTM_ALIAS] = converted

    if offset:
        normalized[DTTM_ALIAS] += timedelta(hours=offset)
    if time_shift is not None:
        normalized[DTTM_ALIAS] += time_shift
    return normalized
33 changes: 6 additions & 27 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,33 +284,12 @@ def get_df(self, query_obj: Optional[QueryObjectDict] = None) -> pd.DataFrame:
# If the datetime format is unix, the parse will use the corresponding
# parsing logic.
if not df.empty:
if DTTM_ALIAS in df.columns:
if timestamp_format in ("epoch_s", "epoch_ms"):
# Column has already been formatted as a timestamp.
dttm_col = df[DTTM_ALIAS]
one_ts_val = dttm_col[0]

# convert time column to pandas Timestamp, but different
# ways to convert depending on string or int types
try:
int(one_ts_val)
is_integral = True
except (ValueError, TypeError):
is_integral = False
if is_integral:
unit = "s" if timestamp_format == "epoch_s" else "ms"
df[DTTM_ALIAS] = pd.to_datetime(
dttm_col, utc=False, unit=unit, origin="unix"
)
else:
df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
else:
df[DTTM_ALIAS] = pd.to_datetime(
df[DTTM_ALIAS], utc=False, format=timestamp_format
)
if self.datasource.offset:
df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
df[DTTM_ALIAS] += self.time_shift
df = utils.normalize_dttm_col(
df=df,
timestamp_format=timestamp_format,
offset=self.datasource.offset,
time_shift=self.time_shift,
)

if self.enforce_numerical_metrics:
self.df_metrics_to_num(df)
Expand Down
29 changes: 29 additions & 0 deletions tests/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
cast_to_num,
convert_legacy_filters_into_adhoc,
create_ssl_cert_file,
DTTM_ALIAS,
format_timedelta,
GenericDataType,
get_form_data_token,
Expand All @@ -59,6 +60,7 @@
merge_extra_filters,
merge_extra_form_data,
merge_request_params,
normalize_dttm_col,
parse_ssl_cert,
parse_js_uri_path_item,
extract_dataframe_dtypes,
Expand Down Expand Up @@ -1131,3 +1133,30 @@ def test_extract_dataframe_dtypes(self):

df = pd.DataFrame(data={col[0]: col[2] for col in cols})
assert extract_dataframe_dtypes(df) == [col[1] for col in cols]

def test_normalize_dttm_col(self):
    """Check ``normalize_dttm_col`` for timestamp, offset, shift and epoch inputs."""
    expected = pd.Timestamp(2021, 2, 15, 19, 0, 0, 0)

    def first_value(frame, timestamp_format, offset, time_shift):
        # Every check below inspects only the first row of the temporal column.
        normalized = normalize_dttm_col(frame, timestamp_format, offset, time_shift)
        return normalized[DTTM_ALIAS][0]

    # test regular (non-numeric) format: a column that already holds
    # timestamps comes back unchanged whichever format is declared
    timestamp_frame = pd.DataFrame([{"__timestamp": expected, "a": 1}])
    for declared_format in (None, "epoch_ms", "epoch_s"):
        assert first_value(timestamp_frame, declared_format, 0, None) == expected

    # test offset: one hour is added
    one_hour_later = pd.Timestamp(2021, 2, 15, 20, 0, 0, 0)
    assert first_value(timestamp_frame, None, 1, None) == one_hour_later

    # test offset and timedelta: both are added
    shifted = pd.Timestamp(2021, 2, 15, 20, 30, 0, 0)
    half_hour = timedelta(minutes=30)
    assert first_value(timestamp_frame, None, 1, half_hour) == shifted

    # test numeric epoch_s and epoch_ms formats, in that order
    epoch_seconds = expected.timestamp()
    for declared_format, multiplier in (("epoch_s", 1), ("epoch_ms", 1000)):
        numeric_frame = pd.DataFrame(
            [{"__timestamp": epoch_seconds * multiplier, "a": 1}]
        )
        assert first_value(numeric_frame, declared_format, 0, None) == expected

0 comments on commit d8c32b8

Please sign in to comment.