Skip to content

Commit

Permalink
1.7.4
Browse files Browse the repository at this point in the history
 - fix for #63, periods in column names
 - added json_loader CLI options
 - updated moving/locking of columns to be persisted to back-end as well as front-end
 - added the ability to show/hide columns
 - added column builder popup (#61)
  • Loading branch information
Andrew Schonfeld committed Feb 19, 2020
1 parent 57b1e24 commit 1cd248f
Show file tree
Hide file tree
Showing 40 changed files with 1,302 additions and 306 deletions.
5 changes: 3 additions & 2 deletions dtale/cli/loaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import click

from dtale.cli.loaders import arctic_loader, csv_loader
from dtale.cli.loaders import arctic_loader, csv_loader, json_loader

logger = getLogger(__name__)

Expand Down Expand Up @@ -72,7 +72,8 @@ def custom_module_loader():

LOADERS = {
arctic_loader.LOADER_KEY: arctic_loader,
csv_loader.LOADER_KEY: csv_loader
csv_loader.LOADER_KEY: csv_loader,
json_loader.LOADER_KEY: json_loader,
}


Expand Down
34 changes: 34 additions & 0 deletions dtale/cli/loaders/json_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pandas as pd

from dtale.cli.clickutils import get_loader_options

'''
IMPORTANT!!! These global variables are required for building any customized CLI loader.
When build_loaders runs startup it will search for any modules containing the global variable LOADER_KEY.
'''
# key under which this loader's CLI options are registered
LOADER_KEY = 'json'
# click options exposed for this loader (each becomes --json-<name>)
LOADER_PROPS = [
    {'name': 'path', 'help': 'path to JSON file or URL to JSON endpoint'},
    {'name': 'convert_dates', 'help': 'comma-separated string of column names which should be parsed as dates'},
]


# IMPORTANT!!! This function is required for building any customized CLI loader.
def find_loader(kwargs):
    """
    JSON implementation of data loader which will return a function if any of the
    `click` options based on LOADER_KEY & LOADER_PROPS have been used, otherwise return None

    :param kwargs: Optional keyword arguments to be passed from `click`
    :return: data loader function for JSON implementation
    """
    json_opts = get_loader_options(LOADER_KEY, kwargs)
    if len([f for f in json_opts.values() if f]):
        def _json_loader():
            json_arg_parsers = {  # TODO: add additional arg parsers
                # key must match the option name in LOADER_PROPS ('convert_dates');
                # pd.read_json expects a list of column names, not a CSV string
                'convert_dates': lambda v: v.split(',') if v else None
            }
            kwargs = {k: json_arg_parsers.get(k, lambda v: v)(v) for k, v in json_opts.items() if k != 'path'}
            return pd.read_json(json_opts['path'], **kwargs)
        return _json_loader
    return None
4 changes: 2 additions & 2 deletions dtale/dash_application/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
update_label_for_freq)
from dtale.utils import (classify_type, dict_merge, divide_chunks,
flatten_lists, get_dtypes, make_list,
make_timeout_request)
make_timeout_request, run_query)
from dtale.views import DATA
from dtale.views import build_chart as build_chart_data

Expand Down Expand Up @@ -720,7 +720,7 @@ def build_figure_data(data_id, chart_type=None, query=None, x=None, y=None, z=No
rolling_comp=rolling_comp)):
return None

data = DATA[data_id] if (query or '') == '' else DATA[data_id].query(query)
data = run_query(DATA[data_id], query)
chart_kwargs = dict(group_col=group, agg=agg, allow_duplicates=chart_type == 'scatter', rolling_win=window,
rolling_comp=rolling_comp)
if chart_type in ZAXIS_CHARTS:
Expand Down
7 changes: 3 additions & 4 deletions dtale/dash_application/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
show_chart_per_group,
show_input_handler,
show_yaxis_ranges)
from dtale.utils import dict_merge, make_list
from dtale.views import DATA, _test_filter
from dtale.utils import dict_merge, make_list, run_query
from dtale.views import DATA

logger = getLogger(__name__)

Expand Down Expand Up @@ -139,8 +139,7 @@ def query_input(query, pathname, curr_query):
:rtype: tuple of (str, str, str)
"""
try:
if query is not None and query != '':
_test_filter(DATA[get_data_id(pathname)], query)
run_query(DATA[get_data_id(pathname)], query)
return query, {'line-height': 'inherit'}, ''
except BaseException as ex:
return curr_query, {'line-height': 'inherit', 'background-color': 'pink'}, str(ex)
Expand Down
42 changes: 35 additions & 7 deletions dtale/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,21 +538,25 @@ def filter_df_for_grid(df, params):
df = df[stringified_col.astype(str) == filter_val[1:]]
else:
df = df[stringified_col.astype(str).str.lower().str.contains(filter_val.lower(), na=False)]
if params.get('query'):
df = df.query(params['query'])
df = run_query(df, params.get('query'))
return df


def find_dtype(s):
    """Return the dtype name of a series, inferring the contents for object dtypes."""
    dtype_name = s.dtype.name
    if dtype_name != 'object':
        return dtype_name
    # object columns can hold anything; inspect the values to classify them
    return pd.api.types.infer_dtype(s, skipna=True)


def get_dtypes(df):
    """
    Build dictionary of column/dtype name pairs from :class:`pandas:pandas.DataFrame`
    """
    return {col: find_dtype(df[col]) for col in df.columns}


Expand Down Expand Up @@ -749,3 +753,27 @@ def make_timeout_request(target, args=None, kwargs=None, timeout=60):
'Request took longer than {} seconds. Please try adding additional filtering...'.format(timeout)
)
return results


def run_query(df, query):
    """
    Run a pandas query against a dataframe, transparently handling columns whose
    names are not valid python identifiers (e.g. contain periods or spaces) by
    temporarily renaming them for the duration of the query.

    :param df: dataframe to filter
    :type df: :class:`pandas:pandas.DataFrame`
    :param query: pandas query string (empty/None returns *df* unchanged)
    :type query: str, optional
    :raises Exception: if the query matches no rows
    :return: filtered dataframe with original column names restored
    :rtype: :class:`pandas:pandas.DataFrame`
    """
    if (query or '') == '':
        return df
    # https://stackoverflow.com/a/40083013/12616360
    # df.query can't reference columns that aren't valid identifiers
    invalid_column_names = [x for x in df.columns.values if not x.isidentifier()]

    # Make replacements in the query and keep track
    # NOTE: This method fails if the frame has columns called REPL_0 etc.
    replacements = dict()
    final_query = str(query)
    # replace longest names first so a column name that is a substring of
    # another (e.g. 'a.b' vs 'a.b.c') is not partially clobbered
    for idx, cn in enumerate(sorted(invalid_column_names, key=len, reverse=True)):
        r = 'REPL_{}'.format(idx)
        final_query = final_query.replace(cn, r)
        replacements[cn] = r

    inv_replacements = {v: k for k, v in replacements.items()}
    df = df.rename(columns=replacements)  # Rename the columns
    df = df.query(final_query)  # Carry out query
    if not len(df):
        raise Exception('query "{}" found no data, please alter'.format(query))
    df = df.rename(columns=inv_replacements)
    return df
Loading

0 comments on commit 1cd248f

Please sign in to comment.