From b6bdefca6a95bf4a0f16a23823b14f9e0f491dbd Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Thu, 24 Oct 2019 13:28:52 +0100
Subject: [PATCH 1/9] [db engine] Add support for Elasticsearch

---
 docs/index.rst                            |  1 +
 docs/installation.rst                     | 34 +++++++++++++++++
 superset/db_engine_specs/elasticsearch.py | 46 +++++++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 superset/db_engine_specs/elasticsearch.py

diff --git a/docs/index.rst b/docs/index.rst
index fbbe57f48766d..98da660bbc40a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -109,6 +109,7 @@ The following RDBMS are currently supported:
 - `Apache Spark SQL `_
 - `BigQuery `_
 - `ClickHouse `_
+- `Elasticsearch `_
 - `Exasol `_
 - `Google Sheets `_
 - `Greenplum `_
diff --git a/docs/installation.rst b/docs/installation.rst
index 569277ed47b63..50f30c46ea861 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -377,6 +377,8 @@ Here's a list of some of the recommended packages.
 +------------------+---------------------------------------+-------------------------------------------------+
 | ClickHouse       | ``pip install sqlalchemy-clickhouse`` |                                                 |
 +------------------+---------------------------------------+-------------------------------------------------+
+| Elasticsearch    | ``pip install elasticsearch-dbapi``   | ``elasticsearch+http://``                       |
++------------------+---------------------------------------+-------------------------------------------------+
 | Exasol           | ``pip install sqlalchemy-exasol``     | ``exa+pyodbc://``                               |
 +------------------+---------------------------------------+-------------------------------------------------+
 | Google Sheets    | ``pip install gsheetsdb``             | ``gsheets://``                                  |
@@ -434,6 +436,38 @@ The connection string for BigQuery looks like this ::
 
 To be able to upload data, e.g. sample data, the python library `pandas_gbq` is required.
 
+Elasticsearch
+-------------
+
+The connection string for Elasticsearch looks like this ::
+
+    elasticsearch+http://{user}:{password}@{host}:9200/
+
+Using HTTPS ::
+
+    elasticsearch+https://{user}:{password}@{host}:9200/
+
+
+Elasticsearch has a default limit of 10000 rows, so you can increase this limit on your cluster
+or set Superset's row limit in the config ::
+
+    ROW_LIMIT = 10000
+
+You can query multiple indices on SQLLab, for example ::
+
+    select timestamp, agent from "logstash-*"
+
+But to use visualizations for multiple indices, you need to create an alias index on your cluster ::
+
+    POST /_aliases
+{
+    "actions" : [
+        { "add" : { "index" : "logstash-**", "alias" : "logstash_all" } }
+    ]
+}
+
+Then register you table with the ``alias`` name ``logstash_all``
+
 Snowflake
 ---------
 
diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
new file mode 100644
index 0000000000000..ac96c287c8dfe
--- /dev/null
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=C,R,W
+from datetime import datetime
+from typing import Dict
+
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+class ElasticSearchEngineSpec(BaseEngineSpec):
+    engine = "elasticsearch"
+    time_groupby_inline = True
+    time_secondary_columns = True
+    allows_joins = False
+    allows_subqueries = True
+
+    _time_grain_functions = {
+        None: "{col}",
+        "PT1S": "HISTOGRAM({col}, INTERVAL 1 SECOND)",
+        "PT1M": "HISTOGRAM({col}, INTERVAL 1 MINUTE)",
+        "PT1H": "HISTOGRAM({col}, INTERVAL 1 HOUR)",
+        "P1D": "HISTOGRAM({col}, INTERVAL 1 DAY)",
+        "P1M": "HISTOGRAM({col}, INTERVAL 1 MONTH)",
+        "P1Y": "HISTOGRAM({col}, INTERVAL 1 YEAR)",
+    }
+
+    type_code_map: Dict[int, str] = {}  # loaded from get_datatype only if needed
+
+    @classmethod
+    def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
+        if target_type.upper() in ("DATETIME", "DATE"):
+            return f"'{dttm.isoformat()}'"

From 208a0f6db00fe0fe2f94cb756648419e022205e2 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Thu, 24 Oct 2019 13:44:37 +0100
Subject: [PATCH 2/9] [setup] add extra requirement for elasticsearch

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index fe59631a2c1cc..31fdaf8e6b1f9 100644
--- a/setup.py
+++ b/setup.py
@@ -116,6 +116,7 @@ def get_git_sha():
         "postgres": ["psycopg2-binary==2.7.5"],
         "presto": ["pyhive[presto]>=0.4.0"],
         "druid": ["pydruid==0.5.2", "requests==2.22.0"],
+        "elasticsearch": ["elasticsearch-dbapi>=0.1.0, <0.2.0"],
     },
     python_requires="~=3.6",
     author="Apache Software Foundation",

From 55ac3baa1ee89d61492d2ccb26666260a53172c0 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Thu, 24 Oct 2019 13:51:35 +0100
Subject: [PATCH 3/9] [db engine] Fix, mypy lint

---
 superset/db_engine_specs/elasticsearch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
index ac96c287c8dfe..25f3b7713559f 100644
--- a/superset/db_engine_specs/elasticsearch.py
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -44,3 +44,4 @@ class ElasticSearchEngineSpec(BaseEngineSpec):
     def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
         if target_type.upper() in ("DATETIME", "DATE"):
             return f"'{dttm.isoformat()}'"
+        return "'{}'".format(dttm.strftime("%Y-%m-%d %H:%M:%S"))

From 81a33a1e510702ed3956c3b4559c6fc80e1ee920 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Thu, 24 Oct 2019 14:13:08 +0100
Subject: [PATCH 4/9] kick travis

---
 superset/db_engine_specs/elasticsearch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
index 25f3b7713559f..323c35dbec31a 100644
--- a/superset/db_engine_specs/elasticsearch.py
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -45,3 +45,4 @@ def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
         if target_type.upper() in ("DATETIME", "DATE"):
             return f"'{dttm.isoformat()}'"
         return "'{}'".format(dttm.strftime("%Y-%m-%d %H:%M:%S"))
+

From 08b9a9a1c972b70ff912b36fe4241382e1b89807 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Thu, 24 Oct 2019 14:14:33 +0100
Subject: [PATCH 5/9] kick travis

---
 superset/db_engine_specs/elasticsearch.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
index 323c35dbec31a..25f3b7713559f 100644
--- a/superset/db_engine_specs/elasticsearch.py
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -45,4 +45,3 @@ def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
         if target_type.upper() in ("DATETIME", "DATE"):
             return f"'{dttm.isoformat()}'"
         return "'{}'".format(dttm.strftime("%Y-%m-%d %H:%M:%S"))
-

From 205337852231774d4de98da94b2d933fdd1df935 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Thu, 24 Oct 2019 18:57:16 +0100
Subject: [PATCH 6/9] Fix, installation typo

---
 docs/installation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index 50f30c46ea861..2259196252343 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -466,7 +466,7 @@ But to use visualizations for multiple indices, you need to create an alias index on your cluster ::
     ]
 }
 
-Then register you table with the ``alias`` name ``logstash_all``
+Then register your table with the ``alias`` name ``logstash_all``
 
 Snowflake
 ---------

From 04fb4e1c938012d2cc4c026d862727df3bdc2c4c Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Fri, 25 Oct 2019 08:39:14 +0100
Subject: [PATCH 7/9] Fix, doc block indentation

---
 docs/installation.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index 7e254e9fe213d..32b209c5d1617 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -460,11 +460,11 @@ You can query multiple indices on SQLLab, for example ::
 But to use visualizations for multiple indices, you need to create an alias index on your cluster ::
 
     POST /_aliases
-{
-    "actions" : [
-        { "add" : { "index" : "logstash-**", "alias" : "logstash_all" } }
-    ]
-}
+    {
+        "actions" : [
+            { "add" : { "index" : "logstash-**", "alias" : "logstash_all" } }
+        ]
+    }
 
 Then register your table with the ``alias`` name ``logstash_all``
 

From 082cac7c03cd698f3c502fc76e664450a1dc8fa7 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Sat, 26 Oct 2019 19:25:22 +0100
Subject: [PATCH 8/9] Fix, use f string

---
 superset/db_engine_specs/elasticsearch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
index 25f3b7713559f..1c56e08b55350 100644
--- a/superset/db_engine_specs/elasticsearch.py
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -44,4 +44,4 @@ class ElasticSearchEngineSpec(BaseEngineSpec):
     def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
         if target_type.upper() in ("DATETIME", "DATE"):
             return f"'{dttm.isoformat()}'"
-        return "'{}'".format(dttm.strftime("%Y-%m-%d %H:%M:%S"))
+        return f"\'{dttm.strftime('%Y-%m-%d %H:%M:%S')}\'"

From d805120d678f7b194c54ad39ad360d18f1df2e86 Mon Sep 17 00:00:00 2001
From: Daniel Gaspar
Date: Mon, 28 Oct 2019 08:33:06 +0000
Subject: [PATCH 9/9] Fix, black it

---
 superset/db_engine_specs/elasticsearch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py
index 1c56e08b55350..7a016d6f7e226 100644
--- a/superset/db_engine_specs/elasticsearch.py
+++ b/superset/db_engine_specs/elasticsearch.py
@@ -44,4 +44,4 @@ class ElasticSearchEngineSpec(BaseEngineSpec):
     def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
         if target_type.upper() in ("DATETIME", "DATE"):
             return f"'{dttm.isoformat()}'"
-        return f"\'{dttm.strftime('%Y-%m-%d %H:%M:%S')}\'"
+        return f"'{dttm.strftime('%Y-%m-%d %H:%M:%S')}'"
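
A minimal usage sketch, not part of the patch series above: it shows one way the
``elasticsearch+http://`` SQLAlchemy URI documented in docs/installation.rst can be
exercised once ``elasticsearch-dbapi`` is installed. The host, port, index pattern and
row limit are placeholders; add ``{user}:{password}@`` and/or switch to
``elasticsearch+https://`` for a secured cluster ::

    from sqlalchemy import create_engine

    # Same URI shape as documented in docs/installation.rst (patch 1/9).
    engine = create_engine("elasticsearch+http://localhost:9200/")

    with engine.connect() as conn:
        # Index (or alias) names act as table names; quote wildcard patterns.
        for row in conn.execute('select timestamp, agent from "logstash-*" limit 10'):
            print(row)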