From 735f24dad949c3e946740763b5554cfb0f2354d3 Mon Sep 17 00:00:00 2001 From: santoshk_qubole Date: Mon, 30 Mar 2020 15:35:52 +0530 Subject: [PATCH 1/3] fix: dev: SDK-397: add dbtap config for mlflow cluster --- qds_sdk/engine.py | 15 ++++++++++++--- tests/test_clusterv2.py | 5 +++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index b7594b3e..6ac665e2 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -78,8 +78,11 @@ def set_engine_config(self, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) - self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) - self.set_mlflow_settings(mlflow_version) + # because of same config dbtap_id used in mlflow and airflow, configs are getting created for both cluster type + if mlflow_version: + self.set_mlflow_settings(mlflow_version, dbtap_id) + if airflow_version: + self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) def set_fairscheduler_settings(self, fairscheduler_config_xml=None, @@ -127,8 +130,10 @@ def set_airflow_settings(self, self.airflow_settings['airflow_python_version'] = airflow_python_version def set_mlflow_settings(self, - mlflow_version="1.5"): + mlflow_version="1.7", + dbtap_id=None): self.mlflow_settings['version'] = mlflow_version + self.mlflow_settings['dbtap_id'] = dbtap_id def set_engine_config_settings(self, arguments): custom_hadoop_config = util._read_file(arguments.custom_hadoop_config_file) @@ -253,4 +258,8 @@ def engine_parser(argparser): dest="mlflow_version", default=None, help="mlflow version for mlflow cluster", ) + mlflow_settings_group.add_argument("--mlflow-dbtap-id", + dest="dbtap_id", + default=None, + help="dbtap id for mlflow cluster", ) diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 0c791908..70b52ce5 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -548,7 +548,7 @@ def test_airflow_engine_config(self): def test_mlflow_engine_config(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--flavour', 'mlflow', '--mlflow-version', '1.5'] + '--flavour', 'mlflow', '--mlflow-version', '1.7', '--dbtap-id', '-1'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={}) @@ -557,7 +557,8 @@ def test_mlflow_engine_config(self): {'engine_config': {'flavour': 'mlflow', 'mlflow_settings': { - 'version': '1.5' + 'version': '1.7', + 'dbtap_id': '-1' }}, 'cluster_info': {'label': ['test_label'], }}) From 2549e4124c4f1c1f1ae980882bc108b3ab329f3d Mon Sep 17 00:00:00 2001 From: santoshk_qubole Date: Mon, 30 Mar 2020 16:11:46 +0530 Subject: [PATCH 2/3] fix lint errors --- qds_sdk/engine.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index 6ac665e2..cb9b6bc5 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -78,11 +78,16 @@ def set_engine_config(self, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) - # because of same config dbtap_id used in mlflow and airflow, configs are getting created for both cluster type + # because of same config dbtap_id used in mlflow and airflow, + # configs are getting created for both cluster type if mlflow_version: self.set_mlflow_settings(mlflow_version, dbtap_id) if airflow_version: - self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) + self.set_airflow_settings(dbtap_id, + fernet_key, + overrides, + airflow_version, + airflow_python_version) def set_fairscheduler_settings(self, fairscheduler_config_xml=None, @@ -259,7 +264,7 @@ def engine_parser(argparser): default=None, help="mlflow version for mlflow cluster", ) mlflow_settings_group.add_argument("--mlflow-dbtap-id", - dest="dbtap_id", - default=None, - help="dbtap id for mlflow cluster", ) + dest="dbtap_id", + default=None, + help="dbtap id for mlflow cluster", ) From 97c8313b8818aa8b93630bcf63f4d74457e89507 Mon Sep 17 00:00:00 2001 From: santoshk_qubole Date: Mon, 30 Mar 2020 16:55:43 +0530 Subject: [PATCH 3/3] review fixes --- qds_sdk/engine.py | 24 +++++++++--------------- tests/test_clusterv2.py | 2 +- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/qds_sdk/engine.py b/qds_sdk/engine.py index cb9b6bc5..c84ca366 100644 --- a/qds_sdk/engine.py +++ b/qds_sdk/engine.py @@ -33,7 +33,8 @@ def set_engine_config(self, airflow_python_version=None, is_ha=None, enable_rubix=None, - mlflow_version=None): + mlflow_version=None, + mlflow_dbtap_id=None): ''' Args: @@ -78,16 +79,8 @@ def set_engine_config(self, default_pool, enable_rubix) self.set_presto_settings(presto_version, custom_presto_config) self.set_spark_settings(spark_version, custom_spark_config) - # because of same config dbtap_id used in mlflow and airflow, - # configs are getting created for both cluster type - if mlflow_version: - self.set_mlflow_settings(mlflow_version, dbtap_id) - if airflow_version: - self.set_airflow_settings(dbtap_id, - fernet_key, - overrides, - airflow_version, - airflow_python_version) + self.set_airflow_settings(dbtap_id, fernet_key, overrides, airflow_version, airflow_python_version) + self.set_mlflow_settings(mlflow_version, mlflow_dbtap_id) def set_fairscheduler_settings(self, fairscheduler_config_xml=None, @@ -136,9 +129,9 @@ def set_airflow_settings(self, def set_mlflow_settings(self, mlflow_version="1.7", - dbtap_id=None): + mlflow_dbtap_id=None): self.mlflow_settings['version'] = mlflow_version - self.mlflow_settings['dbtap_id'] = dbtap_id + self.mlflow_settings['dbtap_id'] = mlflow_dbtap_id def set_engine_config_settings(self, arguments): custom_hadoop_config = util._read_file(arguments.custom_hadoop_config_file) @@ -160,7 +153,8 @@ def set_engine_config_settings(self, arguments): airflow_version=arguments.airflow_version, airflow_python_version=arguments.airflow_python_version, enable_rubix=arguments.enable_rubix, - mlflow_version=arguments.mlflow_version) + mlflow_version=arguments.mlflow_version, + mlflow_dbtap_id=arguments.mlflow_dbtap_id) @staticmethod def engine_parser(argparser): @@ -264,7 +258,7 @@ def engine_parser(argparser): default=None, help="mlflow version for mlflow cluster", ) mlflow_settings_group.add_argument("--mlflow-dbtap-id", - dest="dbtap_id", + dest="mlflow_dbtap_id", default=None, help="dbtap id for mlflow cluster", ) diff --git a/tests/test_clusterv2.py b/tests/test_clusterv2.py index 70b52ce5..078108ff 100644 --- a/tests/test_clusterv2.py +++ b/tests/test_clusterv2.py @@ -548,7 +548,7 @@ def test_airflow_engine_config(self): def test_mlflow_engine_config(self): sys.argv = ['qds.py', '--version', 'v2', 'cluster', 'create', '--label', 'test_label', - '--flavour', 'mlflow', '--mlflow-version', '1.7', '--dbtap-id', '-1'] + '--flavour', 'mlflow', '--mlflow-version', '1.7', '--mlflow-dbtap-id', '-1'] Qubole.cloud = None print_command() Connection._api_call = Mock(return_value={})