From 62b68cf080656a964762edd70c94d41b7569ce7a Mon Sep 17 00:00:00 2001 From: Enya-Yx Date: Tue, 2 Aug 2022 19:37:58 +0800 Subject: [PATCH 1/4] Suppot 'enable' config variables in offline store --- feathr_project/feathr/client.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index 7c8784bda..0c209bc96 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -115,9 +115,24 @@ def __init__(self, config_path:str = "./feathr_config.yaml", local_workspace_dir self.redis_ssl_enabled = self.envutils.get_environment_variable_with_default( 'online_store', 'redis', 'ssl_enabled') + # Offline store enabled configs + self.s3_enabled = self.envutils.get_environment_variable_with_default( + 'offline_store', 's3', 's3_enabled') + self.adls_enabled = self.envutils.get_environment_variable_with_default( + 'offline_store', 'adls', 'adls_enabled') + self.wasb_enabled = self.envutils.get_environment_variable_with_default( + 'offline_store', 'wasb', 'wasb_enabled') + self.jdbc_enabled = self.envutils.get_environment_variable_with_default( + 'offline_store', 'jdbc', 'jdbc_enabled') + self.snowflake_enabled = self.envutils.get_environment_variable_with_default( + 'offline_store', 'snowflake', 'snowflake_enabled') + if not (self.s3_enabled or self.adls_enabled or self.wasb_enabled or self.jdbc_enabled or self.snowflake_enabled): + self.logger.warning("No offline storage enabled.") + # S3 configs - self.s3_endpoint = self.envutils.get_environment_variable_with_default( - 'offline_store', 's3', 's3_endpoint') + if self.s3_enabled: + self.s3_endpoint = self.envutils.get_environment_variable_with_default( + 'offline_store', 's3', 's3_endpoint') # spark configs self.output_num_parts = self.envutils.get_environment_variable_with_default( @@ -665,6 +680,8 @@ def _getRedisConfigStr(self): def _get_s3_config_str(self): """Construct the S3 config string. The endpoint, access key, secret key, and other parameters can be set via environment variables.""" + if not self.s3_enabled: + return "" endpoint = self.s3_endpoint # if s3 endpoint is set in the feathr_config, then we need other environment variables # keys can't be only accessed through environment @@ -681,6 +698,8 @@ def _get_s3_config_str(self): def _get_adls_config_str(self): """Construct the ADLS config string for abfs(s). The Account, access key and other parameters can be set via environment variables.""" + if not self.adls_enabled: + return "" account = self.envutils.get_environment_variable('ADLS_ACCOUNT') # if ADLS Account is set in the feathr_config, then we need other environment variables # keys can't be only accessed through environment @@ -695,6 +714,8 @@ def _get_adls_config_str(self): def _get_blob_config_str(self): """Construct the Blob config string for wasb(s). The Account, access key and other parameters can be set via environment variables.""" + if not self.wasb_enabled: + return "" account = self.envutils.get_environment_variable('BLOB_ACCOUNT') # if BLOB Account is set in the feathr_config, then we need other environment variables # keys can't be only accessed through environment @@ -709,6 +730,8 @@ def _get_blob_config_str(self): def _get_sql_config_str(self): """Construct the SQL config string for jdbc. The dbtable (query), user, password and other parameters can be set via environment variables.""" + if not self.jdbc_enabled: + return "" table = self.envutils.get_environment_variable('JDBC_TABLE') user = self.envutils.get_environment_variable('JDBC_USER') password = self.envutils.get_environment_variable('JDBC_PASSWORD') @@ -745,6 +768,8 @@ def _get_monitoring_config_str(self): def _get_snowflake_config_str(self): """Construct the Snowflake config string for jdbc. The url, user, role and other parameters can be set via yaml config. Password can be set via environment variables.""" + if not self.snowflake_enabled: + return "" sf_url = self.envutils.get_environment_variable_with_default('offline_store', 'snowflake', 'url') sf_user = self.envutils.get_environment_variable_with_default('offline_store', 'snowflake', 'user') sf_role = self.envutils.get_environment_variable_with_default('offline_store', 'snowflake', 'role') From d69909224d3331f69a3ccf69035f059aaa5775fa Mon Sep 17 00:00:00 2001 From: Enya-Yx Date: Mon, 22 Aug 2022 18:08:24 +0800 Subject: [PATCH 2/4] improve previous change --- feathr_project/feathr/client.py | 49 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index 6a88f3209..2fead135d 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -115,7 +115,7 @@ def __init__(self, config_path:str = "./feathr_config.yaml", local_workspace_dir self.redis_ssl_enabled = self.envutils.get_environment_variable_with_default( 'online_store', 'redis', 'ssl_enabled') - # Offline store enabled configs + # Offline store enabled configs; false by default self.s3_enabled = self.envutils.get_environment_variable_with_default( 'offline_store', 's3', 's3_enabled') self.adls_enabled = self.envutils.get_environment_variable_with_default( @@ -517,25 +517,39 @@ def _get_offline_features_with_config(self, feature_join_conf_path='feature_join python_files=cloud_udf_paths, job_tags=job_tags, main_class_name='com.linkedin.feathr.offline.job.FeatureJoinJob', - arguments=[ + arguments= [ '--join-config', self.feathr_spark_launcher.upload_or_get_cloud_path( feature_join_job_params.join_config_path), '--input', feature_join_job_params.observation_path, '--output', feature_join_job_params.job_output_path, '--feature-config', self.feathr_spark_launcher.upload_or_get_cloud_path( feature_join_job_params.feature_config), - '--num-parts', self.output_num_parts, - '--s3-config', self._get_s3_config_str(), - '--adls-config', self._get_adls_config_str(), - '--blob-config', self._get_blob_config_str(), - '--sql-config', self._get_sql_config_str(), - '--snowflake-config', self._get_snowflake_config_str() - ], + '--num-parts', self.output_num_parts + ]+self._get_offline_storage_arguments(), reference_files_path=[], configuration=execution_configurations, properties=self._get_system_properties() ) + def _get_offline_storage_arguments(self): + arguments = [] + if self.s3_enabled: + arguments.append('--s3-config') + arguments.append(self._get_s3_config_str()) + if self.adls_enabled: + arguments.append('--adls-config') + arguments.append(self._get_adls_config_str()) + if self.wasb_enabled: + arguments.append('--blob-config') + arguments.append(self._get_blob_config_str()) + if self.jdbc_enabled: + arguments.append('--sql-config') + arguments.append(self._get_sql_config_str()) + if self.snowflake_enabled: + arguments.append('--snowflake-config') + arguments.append(self._get_snowflake_config_str()) + return arguments + def get_job_result_uri(self, block=True, timeout_sec=300) -> str: """Gets the job output URI """ @@ -632,12 +646,7 @@ def _materialize_features_with_config(self, feature_gen_conf_path: str = 'featur '--feature-config', self.feathr_spark_launcher.upload_or_get_cloud_path( generation_config.feature_config), '--redis-config', self._getRedisConfigStr(), - '--s3-config', self._get_s3_config_str(), - '--adls-config', self._get_adls_config_str(), - '--blob-config', self._get_blob_config_str(), - '--sql-config', self._get_sql_config_str(), - '--snowflake-config', self._get_snowflake_config_str(), - ] + optional_params + ] + self._get_offline_storage_arguments()+optional_params monitoring_config_str = self._get_monitoring_config_str() if monitoring_config_str: arguments.append('--monitoring-config') @@ -680,8 +689,6 @@ def _getRedisConfigStr(self): def _get_s3_config_str(self): """Construct the S3 config string. The endpoint, access key, secret key, and other parameters can be set via environment variables.""" - if not self.s3_enabled: - return "" endpoint = self.s3_endpoint # if s3 endpoint is set in the feathr_config, then we need other environment variables # keys can't be only accessed through environment @@ -698,8 +705,6 @@ def _get_s3_config_str(self): def _get_adls_config_str(self): """Construct the ADLS config string for abfs(s). The Account, access key and other parameters can be set via environment variables.""" - if not self.adls_enabled: - return "" account = self.envutils.get_environment_variable('ADLS_ACCOUNT') # if ADLS Account is set in the feathr_config, then we need other environment variables # keys can't be only accessed through environment @@ -714,8 +719,6 @@ def _get_adls_config_str(self): def _get_blob_config_str(self): """Construct the Blob config string for wasb(s). The Account, access key and other parameters can be set via environment variables.""" - if not self.wasb_enabled: - return "" account = self.envutils.get_environment_variable('BLOB_ACCOUNT') # if BLOB Account is set in the feathr_config, then we need other environment variables # keys can't be only accessed through environment @@ -730,8 +733,6 @@ def _get_blob_config_str(self): def _get_sql_config_str(self): """Construct the SQL config string for jdbc. The dbtable (query), user, password and other parameters can be set via environment variables.""" - if not self.jdbc_enabled: - return "" table = self.envutils.get_environment_variable('JDBC_TABLE') user = self.envutils.get_environment_variable('JDBC_USER') password = self.envutils.get_environment_variable('JDBC_PASSWORD') @@ -768,8 +769,6 @@ def _get_monitoring_config_str(self): def _get_snowflake_config_str(self): """Construct the Snowflake config string for jdbc. The url, user, role and other parameters can be set via yaml config. Password can be set via environment variables.""" - if not self.snowflake_enabled: - return "" sf_url = self.envutils.get_environment_variable_with_default('offline_store', 'snowflake', 'url') sf_user = self.envutils.get_environment_variable_with_default('offline_store', 'snowflake', 'user') sf_role = self.envutils.get_environment_variable_with_default('offline_store', 'snowflake', 'role') From 3cfc12ad3530578f7f98ff8e6345d577b81466f1 Mon Sep 17 00:00:00 2001 From: Enya-Yx Date: Wed, 24 Aug 2022 12:02:51 +0800 Subject: [PATCH 3/4] add 'snowflake_enabled' flag for testing cases --- feathr_project/test/test_user_workspace/feathr_config.yaml | 1 + feathr_project/test/test_user_workspace/feathr_config_maven.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/feathr_project/test/test_user_workspace/feathr_config.yaml b/feathr_project/test/test_user_workspace/feathr_config.yaml index e7624b577..3f44ddd0f 100644 --- a/feathr_project/test/test_user_workspace/feathr_config.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config.yaml @@ -60,6 +60,7 @@ offline_store: # snowflake endpoint snowflake: + snowflake_enabled: true url: "dqllago-ol19457.snowflakecomputing.com" user: "feathrintegration" role: "ACCOUNTADMIN" diff --git a/feathr_project/test/test_user_workspace/feathr_config_maven.yaml b/feathr_project/test/test_user_workspace/feathr_config_maven.yaml index 29dc0370e..07b0ee011 100644 --- a/feathr_project/test/test_user_workspace/feathr_config_maven.yaml +++ b/feathr_project/test/test_user_workspace/feathr_config_maven.yaml @@ -60,6 +60,7 @@ offline_store: # snowflake endpoint snowflake: + snowflake_enabled: true url: "dqllago-ol19457.snowflakecomputing.com" user: "feathrintegration" role: "ACCOUNTADMIN" From e2f072eea121f0004b9a2a124cde35e58ed0c6e8 Mon Sep 17 00:00:00 2001 From: Enya-Yx Date: Wed, 24 Aug 2022 14:46:46 +0800 Subject: [PATCH 4/4] Modify documents --- docs/samples/fraud_detection_demo.ipynb | 2 ++ docs/samples/product_recommendation_demo.ipynb | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/samples/fraud_detection_demo.ipynb b/docs/samples/fraud_detection_demo.ipynb index 412141d30..7d61822f1 100644 --- a/docs/samples/fraud_detection_demo.ipynb +++ b/docs/samples/fraud_detection_demo.ipynb @@ -213,6 +213,7 @@ " required_environment_variables:\n", " - 'REDIS_PASSWORD'\n", "offline_store:\n", + "# Please set 'enabled' flags as true (false by default) if any of items under the same paths are expected to be visited\n", " adls:\n", " adls_enabled: true\n", " wasb:\n", @@ -225,6 +226,7 @@ " jdbc_database: 'feathrtestdb'\n", " jdbc_table: 'feathrtesttable'\n", " snowflake:\n", + " snowflake_enabled: true\n", " url: \"dqllago-ol19457.snowflakecomputing.com\"\n", " user: \"feathrintegration\"\n", " role: \"ACCOUNTADMIN\"\n", diff --git a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/product_recommendation_demo.ipynb index 09f19b641..9a6fa3423 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/product_recommendation_demo.ipynb @@ -208,6 +208,7 @@ " required_environment_variables:\n", " - 'REDIS_PASSWORD'\n", "offline_store:\n", + "# Please set 'enabled' flags as true (false by default) if any of items under the same paths are expected to be visited\n", " adls:\n", " adls_enabled: true\n", " wasb:\n", @@ -220,6 +221,7 @@ " jdbc_database: 'feathrtestdb'\n", " jdbc_table: 'feathrtesttable'\n", " snowflake:\n", + " snowflake_enabled: true\n", " url: \"dqllago-ol19457.snowflakecomputing.com\"\n", " user: \"feathrintegration\"\n", " role: \"ACCOUNTADMIN\"\n",