From 7c34418ad94dc785c77900f9369417003ee81717 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Tue, 20 Sep 2022 20:37:45 -0700 Subject: [PATCH 01/10] Browse Paths Upgrade V2 --- .../source/aws/sagemaker_processors/feature_groups.py | 2 +- .../datahub/ingestion/source/aws/sagemaker_processors/jobs.py | 2 +- .../ingestion/source/aws/sagemaker_processors/models.py | 2 +- metadata-ingestion/src/datahub/ingestion/source/feast.py | 4 ++-- .../tests/integration/feast/feast_repository_mces_golden.json | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index 381ab4ef88af8a..3c043deb51a02b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -92,7 +92,7 @@ def get_feature_group_wu( feature_group_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("sagemaker", feature_group_name), aspects=[ - BrowsePathsClass(paths=[f"/sagemaker/{feature_group_name}"]), + BrowsePathsClass(paths=[f"/sagemaker"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py index 8ec02403e497d2..283186e0eae151 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py @@ -418,7 +418,7 @@ def create_common_job_snapshot( "jobType": job_type.value, }, ), - BrowsePathsClass(paths=[f"/{job_type.value}/{name}"]), + BrowsePathsClass(paths=[f"/{job_type.value}"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 7e2862a02c7171..8791e05d5b30f7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -287,7 +287,7 @@ def get_group_wu( }, ), OwnershipClass(owners), - BrowsePathsClass(paths=[f"/sagemaker/{group_name}"]), + BrowsePathsClass(paths=[f"/sagemaker"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index 9671d9b80c14cc..393838af996f36 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -278,7 +278,7 @@ def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkU urn=builder.make_ml_feature_table_urn("feast", feature_view_name), aspects=[ BrowsePathsClass( - paths=[f"/feast/{self.feature_store.project}/{feature_view_name}"] + paths=[f"/feast/{self.feature_store.project}"] ), StatusClass(removed=False), ], @@ -320,7 +320,7 @@ def _get_on_demand_feature_view_workunit( aspects=[ BrowsePathsClass( paths=[ - f"/feast/{self.feature_store.project}/{on_demand_feature_view_name}" + f"/feast/{self.feature_store.project}" ] ), StatusClass(removed=False), diff --git a/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json b/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json index 860d4e47693a1f..d69ecda7473ea4 100644 --- a/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json +++ b/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json @@ -173,7 +173,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/feast/feature_store/feature_store.driver_hourly_stats" + "/feast/feature_store" ] } }, @@ -286,7 +286,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/feast/feature_store/feature_store.transformed_conv_rate" + "/feast/feature_store" ] } }, From 09cd05946f32b420de50e51533bff8cd2ca38ddf Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 21 Sep 2022 09:05:11 -0700 Subject: [PATCH 02/10] Fixing lint --- .../source/aws/sagemaker_processors/feature_groups.py | 2 +- .../source/aws/sagemaker_processors/models.py | 2 +- .../src/datahub/ingestion/source/feast.py | 10 ++-------- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index 3c043deb51a02b..924a8aa7945b2f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -92,7 +92,7 @@ def get_feature_group_wu( feature_group_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("sagemaker", feature_group_name), aspects=[ - BrowsePathsClass(paths=[f"/sagemaker"]), + BrowsePathsClass(paths=["/sagemaker"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 8791e05d5b30f7..304d159d911476 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -287,7 +287,7 @@ def get_group_wu( }, ), OwnershipClass(owners), - BrowsePathsClass(paths=[f"/sagemaker"]), + BrowsePathsClass(paths=["/sagemaker"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index 393838af996f36..f7974d2ec3b2d0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -277,9 +277,7 @@ def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkU feature_view_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("feast", feature_view_name), aspects=[ - BrowsePathsClass( - paths=[f"/feast/{self.feature_store.project}"] - ), + BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]), StatusClass(removed=False), ], ) @@ -318,11 +316,7 @@ def _get_on_demand_feature_view_workunit( on_demand_feature_view_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("feast", on_demand_feature_view_name), aspects=[ - BrowsePathsClass( - paths=[ - f"/feast/{self.feature_store.project}" - ] - ), + BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]), StatusClass(removed=False), ], ) From a711072b65a0db7a55016669a847e309f3597074 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 15 Dec 2022 15:51:15 -0800 Subject: [PATCH 03/10] Fix browse path in golden file --- .../unit/sagemaker/sagemaker_mces_golden.json | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index 8c5da056cc2d94..00d701f64a68fa 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -77,7 +77,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/test-2" + "/sagemaker" ] } }, @@ -290,7 +290,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/test" + "/sagemaker" ] } }, @@ -904,7 +904,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/auto_ml/an-auto-ml-job" + "/auto_ml" ] } }, @@ -986,7 +986,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/compilation/a-compilation-job" + "/compilation" ] } }, @@ -1069,7 +1069,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/edge_packaging/an-edge-packaging-job" + "/edge_packaging" ] } }, @@ -1149,7 +1149,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/hyper_parameter_tuning/a-hyper-parameter-tuning-job" + "/hyper_parameter_tuning" ] } }, @@ -1229,7 +1229,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/labeling/a-labeling-job" + "/labeling" ] } }, @@ -1318,7 +1318,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/processing/a-processing-job" + "/processing" ] } }, @@ -1425,7 +1425,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/training/a-training-job" + "/training" ] } }, @@ -1517,7 +1517,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/transform/a-transform-job" + "/transform" ] } }, @@ -1812,4 +1812,4 @@ "proposedDelta": null, "systemMetadata": null } -] \ No newline at end of file +] From 0eb6486ec7d13a83b9acf6649e2fbb127d47d877 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 15 Dec 2022 15:59:56 -0800 Subject: [PATCH 04/10] Update sagemaker_mces_golden.json --- .../tests/unit/sagemaker/sagemaker_mces_golden.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index 00d701f64a68fa..246db2d871540e 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -1641,7 +1641,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group" + "/sagemaker" ] } } @@ -1720,7 +1720,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group/the-first-model" + "/sagemaker/a-model-package-group" ] } } @@ -1802,7 +1802,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group/the-second-model" + "/sagemaker/a-model-package-group" ] } } From d8fb535e203d05a7475f381e1815d8a9b32bd717 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 15 Dec 2022 16:24:34 -0800 Subject: [PATCH 05/10] Update sagemaker_mces_golden.json --- .../tests/unit/sagemaker/sagemaker_mces_golden.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index 246db2d871540e..e95ab41c3d90a6 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -1720,7 +1720,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group" + "/sagemaker" ] } } From 3d96c5f4c62f20be2a7ddb60b175f5bba46be63e Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 15 Dec 2022 16:30:42 -0800 Subject: [PATCH 06/10] Update sagemaker_mces_golden.json --- .../tests/unit/sagemaker/sagemaker_mces_golden.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index e95ab41c3d90a6..f429bbf987d577 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -193,7 +193,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/test-1" + "/sagemaker" ] } }, From 9188f084aa8e1c90445b21efb65b3e2ec60d47f7 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 15 Dec 2022 16:34:41 -0800 Subject: [PATCH 07/10] Update sagemaker_mces_golden.json --- .../tests/unit/sagemaker/sagemaker_mces_golden.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index f429bbf987d577..ceda91359b8d04 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -1720,7 +1720,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker" + "/sagemaker/a-model-package-group" ] } } From 51d85fd0b09f2a67f27d92b5801bb4216be7a0b9 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 15 Dec 2022 16:36:19 -0800 Subject: [PATCH 08/10] Update models.py --- .../ingestion/source/aws/sagemaker_processors/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 304d159d911476..8be714933092fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -438,12 +438,12 @@ def get_model_wu( ] model_browsepaths = [ - f"/sagemaker/{x}/{model_details['ModelName']}" for x in model_group_names + f"/sagemaker/{x}" for x in model_group_names ] # if model is not in any groups, set a single browsepath with the model as the first entity if not model_browsepaths: - model_browsepaths.append(f"/sagemaker/{model_details['ModelName']}") + model_browsepaths.append(f"/sagemaker") model_snapshot = MLModelSnapshot( urn=builder.make_ml_model_urn( From a5dc0c266f44ba58202d86f646993ada060ecd0e Mon Sep 17 00:00:00 2001 From: John Joyce Date: Mon, 19 Dec 2022 07:51:59 -0800 Subject: [PATCH 09/10] addressing lint --- .../ingestion/source/aws/sagemaker_processors/models.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 8be714933092fa..6588d8fdd94308 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -437,9 +437,7 @@ def get_model_wu( for x in model_group_names ] - model_browsepaths = [ - f"/sagemaker/{x}" for x in model_group_names - ] + model_browsepaths = [f"/sagemaker/{x}" for x in model_group_names] # if model is not in any groups, set a single browsepath with the model as the first entity if not model_browsepaths: From de0677349b3485eed63e70735271c3cbda4e2afa Mon Sep 17 00:00:00 2001 From: John Joyce Date: Tue, 20 Dec 2022 07:56:32 -0800 Subject: [PATCH 10/10] Update models.py --- .../datahub/ingestion/source/aws/sagemaker_processors/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 9950f0cbe71b97..d0bbedac14b451 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -436,7 +436,7 @@ def get_model_wu( # if model is not in any groups, set a single browsepath with the model as the first entity if not model_browsepaths: - model_browsepaths.append(f"/sagemaker") + model_browsepaths.append("/sagemaker") model_snapshot = MLModelSnapshot( urn=builder.make_ml_model_urn(