From 4ed199679160e087511d3c70df0573a706a2f826 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 21 Dec 2022 08:02:59 -0800 Subject: [PATCH] refactor(ingestion): Browse Paths Upgrade V2 Feast & Sagemaker (#6002) --- .../sagemaker_processors/feature_groups.py | 2 +- .../source/aws/sagemaker_processors/jobs.py | 2 +- .../source/aws/sagemaker_processors/models.py | 8 ++--- .../src/datahub/ingestion/source/feast.py | 10 ++----- .../feast/feast_repository_mces_golden.json | 4 +-- .../unit/sagemaker/sagemaker_mces_golden.json | 30 +++++++++---------- 6 files changed, 24 insertions(+), 32 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index ffaa8a5e978624..020b390e9fefd8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -91,7 +91,7 @@ def get_feature_group_wu( feature_group_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("sagemaker", feature_group_name), aspects=[ - BrowsePathsClass(paths=[f"/sagemaker/{feature_group_name}"]), + BrowsePathsClass(paths=["/sagemaker"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py index a42e1950e49657..80255d9e0267ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py @@ -406,7 +406,7 @@ def create_common_job_snapshot( "jobType": job_type.value, }, ), - BrowsePathsClass(paths=[f"/{job_type.value}/{name}"]), + BrowsePathsClass(paths=[f"/{job_type.value}"]), ], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 78a3727740b6f9..d0bbedac14b451 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -284,7 +284,7 @@ def get_group_wu( }, ), OwnershipClass(owners), - BrowsePathsClass(paths=[f"/sagemaker/{group_name}"]), + BrowsePathsClass(paths=["/sagemaker"]), ], ) @@ -432,13 +432,11 @@ def get_model_wu( for x in model_group_names ] - model_browsepaths = [ - f"/sagemaker/{x}/{model_details['ModelName']}" for x in model_group_names - ] + model_browsepaths = [f"/sagemaker/{x}" for x in model_group_names] # if model is not in any groups, set a single browsepath with the model as the first entity if not model_browsepaths: - model_browsepaths.append(f"/sagemaker/{model_details['ModelName']}") + model_browsepaths.append("/sagemaker") model_snapshot = MLModelSnapshot( urn=builder.make_ml_model_urn( diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index db3b8b94715724..b5cf10c53b3867 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -301,9 +301,7 @@ def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkU feature_view_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("feast", feature_view_name), aspects=[ - BrowsePathsClass( - paths=[f"/feast/{self.feature_store.project}/{feature_view_name}"] - ), + BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]), StatusClass(removed=False), ], ) @@ -342,11 +340,7 @@ def _get_on_demand_feature_view_workunit( on_demand_feature_view_snapshot = MLFeatureTableSnapshot( urn=builder.make_ml_feature_table_urn("feast", on_demand_feature_view_name), aspects=[ - BrowsePathsClass( - paths=[ - f"/feast/{self.feature_store.project}/{on_demand_feature_view_name}" - ] - ), + BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]), StatusClass(removed=False), ], ) diff --git a/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json b/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json index 860d4e47693a1f..d69ecda7473ea4 100644 --- a/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json +++ b/metadata-ingestion/tests/integration/feast/feast_repository_mces_golden.json @@ -173,7 +173,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/feast/feature_store/feature_store.driver_hourly_stats" + "/feast/feature_store" ] } }, @@ -286,7 +286,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/feast/feature_store/feature_store.transformed_conv_rate" + "/feast/feature_store" ] } }, diff --git a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json index 8c5da056cc2d94..ceda91359b8d04 100644 --- a/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json +++ b/metadata-ingestion/tests/unit/sagemaker/sagemaker_mces_golden.json @@ -77,7 +77,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/test-2" + "/sagemaker" ] } }, @@ -193,7 +193,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/test-1" + "/sagemaker" ] } }, @@ -290,7 +290,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/test" + "/sagemaker" ] } }, @@ -904,7 +904,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/auto_ml/an-auto-ml-job" + "/auto_ml" ] } }, @@ -986,7 +986,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/compilation/a-compilation-job" + "/compilation" ] } }, @@ -1069,7 +1069,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/edge_packaging/an-edge-packaging-job" + "/edge_packaging" ] } }, @@ -1149,7 +1149,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/hyper_parameter_tuning/a-hyper-parameter-tuning-job" + "/hyper_parameter_tuning" ] } }, @@ -1229,7 +1229,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/labeling/a-labeling-job" + "/labeling" ] } }, @@ -1318,7 +1318,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/processing/a-processing-job" + "/processing" ] } }, @@ -1425,7 +1425,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/training/a-training-job" + "/training" ] } }, @@ -1517,7 +1517,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/transform/a-transform-job" + "/transform" ] } }, @@ -1641,7 +1641,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group" + "/sagemaker" ] } } @@ -1720,7 +1720,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group/the-first-model" + "/sagemaker/a-model-package-group" ] } } @@ -1802,7 +1802,7 @@ { "com.linkedin.pegasus2avro.common.BrowsePaths": { "paths": [ - "/sagemaker/a-model-package-group/the-second-model" + "/sagemaker/a-model-package-group" ] } } @@ -1812,4 +1812,4 @@ "proposedDelta": null, "systemMetadata": null } -] \ No newline at end of file +]