Skip to content

Commit

Permalink
feat(ingest): SageMaker jobs and models (#2830)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinhu authored Jul 8, 2021
1 parent a117b63 commit a2106ca
Show file tree
Hide file tree
Showing 33 changed files with 6,312 additions and 2,156 deletions.
Binary file added datahub-web-react/src/images/feastlogo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added datahub-web-react/src/images/sagemakerlogo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,8 @@
"PIG" : "Pig type is for running Pig jobs.",
"SQL" : "SQL is for running Presto, mysql queries etc"
}
} ],
"doc" : "Datajob type"
}, "string" ],
"doc" : "Datajob type\n**NOTE**: AzkabanJobType is deprecated. Please use strings instead."
}, {
"name" : "flowUrn",
"type" : "com.linkedin.common.DataFlowUrn",
Expand All @@ -471,6 +471,25 @@
"entityTypes" : [ "dataFlow" ],
"name" : "IsPartOf"
}
}, {
"name" : "status",
"type" : {
"type" : "enum",
"name" : "JobStatus",
"doc" : "Job statuses",
"symbols" : [ "STARTING", "IN_PROGRESS", "STOPPING", "STOPPED", "COMPLETED", "FAILED", "UNKNOWN" ],
"symbolDocs" : {
"COMPLETED" : "Jobs with successful completion.",
"FAILED" : "Jobs that have failed.",
"IN_PROGRESS" : "Jobs currently running.",
"STARTING" : "Jobs being initialized.",
"STOPPED" : "Jobs that have stopped.",
"STOPPING" : "Jobs being stopped.",
"UNKNOWN" : "Jobs with unknown status (either unmappable or unavailable)"
}
},
"doc" : "Status of the job",
"optional" : true
} ],
"Aspect" : {
"name" : "dataJobInfo"
Expand Down Expand Up @@ -586,7 +605,7 @@
"doc" : "Editable properties",
"optional" : true
} ]
}, "com.linkedin.datajob.DataJobInfo", "com.linkedin.datajob.DataJobInputOutput", "com.linkedin.datajob.DataJobKey", "com.linkedin.datajob.EditableDataJobProperties", "com.linkedin.datajob.azkaban.AzkabanJobType", {
}, "com.linkedin.datajob.DataJobInfo", "com.linkedin.datajob.DataJobInputOutput", "com.linkedin.datajob.DataJobKey", "com.linkedin.datajob.EditableDataJobProperties", "com.linkedin.datajob.JobStatus", "com.linkedin.datajob.azkaban.AzkabanJobType", {
"type" : "typeref",
"name" : "DataJobAspect",
"namespace" : "com.linkedin.metadata.aspect",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -978,8 +978,8 @@
"PIG" : "Pig type is for running Pig jobs.",
"SQL" : "SQL is for running Presto, mysql queries etc"
}
} ],
"doc" : "Datajob type"
}, "string" ],
"doc" : "Datajob type\n**NOTE**: AzkabanJobType is deprecated. Please use strings instead."
}, {
"name" : "flowUrn",
"type" : "com.linkedin.common.DataFlowUrn",
Expand All @@ -989,6 +989,25 @@
"entityTypes" : [ "dataFlow" ],
"name" : "IsPartOf"
}
}, {
"name" : "status",
"type" : {
"type" : "enum",
"name" : "JobStatus",
"doc" : "Job statuses",
"symbols" : [ "STARTING", "IN_PROGRESS", "STOPPING", "STOPPED", "COMPLETED", "FAILED", "UNKNOWN" ],
"symbolDocs" : {
"COMPLETED" : "Jobs with successful completion.",
"FAILED" : "Jobs that have failed.",
"IN_PROGRESS" : "Jobs currently running.",
"STARTING" : "Jobs being initialized.",
"STOPPED" : "Jobs that have stopped.",
"STOPPING" : "Jobs being stopped.",
"UNKNOWN" : "Jobs with unknown status (either unmappable or unavailable)"
}
},
"doc" : "Status of the job",
"optional" : true
} ],
"Aspect" : {
"name" : "dataJobInfo"
Expand Down Expand Up @@ -1058,7 +1077,7 @@
"Aspect" : {
"name" : "dataJobInputOutput"
}
}, "com.linkedin.datajob.azkaban.AzkabanJobType", {
}, "com.linkedin.datajob.JobStatus", "com.linkedin.datajob.azkaban.AzkabanJobType", {
"type" : "record",
"name" : "DatasetDeprecation",
"namespace" : "com.linkedin.dataset",
Expand Down Expand Up @@ -2392,6 +2411,7 @@
"name" : "MLModelProperties",
"namespace" : "com.linkedin.ml.metadata",
"doc" : "Properties associated with a ML Model",
"include" : [ "com.linkedin.common.CustomProperties" ],
"fields" : [ {
"name" : "description",
"type" : "string",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1253,8 +1253,8 @@
"PIG" : "Pig type is for running Pig jobs.",
"SQL" : "SQL is for running Presto, mysql queries etc"
}
} ],
"doc" : "Datajob type"
}, "string" ],
"doc" : "Datajob type\n**NOTE**: AzkabanJobType is deprecated. Please use strings instead."
}, {
"name" : "flowUrn",
"type" : "com.linkedin.common.DataFlowUrn",
Expand All @@ -1264,6 +1264,25 @@
"entityTypes" : [ "dataFlow" ],
"name" : "IsPartOf"
}
}, {
"name" : "status",
"type" : {
"type" : "enum",
"name" : "JobStatus",
"doc" : "Job statuses",
"symbols" : [ "STARTING", "IN_PROGRESS", "STOPPING", "STOPPED", "COMPLETED", "FAILED", "UNKNOWN" ],
"symbolDocs" : {
"COMPLETED" : "Jobs with successful completion.",
"FAILED" : "Jobs that have failed.",
"IN_PROGRESS" : "Jobs currently running.",
"STARTING" : "Jobs being initialized.",
"STOPPED" : "Jobs that have stopped.",
"STOPPING" : "Jobs being stopped.",
"UNKNOWN" : "Jobs with unknown status (either unmappable or unavailable)"
}
},
"doc" : "Status of the job",
"optional" : true
} ],
"Aspect" : {
"name" : "dataJobInfo"
Expand Down Expand Up @@ -1371,7 +1390,7 @@
"Aspect" : {
"name" : "editableDataJobProperties"
}
}, "com.linkedin.datajob.azkaban.AzkabanJobType", {
}, "com.linkedin.datajob.JobStatus", "com.linkedin.datajob.azkaban.AzkabanJobType", {
"type" : "record",
"name" : "DataPlatformInfo",
"namespace" : "com.linkedin.dataplatform",
Expand Down Expand Up @@ -2792,6 +2811,7 @@
"name" : "MLModelProperties",
"namespace" : "com.linkedin.ml.metadata",
"doc" : "Properties associated with a ML Model",
"include" : [ "com.linkedin.common.CustomProperties" ],
"fields" : [ {
"name" : "description",
"type" : "string",
Expand Down Expand Up @@ -3452,7 +3472,7 @@
"Aspect" : {
"name" : "mlFeatureTableProperties"
}
}, "com.linkedin.common.Ownership", "com.linkedin.common.InstitutionalMemory", "com.linkedin.common.Status", "com.linkedin.common.Deprecation" ]
}, "com.linkedin.common.Ownership", "com.linkedin.common.InstitutionalMemory", "com.linkedin.common.Status", "com.linkedin.common.Deprecation", "com.linkedin.common.BrowsePaths" ]
}
},
"doc" : "The list of metadata aspects associated with the MLFeatureTable. Depending on the use case, this can either be all, or a selection, of supported aspects."
Expand Down
15 changes: 15 additions & 0 deletions gms/api/src/main/snapshot/com.linkedin.ml.mlModels.snapshot.json
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,20 @@
"name" : "cost"
}
}, "com.linkedin.common.CostType", "com.linkedin.common.CostValue", {
"type" : "record",
"name" : "CustomProperties",
"namespace" : "com.linkedin.common",
"doc" : "Misc. properties about an entity.",
"fields" : [ {
"name" : "customProperties",
"type" : {
"type" : "map",
"values" : "string"
},
"doc" : "Custom property bag.",
"default" : { }
} ]
}, {
"type" : "typeref",
"name" : "DataPlatformUrn",
"namespace" : "com.linkedin.common",
Expand Down Expand Up @@ -522,6 +536,7 @@
"name" : "MLModelProperties",
"namespace" : "com.linkedin.ml.metadata",
"doc" : "Properties associated with a ML Model",
"include" : [ "com.linkedin.common.CustomProperties" ],
"fields" : [ {
"name" : "description",
"type" : "string",
Expand Down
12 changes: 12 additions & 0 deletions metadata-ingestion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,18 @@ source:
aws_secret_access_key: # Optional.
aws_session_token: # Optional.
aws_role: # Optional (Role chaining supported by using a sorted list).
extract_feature_groups: True # Optional. Whether to ingest feature groups; defaults to True.
extract_models: True # Optional. Whether to ingest models; defaults to True.
extract_jobs: # Optional. Per-job-type flags controlling which jobs are ingested; each defaults to True.
auto_ml: True
compilation: True
edge_packaging: True
hyper_parameter_tuning: True
labeling: True
processing: True
training: True
transform: True
```

### Snowflake `snowflake`
Expand Down
Loading

0 comments on commit a2106ca

Please sign in to comment.