From 6a3e7dd177b77ab23cca9564d041dc040db99c4b Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Thu, 19 May 2022 14:52:04 +0530 Subject: [PATCH 1/2] feat(model): add optional created, lastModified auditstamps to SchemaField --- .../examples/library/dataset_schema.py | 16 ++++++++ .../com/linkedin/schema/SchemaField.pdl | 11 ++++++ .../com.linkedin.entity.aspects.snapshot.json | 30 +++++++++++--- ...com.linkedin.entity.entities.snapshot.json | 39 +++++++++++++++---- .../com.linkedin.entity.runs.snapshot.json | 30 +++++++++++--- ...m.linkedin.platform.platform.snapshot.json | 37 ++++++++++++++---- 6 files changed, 138 insertions(+), 25 deletions(-) diff --git a/metadata-ingestion/examples/library/dataset_schema.py b/metadata-ingestion/examples/library/dataset_schema.py index 79c2706c04611..fec39427d4928 100644 --- a/metadata-ingestion/examples/library/dataset_schema.py +++ b/metadata-ingestion/examples/library/dataset_schema.py @@ -5,6 +5,7 @@ # Imports for metadata model classes from datahub.metadata.schema_classes import ( + AuditStampClass, ChangeTypeClass, DateTypeClass, OtherSchemaClass, @@ -25,24 +26,39 @@ version=0, # when the source system has a notion of versioning of schemas, insert this in, otherwise leave as 0 hash="", # when the source system has a notion of unique schemas identified via hash, include a hash, else leave it as empty string platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"), + lastModified=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:ingestion" + ), fields=[ SchemaFieldClass( fieldPath="address.zipcode", type=SchemaFieldDataTypeClass(type=StringTypeClass()), nativeDataType="VARCHAR(50)", # use this to provide the type of the field in the source system's vernacular description="This is the zipcode of the address. 
Specified using extended form and limited to addresses in the United States", + lastModified=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:ingestion" + ), ), SchemaFieldClass( fieldPath="address.street", type=SchemaFieldDataTypeClass(type=StringTypeClass()), nativeDataType="VARCHAR(100)", description="Street corresponding to the address", + lastModified=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:ingestion" + ), ), SchemaFieldClass( fieldPath="last_sold_date", type=SchemaFieldDataTypeClass(type=DateTypeClass()), nativeDataType="Date", description="Date of the last sale date for this property", + created=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:ingestion" + ), + lastModified=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:ingestion" + ), ), ], ), diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl index 1afb973c8e9ad..074e74ba05f2a 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/schema/SchemaField.pdl @@ -3,6 +3,7 @@ namespace com.linkedin.schema import com.linkedin.dataset.SchemaFieldPath import com.linkedin.common.GlobalTags import com.linkedin.common.GlossaryTerms +import com.linkedin.common.AuditStamp /** * SchemaField to describe metadata related to dataset schema. @@ -39,6 +40,16 @@ record SchemaField { } description: optional string + /** + * An AuditStamp corresponding to the creation of this schema field. + */ + created: optional AuditStamp + + /** + * An AuditStamp corresponding to the last modification of this schema field. + */ + lastModified: optional AuditStamp + /** * Platform independent field type of the field. 
*/ diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 1a96c402ec009..4a839ed4f0cc0 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -702,8 +702,10 @@ "doc" : "Instance of the data platform (e.g. db instance)", "optional" : true, "Searchable" : { + "addToFilters" : true, "fieldName" : "platformInstance", - "fieldType" : "URN" + "fieldType" : "URN", + "filterNameOverride" : "Platform Instance" } } ], "Aspect" : { @@ -804,6 +806,10 @@ } }, "doc" : "Urn of the applied tag", + "Relationship" : { + "entityTypes" : [ "tag" ], + "name" : "TaggedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "tags", @@ -881,6 +887,10 @@ } }, "doc" : "Urn of the applied glossary term", + "Relationship" : { + "entityTypes" : [ "glossaryTerm" ], + "name" : "TermedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "glossaryTerms", @@ -2362,6 +2372,16 @@ "fieldName" : "fieldDescriptions", "fieldType" : "TEXT" } + }, { + "name" : "created", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the creation of this schema field.", + "optional" : true + }, { + "name" : "lastModified", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the last modification of this schema field.", + "optional" : true }, { "name" : "type", "type" : { @@ -2483,7 +2503,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "FieldTaggedWith" + "name" : "SchemaFieldTaggedWith" } }, "Searchable" : { @@ -2501,7 +2521,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "FieldWithGlossaryTerm" + "name" : "SchemaFieldWithGlossaryTerm" } }, "Searchable" : { @@ -2670,7 +2690,7 
@@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "EditableFieldTaggedWith" + "name" : "EditableSchemaFieldTaggedWith" } }, "Searchable" : { @@ -2688,7 +2708,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "EditableFieldWithGlossaryTerm" + "name" : "EditableSchemaFieldWithGlossaryTerm" } }, "Searchable" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 791f74196a3e3..7b78875464a88 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -581,8 +581,10 @@ "doc" : "Instance of the data platform (e.g. db instance)", "optional" : true, "Searchable" : { + "addToFilters" : true, "fieldName" : "platformInstance", - "fieldType" : "URN" + "fieldType" : "URN", + "filterNameOverride" : "Platform Instance" } } ], "Aspect" : { @@ -785,6 +787,10 @@ } }, "doc" : "Urn of the applied tag", + "Relationship" : { + "entityTypes" : [ "tag" ], + "name" : "TaggedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "tags", @@ -862,6 +868,10 @@ } }, "doc" : "Urn of the applied glossary term", + "Relationship" : { + "entityTypes" : [ "glossaryTerm" ], + "name" : "TermedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "glossaryTerms", @@ -2755,6 +2765,16 @@ "fieldName" : "fieldDescriptions", "fieldType" : "TEXT" } + }, { + "name" : "created", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the creation of this schema field.", + "optional" : true + }, { + "name" : "lastModified", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the last modification of this schema field.", + "optional" : true }, { "name" : "type", "type" : { @@ 
-2876,7 +2896,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "FieldTaggedWith" + "name" : "SchemaFieldTaggedWith" } }, "Searchable" : { @@ -2894,7 +2914,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "FieldWithGlossaryTerm" + "name" : "SchemaFieldWithGlossaryTerm" } }, "Searchable" : { @@ -3063,7 +3083,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "EditableFieldTaggedWith" + "name" : "EditableSchemaFieldTaggedWith" } }, "Searchable" : { @@ -3081,7 +3101,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "EditableFieldWithGlossaryTerm" + "name" : "EditableSchemaFieldWithGlossaryTerm" } }, "Searchable" : { @@ -4602,12 +4622,15 @@ "type" : "string", "doc" : "Display name of the Policy", "Searchable" : { - "fieldType" : "KEYWORD" + "fieldType" : "TEXT_PARTIAL" } }, { "name" : "description", "type" : "string", - "doc" : "Description of the Policy" + "doc" : "Description of the Policy", + "Searchable" : { + "fieldType" : "TEXT" + } }, { "name" : "type", "type" : "string", @@ -5223,7 +5246,7 @@ "fields" : [ { "name" : "entity", "type" : "com.linkedin.common.Urn", - "doc" : " Urn of the entity containing a related aspect" + "doc" : " Urn of the entity that is referenced by the aspect." }, { "name" : "aspect", "type" : "string" diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 32bb9dfc48140..b39b0889ba19f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -462,8 +462,10 @@ "doc" : "Instance of the data platform (e.g. 
db instance)", "optional" : true, "Searchable" : { + "addToFilters" : true, "fieldName" : "platformInstance", - "fieldType" : "URN" + "fieldType" : "URN", + "filterNameOverride" : "Platform Instance" } } ], "Aspect" : { @@ -564,6 +566,10 @@ } }, "doc" : "Urn of the applied tag", + "Relationship" : { + "entityTypes" : [ "tag" ], + "name" : "TaggedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "tags", @@ -641,6 +647,10 @@ } }, "doc" : "Urn of the applied glossary term", + "Relationship" : { + "entityTypes" : [ "glossaryTerm" ], + "name" : "TermedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "glossaryTerms", @@ -2109,6 +2119,16 @@ "fieldName" : "fieldDescriptions", "fieldType" : "TEXT" } + }, { + "name" : "created", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the creation of this schema field.", + "optional" : true + }, { + "name" : "lastModified", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the last modification of this schema field.", + "optional" : true }, { "name" : "type", "type" : { @@ -2230,7 +2250,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "FieldTaggedWith" + "name" : "SchemaFieldTaggedWith" } }, "Searchable" : { @@ -2248,7 +2268,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "FieldWithGlossaryTerm" + "name" : "SchemaFieldWithGlossaryTerm" } }, "Searchable" : { @@ -2417,7 +2437,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "EditableFieldTaggedWith" + "name" : "EditableSchemaFieldTaggedWith" } }, "Searchable" : { @@ -2435,7 +2455,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "EditableFieldWithGlossaryTerm" + "name" : "EditableSchemaFieldWithGlossaryTerm" } }, "Searchable" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 2a5b1a96b2fc1..fd353b4223006 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -581,8 +581,10 @@ "doc" : "Instance of the data platform (e.g. db instance)", "optional" : true, "Searchable" : { + "addToFilters" : true, "fieldName" : "platformInstance", - "fieldType" : "URN" + "fieldType" : "URN", + "filterNameOverride" : "Platform Instance" } } ], "Aspect" : { @@ -785,6 +787,10 @@ } }, "doc" : "Urn of the applied tag", + "Relationship" : { + "entityTypes" : [ "tag" ], + "name" : "TaggedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "tags", @@ -862,6 +868,10 @@ } }, "doc" : "Urn of the applied glossary term", + "Relationship" : { + "entityTypes" : [ "glossaryTerm" ], + "name" : "TermedWith" + }, "Searchable" : { "addToFilters" : true, "fieldName" : "glossaryTerms", @@ -2755,6 +2765,16 @@ "fieldName" : "fieldDescriptions", "fieldType" : "TEXT" } + }, { + "name" : "created", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the creation of this schema field.", + "optional" : true + }, { + "name" : "lastModified", + "type" : "com.linkedin.common.AuditStamp", + "doc" : "An AuditStamp corresponding to the last modification of this schema field.", + "optional" : true }, { "name" : "type", "type" : { @@ -2876,7 +2896,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : "FieldTaggedWith" + "name" : "SchemaFieldTaggedWith" } }, "Searchable" : { @@ -2894,7 +2914,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "FieldWithGlossaryTerm" + "name" : "SchemaFieldWithGlossaryTerm" } }, "Searchable" : { @@ -3063,7 +3083,7 @@ "Relationship" : { "/tags/*/tag" : { "entityTypes" : [ "tag" ], - "name" : 
"EditableFieldTaggedWith" + "name" : "EditableSchemaFieldTaggedWith" } }, "Searchable" : { @@ -3081,7 +3101,7 @@ "Relationship" : { "/terms/*/urn" : { "entityTypes" : [ "glossaryTerm" ], - "name" : "EditableFieldWithGlossaryTerm" + "name" : "EditableSchemaFieldWithGlossaryTerm" } }, "Searchable" : { @@ -4602,12 +4622,15 @@ "type" : "string", "doc" : "Display name of the Policy", "Searchable" : { - "fieldType" : "KEYWORD" + "fieldType" : "TEXT_PARTIAL" } }, { "name" : "description", "type" : "string", - "doc" : "Description of the Policy" + "doc" : "Description of the Policy", + "Searchable" : { + "fieldType" : "TEXT" + } }, { "name" : "type", "type" : "string", From b4deafe60a3947ec0223be721d385c5d81ec9f50 Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Thu, 19 May 2022 18:53:07 +0530 Subject: [PATCH 2/2] feat(model): add ALTER operation type --- datahub-graphql-core/src/main/resources/entity.graphql | 10 ++++++++-- .../main/pegasus/com/linkedin/common/OperationType.pdl | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 92e3eec48a330..ff641fe84ab25 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -5744,14 +5744,20 @@ enum OperationType { DELETE """ - When data is created. + When a table is created.
""" CREATE """ - When data is dropped + When a table is altered + """ + ALTER + + """ + When a table is dropped """ DROP + """ Unknown operation """ diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/OperationType.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/OperationType.pdl index 0b2f0bc31677f..13d8d466c8a20 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/OperationType.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/OperationType.pdl @@ -8,6 +8,7 @@ enum OperationType { UPDATE DELETE CREATE + ALTER DROP UNKNOWN } \ No newline at end of file