Skip to content

Commit

Permalink
feat(model): add audit timestamps to dataset schemaFields
Browse files Browse the repository at this point in the history
  • Loading branch information
mayurinehate committed May 18, 2022
1 parent bb341f7 commit 7ce8f00
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 25 deletions.
30 changes: 30 additions & 0 deletions metadata-ingestion/examples/library/dataset_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

# Imports for metadata model classes
from datahub.metadata.schema_classes import (
ChangeAuditStampsClass,
AuditStampClass,
ChangeTypeClass,
DateTypeClass,
OtherSchemaClass,
Expand All @@ -25,24 +27,52 @@
version=0, # if the source system versions its schemas, put that version here; otherwise leave it as 0
hash="", # if the source system identifies unique schemas by a hash, put that hash here; otherwise leave it as an empty string
platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"),
created=AuditStampClass(time=1640692800000, actor="urn:li:corpuser:ingestion"),
lastModified=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
fields=[
SchemaFieldClass(
fieldPath="address.zipcode",
type=SchemaFieldDataTypeClass(type=StringTypeClass()),
nativeDataType="VARCHAR(50)", # use this to provide the type of the field in the source system's vernacular
description="This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",
lastModified=ChangeAuditStampsClass(
created=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
lastModified=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
),
),
SchemaFieldClass(
fieldPath="address.street",
type=SchemaFieldDataTypeClass(type=StringTypeClass()),
nativeDataType="VARCHAR(100)",
description="Street corresponding to the address",
lastModified=ChangeAuditStampsClass(
created=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
lastModified=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
),
),
SchemaFieldClass(
fieldPath="last_sold_date",
type=SchemaFieldDataTypeClass(type=DateTypeClass()),
nativeDataType="Date",
description="Date of the last sale date for this property",
lastModified=ChangeAuditStampsClass(
created=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
lastModified=AuditStampClass(
time=1640692800000, actor="urn:li:corpuser:ingestion"
),
),
),
],
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ namespace com.linkedin.schema
import com.linkedin.dataset.SchemaFieldPath
import com.linkedin.common.GlobalTags
import com.linkedin.common.GlossaryTerms
import com.linkedin.common.ChangeAuditStamps

/**
* SchemaField to describe metadata related to dataset schema.
Expand Down Expand Up @@ -39,6 +40,11 @@ record SchemaField {
}
description: optional string

/**
* Captures information about who created/last modified/deleted this schema field and when
*/
lastModified: optional ChangeAuditStamps

/**
* Platform independent field type of the field.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -702,8 +702,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
Expand Down Expand Up @@ -804,6 +806,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
Expand Down Expand Up @@ -881,6 +887,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
Expand Down Expand Up @@ -2362,6 +2372,11 @@
"fieldName" : "fieldDescriptions",
"fieldType" : "TEXT"
}
}, {
"name" : "lastModified",
"type" : "com.linkedin.common.ChangeAuditStamps",
"doc" : "Captures information about who created/last modified/deleted this schema field and when",
"optional" : true
}, {
"name" : "type",
"type" : {
Expand Down Expand Up @@ -2483,7 +2498,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
Expand All @@ -2501,7 +2516,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
Expand Down Expand Up @@ -2670,7 +2685,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
Expand All @@ -2688,7 +2703,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
Expand Down Expand Up @@ -785,6 +787,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
Expand Down Expand Up @@ -862,6 +868,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
Expand Down Expand Up @@ -2755,6 +2765,11 @@
"fieldName" : "fieldDescriptions",
"fieldType" : "TEXT"
}
}, {
"name" : "lastModified",
"type" : "com.linkedin.common.ChangeAuditStamps",
"doc" : "Captures information about who created/last modified/deleted this schema field and when",
"optional" : true
}, {
"name" : "type",
"type" : {
Expand Down Expand Up @@ -2876,7 +2891,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
Expand All @@ -2894,7 +2909,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
Expand Down Expand Up @@ -3063,7 +3078,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
Expand All @@ -3081,7 +3096,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
Expand Down Expand Up @@ -4602,12 +4617,15 @@
"type" : "string",
"doc" : "Display name of the Policy",
"Searchable" : {
"fieldType" : "KEYWORD"
"fieldType" : "TEXT_PARTIAL"
}
}, {
"name" : "description",
"type" : "string",
"doc" : "Description of the Policy"
"doc" : "Description of the Policy",
"Searchable" : {
"fieldType" : "TEXT"
}
}, {
"name" : "type",
"type" : "string",
Expand Down Expand Up @@ -5223,7 +5241,7 @@
"fields" : [ {
"name" : "entity",
"type" : "com.linkedin.common.Urn",
"doc" : " Urn of the entity containing a related aspect"
"doc" : " Urn of the entity that is referenced by the aspect."
}, {
"name" : "aspect",
"type" : "string"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
Expand Down Expand Up @@ -564,6 +566,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
Expand Down Expand Up @@ -641,6 +647,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
Expand Down Expand Up @@ -2109,6 +2119,11 @@
"fieldName" : "fieldDescriptions",
"fieldType" : "TEXT"
}
}, {
"name" : "lastModified",
"type" : "com.linkedin.common.ChangeAuditStamps",
"doc" : "Captures information about who created/last modified/deleted this schema field and when",
"optional" : true
}, {
"name" : "type",
"type" : {
Expand Down Expand Up @@ -2230,7 +2245,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
Expand All @@ -2248,7 +2263,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
Expand Down Expand Up @@ -2417,7 +2432,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
Expand All @@ -2435,7 +2450,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
Expand Down
Loading

0 comments on commit 7ce8f00

Please sign in to comment.