-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(models) : Joins (Datasets) Schema #7961
Changes from 1 commit
da18715
bdf2bc6
eeba4cb
89e8782
0c22848
35ea6ca
5ff65c5
03319c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package com.linkedin.common.urn; | ||
|
||
import com.linkedin.data.template.Custom; | ||
import com.linkedin.data.template.DirectCoercer; | ||
import com.linkedin.data.template.TemplateOutputCastException; | ||
import java.net.URISyntaxException; | ||
|
||
|
||
public class JoinUrn extends Urn { | ||
public static final String ENTITY_TYPE = "join"; | ||
|
||
private final String _joinId; | ||
|
||
/** | ||
* Builds a join URN of entity type {@link #ENTITY_TYPE} whose entity key is the | ||
* single-part tuple created from {@code name}. | ||
* | ||
* @param name identifier of the join; stored and later returned by {@link #getName()} | ||
*/ | ||
public JoinUrn(String name) { | ||
super(ENTITY_TYPE, TupleKey.create(name)); | ||
_joinId = name; | ||
} | ||
|
||
/** | ||
* Returns the join identifier this URN was constructed with. | ||
* | ||
* @return the value passed to the constructor | ||
*/ | ||
public String getName() { | ||
return this._joinId; | ||
} | ||
|
||
/** | ||
* Parses a raw URN string into a {@link JoinUrn}. | ||
* The string is first parsed by {@code Urn.createFromString} and the result is then | ||
* validated and converted by {@link #createFromUrn(Urn)}. | ||
* Note: this static method has the same name as the static method on {@code Urn}; | ||
* it hides that method rather than overriding it. | ||
* | ||
* @param rawUrn string form of the URN | ||
* @return the parsed JoinUrn | ||
* @throws URISyntaxException if parsing or validation fails | ||
*/ | ||
public static JoinUrn createFromString(String rawUrn) throws URISyntaxException { | ||
Check warning Code scanning / QDJVMC Method tries to override 'static' method of superclass
Method 'createFromString()' tries to override a static method of a superclass
|
||
return createFromUrn(Urn.createFromString(rawUrn)); | ||
} | ||
|
||
/** | ||
* Converts a generic {@link Urn} into a {@link JoinUrn} after validating its | ||
* namespace, entity type and number of key parts. | ||
* | ||
* @param urn the generic URN to convert | ||
* @return a JoinUrn built from the single key part of {@code urn} | ||
* @throws URISyntaxException if the namespace is not 'li', the entity type is not 'join', | ||
* the key does not have exactly one part, or the key part cannot be read as a String | ||
*/ | ||
public static JoinUrn createFromUrn(Urn urn) throws URISyntaxException { | ||
if (!"li".equals(urn.getNamespace())) { | ||
throw new URISyntaxException(urn.toString(), "Urn namespace type should be 'li'."); | ||
} | ||
if (!ENTITY_TYPE.equals(urn.getEntityType())) { | ||
throw new URISyntaxException(urn.toString(), "Urn entity type should be 'join'."); | ||
} | ||
TupleKey key = urn.getEntityKey(); | ||
if (key.size() != 1) { | ||
throw new URISyntaxException(urn.toString(), "Invalid number of keys."); | ||
} | ||
try { | ||
return new JoinUrn((String) key.getAs(0, String.class)); | ||
} catch (Exception e) { | ||
// Close the quote that the message opens, and keep the original failure as the cause | ||
// (URISyntaxException has no constructor taking a cause, hence initCause). | ||
URISyntaxException wrapped = | ||
new URISyntaxException(urn.toString(), "Invalid URN Parameter: '" + e.getMessage() + "'"); | ||
wrapped.initCause(e); | ||
throw wrapped; | ||
} | ||
} | ||
|
||
/** | ||
* Deserializes a JoinUrn from its string form by delegating to | ||
* {@link #createFromString(String)}. | ||
* | ||
* @param rawUrn string form of the URN | ||
* @return the parsed JoinUrn | ||
* @throws URISyntaxException if {@code rawUrn} is not a valid join URN | ||
*/ | ||
public static JoinUrn deserialize(String rawUrn) throws URISyntaxException { | ||
final JoinUrn parsed = createFromString(rawUrn); | ||
return parsed; | ||
} | ||
|
||
// Registers JoinUrn with com.linkedin.data.template.Custom, together with a coercer | ||
// that converts between JoinUrn instances and their String form. | ||
static { | ||
Custom.initializeCustomClass(JoinUrn.class); | ||
DirectCoercer<JoinUrn> urnCoercer = new DirectCoercer<JoinUrn>() { | ||
@Override | ||
public Object coerceInput(JoinUrn joinUrn) throws ClassCastException { | ||
// Input side: the URN is represented by its string form. | ||
return joinUrn.toString(); | ||
} | ||
|
||
@Override | ||
public JoinUrn coerceOutput(Object raw) throws TemplateOutputCastException { | ||
// Output side: parse the string back; syntax errors become TemplateOutputCastException. | ||
try { | ||
return JoinUrn.createFromString((String) raw); | ||
} catch (URISyntaxException syntaxError) { | ||
throw new TemplateOutputCastException("Invalid URN syntax: " + syntaxError.getMessage(), syntaxError); | ||
} | ||
} | ||
}; | ||
Custom.registerCoercer(urnCoercer, JoinUrn.class); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
namespace com.linkedin.common | ||
|
||
/** | ||
* Standardized join identifier: a string typeref bound to the Java class | ||
* com.linkedin.common.urn.JoinUrn and validated by TypedUrnValidator. | ||
*/ | ||
@java.class = "com.linkedin.common.urn.JoinUrn" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't seem necessary to me. Can you explain the rationale here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see above comment. |
||
@validate.`com.linkedin.common.validator.TypedUrnValidator` = { | ||
"accessible" : true, | ||
"owningTeam" : "urn:li:internalTeam:datahub", | ||
"entityType" : "join", | ||
"constructable" : true, | ||
"namespace" : "li", | ||
"name" : "Join", | ||
"doc" : "Standardized Join identifier.", | ||
"owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], | ||
"fields" : [ { | ||
"name" : "joinId", | ||
"doc" : "Join native name e.g. <db>.<table>, /dir/subdir/<name>, or <name>", | ||
"type" : "string", | ||
"maxLength" : 284 | ||
}], | ||
"maxLength" : 284 | ||
} | ||
typeref JoinUrn = string |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
namespace com.linkedin.join | ||
|
||
import com.linkedin.common.ChangeAuditStamps | ||
|
||
|
||
/** | ||
* EditableJoinProperties stores editable changes made to join properties. Keeping edits made in the UI | ||
* separate from changes made by ingestion pipelines avoids accidental overwrites of user-provided data | ||
* by those pipelines. | ||
*/ | ||
@Aspect = { | ||
"name": "editableJoinProperties" | ||
} | ||
record EditableJoinProperties includes ChangeAuditStamps { | ||
/** | ||
* Edited documentation of the join; indexed for search under the field name editedDescription. | ||
*/ | ||
@Searchable = { | ||
"fieldType": "TEXT", | ||
"fieldName": "editedDescription", | ||
} | ||
description: optional string | ||
|
||
/** | ||
* Edited display name of the join; indexed for search under the field name editedName. | ||
*/ | ||
@Searchable = { | ||
"fieldType": "TEXT_PARTIAL", | ||
"fieldName": "editedName", | ||
} | ||
name: optional string | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace com.linkedin.join | ||
|
||
import com.linkedin.dataset.SchemaFieldPath | ||
|
||
/** | ||
* A single 1:1 mapping between one field of dataset A and one field of dataset B. | ||
*/ | ||
record FieldMap { | ||
/** | ||
* Path of the field in dataset A that is required for the join; maps 1:1 to bfield. | ||
*/ | ||
afield: SchemaFieldPath | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dataset A and Dataset B are symmetric. You can look at this Join from either dataset. If you look from Dataset A's perspective, your names left and right are ok. Where as if you look from Dataset B's perspective, the left and right are not correct. |
||
|
||
/** | ||
* Path of the field in dataset B that is required for the join; maps 1:1 to afield. | ||
*/ | ||
bfield: SchemaFieldPath | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
namespace com.linkedin.join | ||
|
||
import com.linkedin.dataset.SchemaFieldPath | ||
|
||
/** | ||
* Field mapping describing a join between two datasets. | ||
*/ | ||
record JoinFieldMapping { | ||
/** | ||
* The 1:1 field pairs (one FieldMap per pair) between dataset A and dataset B that are required for the join. | ||
*/ | ||
fieldMapping: array[FieldMap] | ||
|
||
/** | ||
* Any transformation logic or notes pertaining to this specific join. | ||
*/ | ||
details: string | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
namespace com.linkedin.join | ||
|
||
import com.linkedin.common.TimeStamp | ||
import com.linkedin.common.DatasetUrn | ||
import com.linkedin.common.CustomProperties | ||
|
||
/** | ||
* Properties associated with a Join between two datasets (datasetA and datasetB). | ||
*/ | ||
@Aspect = { | ||
"name": "joinProperties" | ||
} | ||
record JoinProperties includes CustomProperties { | ||
|
||
/** | ||
* Display name of the Join | ||
*/ | ||
@Searchable = { | ||
"fieldType": "TEXT_PARTIAL", | ||
"enableAutocomplete": true, | ||
"boostScore": 10.0 | ||
} | ||
name: optional string | ||
|
||
/** | ||
* First dataset in the join (no directionality) | ||
*/ | ||
@Relationship = { | ||
"name": "joinA", | ||
"entityTypes": [ "dataset" ] | ||
} | ||
datasetA: DatasetUrn | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see comment above. |
||
|
||
/** | ||
* Second dataset in the join (no directionality) | ||
*/ | ||
@Relationship = { | ||
"name": "joinB", | ||
"entityTypes": [ "dataset" ] | ||
} | ||
datasetB: DatasetUrn | ||
|
||
/** | ||
* Field mappings of the join: each JoinFieldMapping holds a list of 1:1 field pairs plus free-form details. | ||
*/ | ||
joinFieldMappings: array[JoinFieldMapping] | ||
|
||
/** | ||
* A timestamp documenting when the asset was created in the source Data Platform (not on DataHub) | ||
*/ | ||
@Searchable = { | ||
"/time": { | ||
"fieldName": "createdAt", | ||
"fieldType": "DATETIME" | ||
} | ||
} | ||
created: optional TimeStamp | ||
|
||
/** | ||
* A timestamp documenting when the asset was last modified in the source Data Platform (not on DataHub) | ||
*/ | ||
@Searchable = { | ||
"/time": { | ||
"fieldName": "lastModifiedAt", | ||
"fieldType": "DATETIME" | ||
} | ||
} | ||
lastModified: optional TimeStamp | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
namespace com.linkedin.join | ||
import com.linkedin.common.JoinUrn | ||
|
||
/** | ||
* Joins information of an entity. | ||
*/ | ||
@Aspect = { | ||
"name": "joins" | ||
} | ||
record Joins { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it doesn't look like you've added this aspect to any entities in entity-registry.yml - did you mean to add this to datasets? or what were your thoughts there? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right, it is not used. In my initial design, just like Ownership, I wanted to add Join aspect to the Dataset, but our team members suggested that we can use relationships annotations in the pdl. So, I decided not to use Joins.pdl. I will remove it. |
||
/** | ||
* URNs of the joins associated with the entity. | ||
*/ | ||
joins: array[JoinUrn] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
namespace com.linkedin.metadata.aspect | ||
|
||
import com.linkedin.metadata.key.JoinKey | ||
import com.linkedin.join.JoinProperties | ||
import com.linkedin.join.EditableJoinProperties | ||
|
||
import com.linkedin.common.InstitutionalMemory | ||
import com.linkedin.common.Ownership | ||
import com.linkedin.common.Status | ||
import com.linkedin.container.Container | ||
import com.linkedin.common.GlobalTags | ||
import com.linkedin.common.GlossaryTerms | ||
import com.linkedin.common.BrowsePaths | ||
|
||
|
||
/** | ||
* A union of all supported metadata aspects for a Join: its key, its (editable) properties, | ||
* and the common aspects listed below. | ||
*/ | ||
typeref JoinAspect = union[ | ||
JoinKey, | ||
JoinProperties, | ||
EditableJoinProperties, | ||
InstitutionalMemory, | ||
Ownership, | ||
Status, | ||
Container, | ||
GlobalTags, | ||
GlossaryTerms, | ||
BrowsePaths | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
namespace com.linkedin.metadata.key | ||
|
||
/** | ||
* Key for a Join | ||
*/ | ||
@Aspect = { | ||
"name": "joinKey" | ||
} | ||
record JoinKey { | ||
/** | ||
* Unique guid for Join | ||
*/ | ||
@Searchable = { | ||
"fieldType": "TEXT", | ||
} | ||
joinId: string | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
namespace com.linkedin.metadata.snapshot | ||
|
||
import com.linkedin.common.JoinUrn | ||
import com.linkedin.metadata.aspect.JoinAspect | ||
|
||
/** | ||
* A metadata snapshot for a specific join entity (entity name "join", key aspect "joinKey"). | ||
*/ | ||
@Entity = { | ||
"name": "join", | ||
"keyAspect": "joinKey" | ||
} | ||
record JoinSnapshot { | ||
|
||
/** | ||
* URN for the entity the metadata snapshot is associated with. | ||
*/ | ||
urn: JoinUrn | ||
|
||
/** | ||
* The list of metadata aspects associated with the Join. Depending on the use case, this can be either all of the supported aspects or a selection of them. | ||
*/ | ||
aspects: array[JoinAspect] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a little confused, why is this class needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Join will be an entity by itself.
This was proposed in the model:
https://docs.google.com/document/d/1JPtHbNEY-_E9ZwsCsFNPks35g1WVyWQEFd_K0fLF0wk/edit#heading=h.1t4gdnvyeais
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So that is totally fine, but why is there a JoinUrn? I don't see an Urn like this for any other EntityType.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can check this directory:
https://github.com/datahub-project/datahub/tree/master/li-utils/src/main/javaPegasus/com/linkedin/common/urn