-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(lineage) Implement CLL impact analysis for inputFields #6426
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,11 @@ | |
|
||
import com.fasterxml.jackson.core.JsonProcessingException; | ||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.linkedin.common.InputField; | ||
import com.linkedin.common.InputFields; | ||
import com.linkedin.common.Status; | ||
import com.linkedin.common.urn.Urn; | ||
import com.linkedin.common.urn.UrnUtils; | ||
import com.linkedin.data.template.RecordTemplate; | ||
import com.linkedin.dataset.FineGrainedLineage; | ||
import com.linkedin.dataset.UpstreamLineage; | ||
|
@@ -180,15 +183,45 @@ private void updateFineGrainedEdgesAndRelationships( | |
} | ||
} | ||
|
||
/**
 * Builds a schemaField URN of the form "urn:li:schemaField:(resourceUrn,encodedFieldPath)".
 *
 * Only the characters "(", ")" and "," in the field path are percent-encoded (to %28, %29
 * and %2C); every other character, including "%" itself, is passed through unchanged.
 *
 * @param resourceUrn string form of the URN placed first in the tuple; it is inserted as-is,
 *     without any encoding
 * @param fieldPath field path placed second in the tuple after the encoding described above
 * @return the URN produced by handing the formatted string to UrnUtils.getUrn
 */
private Urn generateSchemaFieldUrn(@Nonnull String resourceUrn, @Nonnull String fieldPath) { | ||
// we rely on schemaField fieldPaths to be encoded since we do that with fineGrainedLineage on the ingestion side | ||
// NOTE(review): replaceAll takes a regex, which is why the parentheses are escaped here.
String encodedFieldPath = fieldPath.replaceAll("\\(", "%28").replaceAll("\\)", "%29").replaceAll(",", "%2C"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: always good to make any fields that aren't going to change final There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also for any function parameters! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sounds good! |
||
String urnString = String.format("urn:li:schemaField:(%s,%s)", resourceUrn, encodedFieldPath); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of hardcoding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good call and in fact i'll do you one better and use |
||
return UrnUtils.getUrn(urnString); | ||
} | ||
|
||
/**
 * Adds one DOWNSTREAM_OF edge for each usable input field of the given aspect and records
 * DOWNSTREAM_OF against the generated field URN in the relationship-type map.
 *
 * The aspect's underlying data is wrapped as InputFields. A field is used only when it has a
 * schemaFieldUrn, a schemaField, and that schemaField has a fieldPath; all other fields are
 * skipped. Nothing is returned; both collection arguments are mutated in place.
 *
 * @param urn URN whose string form becomes the resource part of each generated field URN
 * @param aspect aspect whose data() is reinterpreted as InputFields
 *     (NOTE(review): presumably always the inputFields aspect; confirm at call sites)
 * @param edgesToAdd list that receives the new edges
 * @param urnToRelationshipTypesBeingAdded map from generated field URN to the set of relationship
 *     types added for it; a set is created on first use of a key
 */
private void updateInputFieldEdgesAndRelationships( | ||
Urn urn, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit - can any of these arguments be null? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right, adding this nonnull annotations now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
RecordTemplate aspect, | ||
List<Edge> edgesToAdd, | ||
HashMap<Urn, Set<String>> urnToRelationshipTypesBeingAdded | ||
) { | ||
InputFields inputFields = new InputFields(aspect.data()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider passing InputFields as a parameter type, instead of RecordTemplate |
||
if (inputFields.hasFields()) { | ||
for (InputField field : inputFields.getFields()) { | ||
if (field.hasSchemaFieldUrn() && field.hasSchemaField() && field.getSchemaField().hasFieldPath()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't this already have a schemaFieldUrn in this case? Why cannot we use this URN? Is it not encoded? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is part of the confusing modeling thing here - There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. got it ty |
||
// First Edge argument: a field URN built from this entity's URN plus the field's path.
// Second Edge argument: the schemaFieldUrn already stored on the input field.
Urn sourceFieldUrn = generateSchemaFieldUrn(urn.toString(), field.getSchemaField().getFieldPath()); | ||
edgesToAdd.add(new Edge(sourceFieldUrn, field.getSchemaFieldUrn(), DOWNSTREAM_OF)); | ||
// Fetch the existing set for this URN (or start a new one), add the type, and store it back.
Set<String> relationshipTypes = urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); | ||
relationshipTypes.add(DOWNSTREAM_OF); | ||
urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); | ||
} | ||
} | ||
} | ||
} | ||
|
||
private Pair<List<Edge>, HashMap<Urn, Set<String>>> getEdgesAndRelationshipTypesFromAspect(Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) { | ||
final List<Edge> edgesToAdd = new ArrayList<>(); | ||
final HashMap<Urn, Set<String>> urnToRelationshipTypesBeingAdded = new HashMap<>(); | ||
|
||
// we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and inputFields | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor: Ideally this domain-specific schema field logic does not reside inside a much more generic UpdateIndicesHook. There should be some abstraction for encapsulating such special case logic and a way to register this logic with the index updater. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (For a future refactor) The idea of UpdateIndicesHook is to be completely agnostic of domain-specific logic that exists There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yeah this all makes sense to me and is good to call out |
||
// since @Relationship only links between the parent entity urn and something else. | ||
if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { | ||
// we need to manually set schemaField <-> schemaField edges for fineGrainedLineage since | ||
// @Relationship only links between the parent entity urn and something else. | ||
updateFineGrainedEdgesAndRelationships(aspect, edgesToAdd, urnToRelationshipTypesBeingAdded); | ||
} | ||
if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Qq - Do we have unit tests for this class? If not, we absolutely need to backfill since it's so important There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we do not have anything for this class... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we add a TODO to do this on this file? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yeah I'm about to push up the beginning of a test file for this, but will add a TODO at the top of the class to backfill the rest of the functionality! |
||
updateInputFieldEdgesAndRelationships(urn, aspect, edgesToAdd, urnToRelationshipTypesBeingAdded); | ||
} | ||
|
||
Map<RelationshipFieldSpec, List<Object>> extractedFields = | ||
FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice - thanks for the explanation