Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(lineage) Filter dataset -> dataset lineage edges if data is transformed #4732

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions datahub-web-react/src/Mocks.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { GetGlossaryTermDocument, GetGlossaryTermQuery } from './graphql/glossar
import { GetEntityCountsDocument } from './graphql/app.generated';
import { GetMeDocument } from './graphql/me.generated';
import { ListRecommendationsDocument } from './graphql/recommendations.generated';
import { FetchedEntity } from './app/lineage/types';

const user1 = {
username: 'sdas',
Expand Down Expand Up @@ -115,7 +116,7 @@ const dataPlatform = {
},
};

const dataset1 = {
export const dataset1 = {
urn: 'urn:li:dataset:1',
type: EntityType.Dataset,
platform: {
Expand Down Expand Up @@ -208,7 +209,7 @@ const dataset1 = {
deprecation: null,
};

const dataset2 = {
export const dataset2 = {
urn: 'urn:li:dataset:2',
type: EntityType.Dataset,
platform: {
Expand All @@ -231,6 +232,7 @@ const dataset2 = {
name: 'Some Other Dataset',
description: 'This is some other dataset, so who cares!',
customProperties: [],
origin: 'PROD',
},
editableProperties: null,
created: {
Expand Down Expand Up @@ -1055,8 +1057,8 @@ export const dataJob1 = {
editableProperties: null,
inputOutput: {
__typename: 'DataJobInputOutput',
inputDatasets: [dataset3],
outputDatasets: [dataset3],
inputDatasets: [dataset5],
outputDatasets: [dataset6],
inputDatajobs: [],
},
globalTags: {
Expand Down Expand Up @@ -1298,6 +1300,28 @@ export const mlModel = {
deprecation: null,
} as MlModel;

// Lineage test fixture built from dataset1: it has no upstream children, and its
// downstream children are dataset2 (a Dataset) and dataJob1 (a DataJob).
export const dataset1FetchedEntity = {
urn: dataset1.urn,
name: dataset1.name,
type: dataset1.type,
upstreamChildren: [],
downstreamChildren: [
{ type: EntityType.Dataset, entity: dataset2 },
{ type: EntityType.DataJob, entity: dataJob1 },
],
} as FetchedEntity;

// Lineage test fixture built from dataset2 (with a hard-coded name): its upstream
// children are dataset1 (a Dataset) and dataJob1 (a DataJob), and it has no
// downstream children.
export const dataset2FetchedEntity = {
urn: dataset2.urn,
name: 'test name',
type: dataset2.type,
upstreamChildren: [
{ type: EntityType.Dataset, entity: dataset1 },
{ type: EntityType.DataJob, entity: dataJob1 },
],
downstreamChildren: [],
} as FetchedEntity;

export const mlModelGroup = {
__typename: 'MLModelGroup',
urn: 'urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { dataset1, dataset2, dataJob1, dataset1FetchedEntity, dataset2FetchedEntity } from '../../../Mocks';
import { EntityType } from '../../../types.generated';
import { Direction, EntityAndType, FetchedEntity } from '../types';
import { shouldIncludeChildEntity } from '../utils/constructFetchedNode';

describe('shouldIncludeChildEntity', () => {
    // The parent's children in the walked direction: one Dataset and one DataJob.
    const parentChildren = [dataset1, dataJob1].map((entity) => ({
        entity,
        type: entity.type,
    })) as EntityAndType[];

    it('should return false if parent and child are datasets and the child has a datajob child that belongs to the parent children', () => {
        // dataset1 (child) lists dataJob1 downstream, and dataJob1 is also one of the parent's children.
        expect(
            shouldIncludeChildEntity(Direction.Upstream, parentChildren, dataset1FetchedEntity, dataset2FetchedEntity),
        ).toBe(false);
    });

    it('should return true if the datajob is not a child of the parent', () => {
        // The parent's children no longer contain dataJob1.
        const datasetOnlyChildren = [{ entity: dataset1, type: dataset1.type }] as EntityAndType[];

        expect(
            shouldIncludeChildEntity(
                Direction.Upstream,
                datasetOnlyChildren,
                dataset1FetchedEntity,
                dataset2FetchedEntity,
            ),
        ).toBe(true);
    });

    it('should return true if either parent or child is not a dataset', () => {
        // Child is a DataJob instead of a Dataset.
        const childAsDataJob = { ...dataset1FetchedEntity, type: EntityType.DataJob };
        expect(
            shouldIncludeChildEntity(Direction.Upstream, parentChildren, childAsDataJob, dataset2FetchedEntity),
        ).toBe(true);

        // Parent is a Dashboard instead of a Dataset.
        const parentAsDashboard = { ...dataset2FetchedEntity, type: EntityType.Dashboard };
        expect(
            shouldIncludeChildEntity(Direction.Upstream, parentChildren, dataset1FetchedEntity, parentAsDashboard),
        ).toBe(true);
    });

    it('should return true if the parent has a datajob child that is not a child of the dataset child', () => {
        // The child keeps only its Dataset downstream edge, so no DataJob links it back to the parent.
        const childWithoutDataJob = {
            ...dataset1FetchedEntity,
            downstreamChildren: [{ type: EntityType.Dataset, entity: dataset2 }],
        } as FetchedEntity;

        expect(
            shouldIncludeChildEntity(Direction.Upstream, parentChildren, childWithoutDataJob, dataset2FetchedEntity),
        ).toBe(true);
    });
});
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {
dataJob1,
dataset3,
dataset3WithLineage,
dataset4,
Expand All @@ -7,7 +8,7 @@ import {
dataset5WithLineage,
dataset6WithLineage,
} from '../../../Mocks';
import { EntityType } from '../../../types.generated';
import { EntityType, RelationshipDirection } from '../../../types.generated';
import { getTestEntityRegistry } from '../../../utils/test-utils/TestPageContainer';
import { Direction, FetchedEntities } from '../types';
import constructTree from '../utils/constructTree';
Expand Down Expand Up @@ -299,4 +300,89 @@ describe('constructTree', () => {
],
});
});

// Verifies that a direct Dataset -> Dataset edge is dropped from the constructed tree when a
// DataJob sits between the two datasets.
it('should not include a Dataset as a child if that Dataset has a Datajob child which points to the parent', () => {
// Setup: dataset6 is downstream of both dataset5 and dataJob1, and dataJob1 is downstream of dataset5.
// dataset6 copy: downstream cleared, upstream relationships are dataset5 and dataJob1.
const updatedDataset6WithLineage = {
...dataset6WithLineage,
downstream: null,
upstream: {
start: 0,
count: 2,
total: 2,
relationships: [
{
type: 'DownstreamOf',
direction: RelationshipDirection.Incoming,
entity: dataset5,
},
{
type: 'DownstreamOf',
direction: RelationshipDirection.Incoming,
entity: dataJob1,
},
],
},
};
// dataset5 copy: keeps its existing downstream relationships and appends dataJob1.
const updatedDataset5WithLineage = {
...dataset5WithLineage,
downstream: {
...dataset5WithLineage.downstream,
relationships: [
...dataset5WithLineage.downstream.relationships,
{
type: 'DownstreamOf',
direction: RelationshipDirection.Outgoing,
entity: dataJob1,
},
],
},
};
// Entities registered as already fetched. Only `entity` and `fullyFetched` are read by the
// reduce below; `direction` is not used there.
const fetchedEntities = [
{ entity: updatedDataset5WithLineage, direction: Direction.Upstream, fullyFetched: true },
{ entity: dataJob1, direction: Direction.Upstream, fullyFetched: true },
];
const mockFetchedEntities = fetchedEntities.reduce(
(acc, entry) =>
extendAsyncEntities(
acc,
testEntityRegistry,
{ entity: entry.entity, type: entry.entity.type },
entry.fullyFetched,
),
{} as FetchedEntities,
);
// Expected tree: dataset6 at the root with dataJob1 as its only child; dataset5 is not listed.
expect(
constructTree(
{ entity: updatedDataset6WithLineage, type: EntityType.Dataset },
mockFetchedEntities,
Direction.Upstream,
testEntityRegistry,
),
).toEqual({
name: 'Display Name of Sixth',
expandedName: 'Fully Qualified Name of Sixth Test Dataset',
urn: 'urn:li:dataset:6',
type: EntityType.Dataset,
unexploredChildren: 0,
icon: undefined,
platform: 'Kafka',
subtype: undefined,
children: [
{
name: 'DataJobInfoName',
expandedName: undefined,
type: EntityType.DataJob,
unexploredChildren: 0,
urn: dataJob1.urn,
children: [],
countercurrentChildrenUrns: [],
icon: '',
status: null,
platform: 'Airflow',
subtype: undefined,
},
],
});
});
});
39 changes: 34 additions & 5 deletions datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
import { Direction, FetchedEntities, NodeData } from '../types';
import { EntityType } from '../../../types.generated';
import { Direction, EntityAndType, FetchedEntities, FetchedEntity, NodeData } from '../types';

/**
 * Decides whether a child entity should be kept as a lineage child of its parent.
 *
 * Given nodes A, B, C with edges A -> B, B -> C and A -> C, where A and C are Datasets and B is
 * a DataJob, the direct A -> C edge should not be shown.
 *
 * @param direction - direction in which lineage is being walked from the parent
 * @param parentChildren - the parent's children in that direction
 * @param childEntity - the fetched candidate child
 * @param parentEntity - the fetched parent
 * @returns `false` only when parent and child are both Datasets and the child has, on the side
 * opposite to `direction`, a DataJob child whose urn also appears in `parentChildren`;
 * `true` in every other case (including missing arguments).
 */
export function shouldIncludeChildEntity(
    direction: Direction,
    parentChildren?: EntityAndType[],
    childEntity?: FetchedEntity | null,
    parentEntity?: FetchedEntity,
) {
    const bothAreDatasets =
        parentEntity?.type === EntityType.Dataset && childEntity?.type === EntityType.Dataset;
    if (!bothAreDatasets || !childEntity || !parentChildren) {
        return true;
    }

    // Look at the child's children on the side facing back towards the parent, to see whether
    // a DataJob there is also one of the parent's own children.
    const oppositeChildren =
        direction === Direction.Upstream ? childEntity.downstreamChildren : childEntity.upstreamChildren;
    const sharesDataJobWithParent = oppositeChildren?.some(
        (candidate) =>
            candidate.type === EntityType.DataJob &&
            parentChildren.some((sibling) => sibling.entity.urn === candidate.entity.urn),
    );

    return !sharesDataJobWithParent;
}

export default function constructFetchedNode(
urn: string,
Expand All @@ -18,6 +42,8 @@ export default function constructFetchedNode(
return constructedNodes[urn];
}

const childrenKey = direction === Direction.Upstream ? 'upstreamChildren' : 'downstreamChildren';

if (fetchedNode && !constructedNodes[urn]) {
const node: NodeData = {
name: fetchedNode.name,
Expand All @@ -27,9 +53,7 @@ export default function constructFetchedNode(
subtype: fetchedNode.subtype,
icon: fetchedNode.icon,
unexploredChildren:
fetchedNode?.[direction === Direction.Upstream ? 'upstreamChildren' : 'downstreamChildren']?.filter(
(childUrn) => !(childUrn.entity.urn in fetchedEntities),
).length || 0,
fetchedNode?.[childrenKey]?.filter((childUrn) => !(childUrn.entity.urn in fetchedEntities)).length || 0,
countercurrentChildrenUrns:
fetchedNode?.[direction === Direction.Downstream ? 'upstreamChildren' : 'downstreamChildren']?.map(
(child) => child.entity.urn,
Expand All @@ -43,7 +67,7 @@ export default function constructFetchedNode(
constructedNodes[urn] = node;

node.children =
(fetchedNode?.[direction === Direction.Upstream ? 'upstreamChildren' : 'downstreamChildren']
(fetchedNode?.[childrenKey]
?.map((child) => {
if (child.entity.urn === node.urn) {
return null;
Expand All @@ -56,6 +80,11 @@ export default function constructFetchedNode(
newConstructionPath,
);
})
?.filter((child) => {
const childEntity = fetchedEntities[child?.urn || ''];
const parentChildren = fetchedNode[childrenKey];
return shouldIncludeChildEntity(direction, parentChildren, childEntity, fetchedNode);
})
.filter(Boolean) as Array<NodeData>) || [];

return node;
Expand Down
6 changes: 5 additions & 1 deletion datahub-web-react/src/app/lineage/utils/constructTree.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import EntityRegistry from '../../entity/EntityRegistry';
import { Direction, EntityAndType, FetchedEntities, NodeData } from '../types';
import constructFetchedNode from './constructFetchedNode';
import constructFetchedNode, { shouldIncludeChildEntity } from './constructFetchedNode';

export default function constructTree(
entityAndType: EntityAndType | null | undefined,
Expand Down Expand Up @@ -41,6 +41,10 @@ export default function constructTree(
root.urn || '',
]);
})
?.filter((child) => {
const childEntity = fetchedEntities[child?.urn || ''];
return shouldIncludeChildEntity(direction, children, childEntity, fetchedEntity);
})
?.filter(Boolean) as Array<NodeData>;
return root;
}