Skip to content

Commit

Permalink
fix(removed): Make sure removed entities do not appear on recommendat…
Browse files Browse the repository at this point in the history
…ions (#4353)
  • Loading branch information
Dexter Lee authored Mar 11, 2022
1 parent 3ea7286 commit fab9c23
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<NamedLine> wauTimeseries =
_analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), twoMonthsDateRange, weeklyInterval,
Optional.empty(), ImmutableMap.of(), Optional.of("browserId"));
Optional.empty(), ImmutableMap.of(), Collections.emptyMap(), Optional.of("browserId"));
charts.add(TimeSeriesChart.builder()
.setTitle(wauTitle)
.setDateRange(twoMonthsDateRange)
Expand All @@ -90,7 +90,8 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<NamedLine> searchesTimeseries =
_analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), lastWeekDateRange, dailyInterval,
Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty());
Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Collections.emptyMap(),
Optional.empty());
charts.add(TimeSeriesChart.builder()
.setTitle(searchesTitle)
.setDateRange(lastWeekDateRange)
Expand All @@ -104,24 +105,26 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<Row> topSearchQueries =
_analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
"query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty(), 10,
AnalyticsUtil::buildCellWithSearchLandingPage);
"query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Collections.emptyMap(),
Optional.empty(), 10, AnalyticsUtil::buildCellWithSearchLandingPage);
charts.add(TableChart.builder().setTitle(topSearchTitle).setColumns(columns).setRows(topSearchQueries).build());

// Chart 4: Bar Graph Chart
final String sectionViewsTitle = "Section Views across Entity Types";
final List<NamedBar> sectionViewsPerEntityType =
_analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
ImmutableList.of("entityType.keyword", "section.keyword"),
ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Optional.empty(), true);
ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Collections.emptyMap(),
Optional.empty(), true);
charts.add(BarChart.builder().setTitle(sectionViewsTitle).setBars(sectionViewsPerEntityType).build());

// Chart 5: Bar Graph Chart
final String actionsByTypeTitle = "Actions by Entity Type";
final List<NamedBar> eventsByEventType =
_analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
ImmutableList.of("entityType.keyword", "actionType.keyword"),
ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Optional.empty(), true);
ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Collections.emptyMap(), Optional.empty(),
true);
charts.add(BarChart.builder().setTitle(actionsByTypeTitle).setBars(eventsByEventType).build());

// Chart 6: Table Chart
Expand All @@ -131,7 +134,7 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica
final List<Row> topViewedDatasets =
_analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
"entityUrn.keyword", ImmutableMap.of("type", ImmutableList.of("EntityViewEvent"), "entityType.keyword",
ImmutableList.of(EntityType.DATASET.name())), Optional.empty(), 10,
ImmutableList.of(EntityType.DATASET.name())), Collections.emptyMap(), Optional.empty(), 10,
AnalyticsUtil::buildCellWithEntityLandingPage);
AnalyticsUtil.hydrateDisplayNameForTable(_entityClient, topViewedDatasets, Constants.DATASET_ENTITY_NAME,
ImmutableSet.of(Constants.DATASET_KEY_ASPECT_NAME), AnalyticsUtil::getDatasetName, authentication);
Expand All @@ -145,7 +148,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 1: Entities per domain
final List<NamedBar> entitiesPerDomain =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerDomain, Constants.DOMAIN_ENTITY_NAME,
ImmutableSet.of(Constants.DOMAIN_PROPERTIES_ASPECT_NAME), AnalyticsUtil::getDomainName, authentication);
AnalyticsUtil.hydrateDisplayNameForSegments(_entityClient, entitiesPerDomain, Constants.DATA_PLATFORM_ENTITY_NAME,
Expand All @@ -157,7 +161,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 2: Entities per platform
final List<NamedBar> entitiesPerPlatform =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("platform.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("platform.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerPlatform, Constants.DATA_PLATFORM_ENTITY_NAME,
ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME), AnalyticsUtil::getPlatformName, authentication);
if (!entitiesPerPlatform.isEmpty()) {
Expand All @@ -167,7 +172,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 3: Entities per term
final List<NamedBar> entitiesPerTerm =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerTerm, Constants.GLOSSARY_TERM_ENTITY_NAME,
ImmutableSet.of(Constants.GLOSSARY_TERM_KEY_ASPECT_NAME), AnalyticsUtil::getTermName, authentication);
if (!entitiesPerTerm.isEmpty()) {
Expand All @@ -177,7 +183,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 4: Entities per fabric type
final List<NamedBar> entitiesPerEnv =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("origin.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("origin.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
if (entitiesPerEnv.size() > 1) {
charts.add(BarChart.builder().setTitle("Entities per Environment").setBars(entitiesPerEnv).build());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.datahub.graphql.analytics.service;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.linkedin.datahub.graphql.generated.BarSegment;
import com.linkedin.datahub.graphql.generated.Cell;
import com.linkedin.datahub.graphql.generated.DateInterval;
Expand Down Expand Up @@ -75,14 +74,14 @@ public String getUsageIndexName() {

public List<NamedLine> getTimeseriesChart(String indexName, DateRange dateRange, DateInterval granularity,
Optional<String> dimension, // Length 1 for now
Map<String, List<String>> filters, Optional<String> uniqueOn) {
Map<String, List<String>> filters, Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn) {

log.debug(
String.format("Invoked getTimeseriesChart with indexName: %s, dateRange: %s, granularity: %s, dimension: %s,",
indexName, dateRange, granularity, dimension) + String.format("filters: %s, uniqueOn: %s", filters,
uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), Optional.of(dateRange));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, Optional.of(dateRange));

AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM)
.field("timestamp")
Expand Down Expand Up @@ -128,22 +127,22 @@ private List<NumericDataPoint> extractPointsFromAggregations(Aggregations aggreg

public List<NamedBar> getBarChart(String indexName, Optional<DateRange> dateRange, List<String> dimensions,
// Length 1 or 2
Map<String, List<String>> filters, Optional<String> uniqueOn, boolean showMissing) {
Map<String, List<String>> filters, Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn,
boolean showMissing) {
log.debug(
String.format("Invoked getBarChart with indexName: %s, dateRange: %s, dimensions: %s,", indexName, dateRange,
dimensions) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

assert (dimensions.size() == 1 || dimensions.size() == 2);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);

TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0));
if (showMissing) {
termAgg.missing(NA);
}

if (dimensions.size() == 2) {
TermsAggregationBuilder secondTermAgg =
AggregationBuilders.terms(SECOND_DIMENSION).field(dimensions.get(1));
TermsAggregationBuilder secondTermAgg = AggregationBuilders.terms(SECOND_DIMENSION).field(dimensions.get(1));
if (showMissing) {
secondTermAgg.missing(NA);
}
Expand Down Expand Up @@ -194,13 +193,13 @@ public Row buildRow(String groupByValue, Function<String, Cell> groupByValueToCe
}

public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRange, String groupBy,
Map<String, List<String>> filters, Optional<String> uniqueOn, int maxRows,
Function<String, Cell> groupByValueToCell) {
Map<String, List<String>> filters, Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn,
int maxRows, Function<String, Cell> groupByValueToCell) {
log.debug(
String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s", indexName, dateRange,
groupBy) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);

TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(groupBy).size(maxRows);
if (uniqueOn.isPresent()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package com.linkedin.metadata.entity;

import com.linkedin.common.Status;
import com.linkedin.common.urn.Urn;
import com.linkedin.entity.EnvelopedAspect;
import lombok.extern.slf4j.Slf4j;


@Slf4j
public class EntityUtils {
private EntityUtils() {
}

/**
* Check if entity is removed (removed=true in Status aspect)
*/
public static boolean checkIfRemoved(EntityService entityService, Urn entityUrn) {
try {
EnvelopedAspect statusAspect =
entityService.getLatestEnvelopedAspect(entityUrn.getEntityType(), entityUrn, "status");
if (statusAspect == null) {
return false;
}
Status status = new Status(statusAspect.getValue().data());
return status.isRemoved();
} catch (Exception e) {
log.error("Error while checking if {} is removed", entityUrn, e);
return false;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import com.codahale.metrics.Timer;
import com.datahub.util.exception.ESQueryException;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventType;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.EntityUtils;
import com.linkedin.metadata.recommendation.EntityProfileParams;
import com.linkedin.metadata.recommendation.RecommendationContent;
import com.linkedin.metadata.recommendation.RecommendationParams;
Expand All @@ -16,7 +19,6 @@
import com.linkedin.metadata.utils.metrics.MetricUtils;
import io.opentelemetry.extension.annotations.WithSpan;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
Expand All @@ -41,6 +43,7 @@
public class MostPopularSource implements RecommendationSource {
private final RestHighLevelClient _searchClient;
private final IndexConvention _indexConvention;
private final EntityService _entityService;

private static final String DATAHUB_USAGE_INDEX = "datahub_usage_event";
private static final String ENTITY_AGG_NAME = "entity";
Expand Down Expand Up @@ -87,6 +90,7 @@ public List<RecommendationContent> getRecommendations(@Nonnull Urn userUrn,
.map(bucket -> buildContent(bucket.getKeyAsString()))
.filter(Optional::isPresent)
.map(Optional::get)
.limit(MAX_CONTENT)
.collect(Collectors.toList());
} catch (Exception e) {
log.error("Search query to get most popular entities failed", e);
Expand All @@ -106,7 +110,7 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) {
// Find the entities with the most views
AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME)
.field(DataHubUsageEventConstants.ENTITY_URN + ESUtils.KEYWORD_SUFFIX)
.size(MAX_CONTENT);
.size(MAX_CONTENT * 2);
source.aggregation(aggregation);
source.size(0);

Expand All @@ -116,13 +120,11 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) {
}

private Optional<RecommendationContent> buildContent(@Nonnull String entityUrn) {
Urn entity;
try {
entity = Urn.createFromString(entityUrn);
} catch (URISyntaxException e) {
log.error("Error decoding entity URN: {}", entityUrn, e);
Urn entity = UrnUtils.getUrn(entityUrn);
if (EntityUtils.checkIfRemoved(_entityService, entity)) {
return Optional.empty();
}

return Optional.of(new RecommendationContent().setEntity(entity)
.setValue(entityUrn)
.setParams(new RecommendationParams().setEntityProfileParams(new EntityProfileParams().setUrn(entity))));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import com.codahale.metrics.Timer;
import com.datahub.util.exception.ESQueryException;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
import com.linkedin.metadata.datahubusage.DataHubUsageEventType;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.EntityUtils;
import com.linkedin.metadata.recommendation.EntityProfileParams;
import com.linkedin.metadata.recommendation.RecommendationContent;
import com.linkedin.metadata.recommendation.RecommendationParams;
Expand All @@ -16,7 +19,6 @@
import com.linkedin.metadata.utils.metrics.MetricUtils;
import io.opentelemetry.extension.annotations.WithSpan;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
Expand All @@ -42,6 +44,7 @@
public class RecentlyViewedSource implements RecommendationSource {
private final RestHighLevelClient _searchClient;
private final IndexConvention _indexConvention;
private final EntityService _entityService;

private static final String DATAHUB_USAGE_INDEX = "datahub_usage_event";
private static final String ENTITY_AGG_NAME = "entity";
Expand Down Expand Up @@ -87,7 +90,7 @@ public List<RecommendationContent> getRecommendations(@Nonnull Urn userUrn,
.stream()
.map(bucket -> buildContent(bucket.getKeyAsString()))
.filter(Optional::isPresent)
.map(Optional::get)
.map(Optional::get).limit(MAX_CONTENT)
.collect(Collectors.toList());
} catch (Exception e) {
log.error("Search query to get most recently viewed entities failed", e);
Expand Down Expand Up @@ -122,13 +125,11 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) {
}

private Optional<RecommendationContent> buildContent(@Nonnull String entityUrn) {
Urn entity;
try {
entity = Urn.createFromString(entityUrn);
} catch (URISyntaxException e) {
log.error("Error decoding entity URN: {}", entityUrn, e);
Urn entity = UrnUtils.getUrn(entityUrn);
if (EntityUtils.checkIfRemoved(_entityService, entity)) {
return Optional.empty();
}

return Optional.of(new RecommendationContent().setEntity(entity)
.setValue(entityUrn)
.setParams(new RecommendationParams().setEntityProfileParams(new EntityProfileParams().setUrn(entity))));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import com.google.common.collect.ImmutableList;
import com.linkedin.gms.factory.recommendation.candidatesource.DomainsCandidateSourceFactory;
import com.linkedin.gms.factory.recommendation.candidatesource.HighUsageCandidateSourceFactory;
import com.linkedin.gms.factory.recommendation.candidatesource.MostPopularCandidateSourceFactory;
import com.linkedin.gms.factory.recommendation.candidatesource.RecentlyViewedCandidateSourceFactory;
import com.linkedin.gms.factory.recommendation.candidatesource.TopPlatformsCandidateSourceFactory;
import com.linkedin.gms.factory.recommendation.candidatesource.TopTagsCandidateSourceFactory;
Expand All @@ -27,7 +27,7 @@

@Configuration
@Import({TopPlatformsCandidateSourceFactory.class, RecentlyViewedCandidateSourceFactory.class,
HighUsageCandidateSourceFactory.class, TopTagsCandidateSourceFactory.class, TopTermsCandidateSourceFactory.class, DomainsCandidateSourceFactory.class})
MostPopularCandidateSourceFactory.class, TopTagsCandidateSourceFactory.class, TopTermsCandidateSourceFactory.class, DomainsCandidateSourceFactory.class})
public class RecommendationServiceFactory {

@Autowired
Expand All @@ -39,7 +39,7 @@ public class RecommendationServiceFactory {
private RecentlyViewedSource recentlyViewedCandidateSource;

@Autowired
@Qualifier("highUsageCandidateSource")
@Qualifier("mostPopularCandidateSource")
private MostPopularSource _mostPopularCandidateSource;

@Autowired
Expand Down
Loading

0 comments on commit fab9c23

Please sign in to comment.