Skip to content

Commit

Permalink
Consolidate RetrieveSegmentsToReplaceAction into RetrieveUsedSegments…
Browse files Browse the repository at this point in the history
…Action (#15699) (#15784)

Consolidate RetrieveSegmentsToReplaceAction into RetrieveUsedSegmentsAction
  • Loading branch information
AmatyaAvadhanula authored Jan 30, 2024
1 parent 32f6f49 commit b2510f2
Show file tree
Hide file tree
Showing 18 changed files with 265 additions and 300 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
import org.apache.druid.indexing.common.actions.LockListAction;
import org.apache.druid.indexing.common.actions.LockReleaseAction;
import org.apache.druid.indexing.common.actions.MarkSegmentsAsUnusedAction;
import org.apache.druid.indexing.common.actions.RetrieveSegmentsToReplaceAction;
import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction;
import org.apache.druid.indexing.common.actions.SegmentAllocateAction;
import org.apache.druid.indexing.common.actions.SegmentTransactionalAppendAction;
import org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction;
Expand Down Expand Up @@ -1233,10 +1233,15 @@ private DataSegmentTimelineView makeDataSegmentTimelineView()
// any segment created after the lock was acquired for its interval will not be considered.
final Collection<DataSegment> publishedUsedSegments;
try {
publishedUsedSegments = context.taskActionClient().submit(new RetrieveSegmentsToReplaceAction(
dataSource,
intervals
));
// Additional check as the task action does not accept empty intervals
if (intervals.isEmpty()) {
publishedUsedSegments = Collections.emptySet();
} else {
publishedUsedSegments = context.taskActionClient().submit(new RetrieveUsedSegmentsAction(
dataSource,
intervals
));
}
}
catch (IOException e) {
throw new MSQException(e, UnknownFault.forException(e));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.DimensionRangeShardSpec;
import org.easymock.EasyMock;
import org.joda.time.Interval;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -98,6 +99,28 @@ public void testReplaceOnFooWithAll()
.add("m1", ColumnType.FLOAT)
.build();

DataSegment existingDataSegment0 = DataSegment.builder()
.interval(Intervals.of("2000-01-01T/2000-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

DataSegment existingDataSegment1 = DataSegment.builder()
.interval(Intervals.of("2001-01-01T/2001-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

Mockito.doCallRealMethod()
.doReturn(ImmutableSet.of(existingDataSegment0, existingDataSegment1))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.ETERNITY))
));

testIngestQuery().setSql(" REPLACE INTO foo OVERWRITE ALL "
+ "SELECT __time, m1 "
+ "FROM foo "
Expand Down Expand Up @@ -418,6 +441,28 @@ public void testReplaceSegmentsRepartitionTable()
.add("m1", ColumnType.FLOAT)
.build();

DataSegment existingDataSegment0 = DataSegment.builder()
.interval(Intervals.of("2000-01-01T/2000-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();
DataSegment existingDataSegment1 = DataSegment.builder()
.interval(Intervals.of("2001-01-01T/2001-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

Mockito.doCallRealMethod()
.doReturn(ImmutableSet.of(existingDataSegment0, existingDataSegment1))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.ETERNITY))
));


testIngestQuery().setSql(" REPLACE INTO foo "
+ "OVERWRITE ALL "
+ "SELECT __time, m1 "
Expand Down Expand Up @@ -479,6 +524,20 @@ public void testReplaceWithWhereClause()
.add("m1", ColumnType.FLOAT)
.build();

DataSegment existingDataSegment0 = DataSegment.builder()
.interval(Intervals.of("2000-01-01T/2000-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

Mockito.doReturn(ImmutableSet.of(existingDataSegment0))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.of("2000-01-01/2000-03-01")))
));

testIngestQuery().setSql(" REPLACE INTO foo "
+ "OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01' AND __time < TIMESTAMP '2000-03-01' "
+ "SELECT __time, m1 "
Expand Down Expand Up @@ -538,6 +597,28 @@ public void testReplaceWhereClauseLargerThanData()
.add("m1", ColumnType.FLOAT)
.build();

DataSegment existingDataSegment0 = DataSegment.builder()
.interval(Intervals.of("2000-01-01T/2000-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

DataSegment existingDataSegment1 = DataSegment.builder()
.interval(Intervals.of("2001-01-01T/2001-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

Mockito.doReturn(ImmutableSet.of(existingDataSegment0, existingDataSegment1))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.of("2000-01-01/2002-01-01")))
));


testIngestQuery().setSql(" REPLACE INTO foo "
+ "OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01' AND __time < TIMESTAMP '2002-01-01' "
+ "SELECT __time, m1 "
Expand Down Expand Up @@ -625,6 +706,19 @@ public void testReplaceTimeChunks()
.add("m1", ColumnType.FLOAT)
.build();

final DataSegment existingDataSegment = DataSegment.builder()
.dataSource("foo")
.interval(Intervals.of("2000-01-01/2000-01-04"))
.version(MSQTestTaskActionClient.VERSION)
.size(1)
.build();
Mockito.doReturn(ImmutableSet.of(existingDataSegment))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.of("2000-01-01/2000-03-01")))
));

testIngestQuery().setSql(" REPLACE INTO foo "
+ "OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01' AND __time < TIMESTAMP '2000-03-01'"
+ "SELECT __time, m1 "
Expand Down Expand Up @@ -659,6 +753,26 @@ public void testReplaceTimeChunksLargerThanData()
.add("m1", ColumnType.FLOAT)
.build();

DataSegment existingDataSegment0 = DataSegment.builder()
.interval(Intervals.of("2000-01-01T/2000-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();
DataSegment existingDataSegment1 = DataSegment.builder()
.interval(Intervals.of("2001-01-01T/2001-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

Mockito.doReturn(ImmutableSet.of(existingDataSegment0, existingDataSegment1))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.of("2000/2002")))
));

testIngestQuery().setSql(" REPLACE INTO foo "
+ "OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01' AND __time < TIMESTAMP '2002-01-01'"
+ "SELECT __time, m1 "
Expand Down Expand Up @@ -939,6 +1053,26 @@ public void testReplaceUnnestSegmentWithTimeFilter()
.add("d", ColumnType.STRING)
.build();

DataSegment existingDataSegment0 = DataSegment.builder()
.interval(Intervals.of("2000-01-01T/2000-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();
DataSegment existingDataSegment1 = DataSegment.builder()
.interval(Intervals.of("2001-01-01T/2001-01-04T"))
.size(50)
.version(MSQTestTaskActionClient.VERSION)
.dataSource("foo")
.build();

Mockito.doReturn(ImmutableSet.of(existingDataSegment0, existingDataSegment1))
.when(testTaskActionClient)
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo"),
EasyMock.eq(ImmutableList.of(Intervals.of("1999/2002")))
));

testIngestQuery().setSql(" REPLACE INTO foo "
+ "OVERWRITE WHERE __time >= TIMESTAMP '1999-01-01 00:00:00' and __time < TIMESTAMP '2002-01-01 00:00:00'"
+ "SELECT __time, d "
Expand Down Expand Up @@ -1003,7 +1137,10 @@ public void testReplaceTombstonesOverPartiallyOverlappingSegments()

Mockito.doReturn(ImmutableSet.of(existingDataSegment))
.when(testTaskActionClient)
.submit(ArgumentMatchers.isA(RetrieveUsedSegmentsAction.class));
.submit(new RetrieveUsedSegmentsAction(
EasyMock.eq("foo1"),
EasyMock.eq(ImmutableList.of(Intervals.of("2000/2002")))
));

List<Object[]> expectedResults;
if (NullHandling.sqlCompatible()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,10 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Injector;
import org.apache.druid.indexing.common.TaskLockType;
import org.apache.druid.indexing.common.TimeChunkLock;
import org.apache.druid.indexing.common.actions.LockListAction;
import org.apache.druid.indexing.common.actions.RetrieveSegmentsToReplaceAction;
import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction;
import org.apache.druid.indexing.common.actions.SegmentAllocateAction;
import org.apache.druid.indexing.common.actions.SegmentTransactionalAppendAction;
Expand All @@ -47,8 +44,6 @@
import org.joda.time.Interval;

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
Expand All @@ -60,10 +55,6 @@ public class MSQTestTaskActionClient implements TaskActionClient
public static final String VERSION = "test";
private final ObjectMapper mapper;
private final ConcurrentHashMap<SegmentId, AtomicInteger> segmentIdPartitionIdMap = new ConcurrentHashMap<>();
private final Map<String, List<Interval>> usedIntervals = ImmutableMap.of(
"foo", ImmutableList.of(Intervals.of("2001-01-01/2001-01-04"), Intervals.of("2000-01-01/2000-01-04")),
"foo2", ImmutableList.of(Intervals.of("2000-01-01/P1D"))
);
private final Set<DataSegment> publishedSegments = new HashSet<>();
private final Injector injector;

Expand Down Expand Up @@ -115,21 +106,6 @@ public <RetType> RetType submit(TaskAction<RetType> taskAction)
));
} else if (taskAction instanceof RetrieveUsedSegmentsAction) {
String dataSource = ((RetrieveUsedSegmentsAction) taskAction).getDataSource();
if (!usedIntervals.containsKey(dataSource)) {
return (RetType) ImmutableSet.of();
} else {
return (RetType) usedIntervals.get(dataSource)
.stream()
.map(interval -> DataSegment.builder()
.dataSource(dataSource)
.interval(interval)
.version(VERSION)
.size(1)
.build()
).collect(Collectors.toSet());
}
} else if (taskAction instanceof RetrieveSegmentsToReplaceAction) {
String dataSource = ((RetrieveSegmentsToReplaceAction) taskAction).getDataSource();
return (RetType) injector.getInstance(SpecificSegmentsQuerySegmentWalker.class)
.getSegments()
.stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import com.google.common.collect.Iterables;
import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.overlord.Segments;
import org.apache.druid.java.util.common.JodaUtils;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
import org.apache.druid.segment.realtime.appenderator.UsedSegmentChecker;
Expand Down Expand Up @@ -66,7 +65,7 @@ public Set<DataSegment> findUsedSegments(Set<SegmentIdWithShardSpec> segmentIds)
);

final Collection<DataSegment> usedSegmentsForIntervals = taskActionClient
.submit(new RetrieveUsedSegmentsAction(dataSource, null, intervals, Segments.ONLY_VISIBLE));
.submit(new RetrieveUsedSegmentsAction(dataSource, intervals));

for (DataSegment segment : usedSegmentsForIntervals) {
if (segmentIdsInDataSource.contains(segment.getId())) {
Expand Down
Loading

0 comments on commit b2510f2

Please sign in to comment.