Skip to content

Commit

Permalink
Merge pull request #1960 from binlijin/master
Browse files Browse the repository at this point in the history
optimize index merge
  • Loading branch information
xvrl committed Nov 12, 2015
2 parents 4f46d45 + 286b8f8 commit ef74cd3
Show file tree
Hide file tree
Showing 9 changed files with 431 additions and 55 deletions.
27 changes: 22 additions & 5 deletions processing/src/main/java/io/druid/segment/IndexMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import io.druid.common.utils.JodaUtils;
import io.druid.common.utils.SerializerUtils;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType;
Expand Down Expand Up @@ -216,8 +217,11 @@ public File mergeQueryableIndex(
ProgressIndicator progress
) throws IOException
{
return merge(
Lists.transform(
// We are materializing the list for performance reasons. Lists.transform
// only creates a "view" of the original list, meaning the function gets
// applied every time you access an element.
List<IndexableAdapter> indexAdapteres = Lists.newArrayList(
Iterables.transform(
indexes,
new Function<QueryableIndex, IndexableAdapter>()
{
Expand All @@ -227,7 +231,10 @@ public IndexableAdapter apply(final QueryableIndex input)
return new QueryableIndexIndexableAdapter(input);
}
}
),
)
);
return merge(
indexAdapteres,
metricAggs,
outDir,
null,
Expand Down Expand Up @@ -842,13 +849,17 @@ public Rowboat apply(@Nullable Rowboat input)
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
}

BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[indexes.size()];
for (int j = 0; j < indexes.size(); j++) {
bitmapIndexSeeker[j] = indexes.get(j).getBitmapIndexSeeker(dimension);
}
for (String dimVal : IndexedIterable.create(dimVals)) {
progress.progress();
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
for (int j = 0; j < indexes.size(); ++j) {
convertedInverteds.add(
new ConvertingIndexedInts(
indexes.get(j).getBitmapIndex(dimension, dimVal), rowNumConversions.get(j)
bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j)
)
);
}
Expand Down Expand Up @@ -998,6 +1009,7 @@ private static class DimValueConverter

private int currIndex;
private String lastVal = null;
private String currValue;

DimValueConverter(
Indexed<String> dimSet
Expand All @@ -1007,6 +1019,7 @@ private static class DimValueConverter
conversionBuf = ByteBuffer.allocateDirect(dimSet.size() * Ints.BYTES).asIntBuffer();

currIndex = 0;
currValue = null;
}

public void convert(String value, int index)
Expand All @@ -1020,7 +1033,9 @@ public void convert(String value, int index)
}
return;
}
String currValue = dimSet.get(currIndex);
if (currValue == null) {
currValue = dimSet.get(currIndex);
}

while (currValue == null) {
conversionBuf.position(conversionBuf.position() + 1);
Expand All @@ -1037,6 +1052,8 @@ public void convert(String value, int index)
++currIndex;
if (currIndex == dimSet.size()) {
lastVal = value;
} else {
currValue = dimSet.get(currIndex);
}
} else if (currValue.compareTo(value) < 0) {
throw new ISE(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package io.druid.segment;

import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
Expand All @@ -41,6 +42,8 @@ public interface IndexableAdapter

IndexedInts getBitmapIndex(String dimension, String value);

BitmapIndexSeeker getBitmapIndexSeeker(String dimension);

String getMetricType(String metric);

ColumnCapabilities getCapabilities(String column);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,20 @@
import com.metamx.common.guava.CloseQuietly;
import com.metamx.common.logger.Logger;
import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ComplexColumn;
import io.druid.segment.column.DictionaryEncodedColumn;
import io.druid.segment.column.EmptyBitmapIndexSeeker;
import io.druid.segment.column.GenericColumn;
import io.druid.segment.column.IndexedFloatsGenericColumn;
import io.druid.segment.column.IndexedLongsGenericColumn;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.ArrayBasedIndexedInts;
import io.druid.segment.data.BitmapCompressedIndexedInts;
import io.druid.segment.data.EmptyIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.data.IndexedIterable;
Expand Down Expand Up @@ -331,4 +334,76 @@ public ColumnCapabilities getCapabilities(String column)
{
return input.getColumn(column).getCapabilities();
}

/**
 * Creates a forward-only {@link BitmapIndexSeeker} over the bitmaps of {@code dimension}.
 *
 * <p>If the column does not exist, or it has no bitmap index, an {@link EmptyBitmapIndexSeeker}
 * is returned. Otherwise the returned seeker walks the dimension's value dictionary in step
 * with the caller, so each call to {@code seek} costs one comparison rather than a lookup.
 *
 * @param dimension name of the dimension whose bitmaps should be exposed
 * @return a seeker whose {@code seek} must be called with strictly ascending values
 */
@Override
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension)
{
  final Column column = input.getColumn(dimension);
  if (column == null) {
    return new EmptyBitmapIndexSeeker();
  }

  final BitmapIndex bitmaps = column.getBitmapIndex();
  if (bitmaps == null) {
    return new EmptyBitmapIndexSeeker();
  }

  final Indexed<String> dimSet = getDimValueLookup(dimension);

  // BitmapIndexSeeker is where the main performance boost of the merge comes from.
  // In the previous version of index merge, building the inverted index worked like a merge sort
  // over multiple bitmap indexes: it iterated over all the sorted values and binary-searched the
  // id of each value in every bitmap index, which involves disk IO and is really slow.
  // Suppose we have N (100 in the original author's test) small segments, each with M (50000 in
  // that test) rows. In a high-cardinality scenario there are almost N * M unique values, so the
  // author's estimate of the old complexity is O(N * M * M * LOG(M)).
  //
  // Two properties were not exploited during merging:
  // 1. Dimension values are always visited sequentially.
  // 2. A single dimension value is valid in only one index when cardinality is high enough.
  // The BitmapIndexSeeker uses both: it can only seek forward and can never seek back.
  // Together with "getDimValueLookup", each dimension value is translated to its id exactly once,
  // and that translation is just an integer increment. The cached value is refreshed only after
  // the previous one has been hit. The author's estimate of the new complexity is
  // O(N * M * LOG(M)).
  return new BitmapIndexSeeker()
  {
    // Position in dimSet of the next value that has not been matched yet.
    private int currIndex = 0;
    // Cached dimSet.get(currIndex); may legitimately be null if the dictionary holds a null value.
    private String currVal = null;
    // The value that matched the final dictionary entry; only meaningful once exhausted is true.
    private String lastVal = null;
    // Explicit end-of-dictionary marker. A null check on lastVal is not enough, because the
    // final matched value may itself be null (e.g. a dictionary containing only null), which
    // would otherwise lead to dimSet.get(currIndex) being called with currIndex == dimSet.size().
    private boolean exhausted = false;

    @Override
    public IndexedInts seek(String value)
    {
      if (dimSet == null || dimSet.size() == 0) {
        return new EmptyIndexedInts();
      }
      if (exhausted) {
        // Every dictionary entry has been consumed: nothing can match any more, but still
        // enforce the ascending-order contract.
        // NOTE(review): relies on STRING_STRATEGY.compare accepting null arguments - confirm.
        if (GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) {
          throw new ISE("Value[%s] is less than the last value[%s] I have, cannot be.",
              value, lastVal);
        }
        return new EmptyIndexedInts();
      }
      if (currVal == null) {
        currVal = dimSet.get(currIndex);
      }
      int compareResult = GenericIndexed.STRING_STRATEGY.compare(currVal, value);
      if (compareResult == 0) {
        // Hit: hand out the bitmap for the current entry and advance to the next one.
        IndexedInts ret = new BitmapCompressedIndexedInts(bitmaps.getBitmap(currIndex));
        ++currIndex;
        if (currIndex == dimSet.size()) {
          lastVal = value;
          exhausted = true;
        } else {
          currVal = dimSet.get(currIndex);
        }
        return ret;
      } else if (compareResult < 0) {
        // The caller moved past a value of ours without ever asking for it.
        throw new ISE("Skipped currValue[%s], currIndex[%,d]; incoming value[%s]",
            currVal, currIndex, value);
      } else {
        // The requested value sorts before our current entry, so this index does not contain it.
        return new EmptyIndexedInts();
      }
    }
  };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
Expand Down Expand Up @@ -92,4 +93,10 @@ public ColumnCapabilities getCapabilities(String column)
{
return baseAdapter.getCapabilities(column);
}

/**
 * Hands the request straight to the wrapped adapter.
 *
 * @param dimension name of the dimension to obtain a seeker for
 * @return whatever seeker {@code baseAdapter} provides for {@code dimension}
 */
@Override
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension)
{
  final BitmapIndexSeeker delegateSeeker = baseAdapter.getBitmapIndexSeeker(dimension);
  return delegateSeeker;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Druid - a distributed column store.
* Copyright 2012 - 2015 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.druid.segment.column;

import io.druid.segment.data.IndexedInts;

/**
 * Looks up the {@link IndexedInts} associated with individual dimension values, one value
 * at a time.
 *
 * <p>Only in-order access is supported: successive calls to {@link #seek(String)} are
 * expected to move forward through the values and never return to an earlier one.
 */
public interface BitmapIndexSeeker
{
  /**
   * Seeks to {@code value}.
   *
   * @param value the dimension value to look up
   * @return the {@link IndexedInts} for {@code value}
   */
  IndexedInts seek(String value);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Druid - a distributed column store.
* Copyright 2012 - 2015 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.druid.segment.column;

import io.druid.segment.data.EmptyIndexedInts;
import io.druid.segment.data.IndexedInts;

/**
 * A {@link BitmapIndexSeeker} that has nothing to offer: every call to
 * {@link #seek(String)} yields a fresh {@link EmptyIndexedInts}, whatever value is requested.
 */
public class EmptyBitmapIndexSeeker implements BitmapIndexSeeker
{
  /**
   * @param value ignored
   * @return a newly created {@link EmptyIndexedInts}
   */
  @Override
  public IndexedInts seek(String value)
  {
    final IndexedInts nothing = new EmptyIndexedInts();
    return nothing;
  }
}
Loading

0 comments on commit ef74cd3

Please sign in to comment.