-
Notifications
You must be signed in to change notification settings - Fork 3.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add analysisTypes to SegmentMetadataQuery cache key #1782
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import com.fasterxml.jackson.annotation.JsonValue; | ||
import com.google.common.base.Preconditions; | ||
import com.google.common.collect.Lists; | ||
import io.druid.common.utils.JodaUtils; | ||
import io.druid.query.BaseQuery; | ||
import io.druid.query.DataSource; | ||
|
@@ -30,30 +31,43 @@ | |
import io.druid.query.spec.QuerySegmentSpec; | ||
import org.joda.time.Interval; | ||
|
||
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
|
||
public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis> | ||
{ | ||
/* The SegmentMetadataQuery cache key may contain UTF-8 column name strings. | ||
* Prepend 0xFF before the analysisTypes as a separator to avoid | ||
* any potential confusion with string values. | ||
*/ | ||
public static final byte[] ANALYSIS_TYPES_CACHE_PREFIX = new byte[]{(byte) 0xFF}; | ||
|
||
public enum AnalysisType | ||
{ | ||
CARDINALITY, | ||
SIZE; | ||
|
||
@JsonValue | ||
@Override | ||
public String toString() { | ||
public String toString() | ||
{ | ||
return this.name().toLowerCase(); | ||
} | ||
|
||
@JsonCreator | ||
public static AnalysisType fromString(String name) { | ||
public static AnalysisType fromString(String name) | ||
{ | ||
return valueOf(name.toUpperCase()); | ||
} | ||
} | ||
|
||
|
||
public byte[] getCacheKey() | ||
{ | ||
return new byte[]{(byte) this.ordinal()}; | ||
} | ||
} | ||
|
||
public static final Interval DEFAULT_INTERVAL = new Interval( | ||
JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT | ||
|
@@ -67,7 +81,7 @@ public static AnalysisType fromString(String name) { | |
private final ColumnIncluderator toInclude; | ||
private final boolean merge; | ||
private final boolean usingDefaultInterval; | ||
private final EnumSet analysisTypes; | ||
private final EnumSet<AnalysisType> analysisTypes; | ||
|
||
@JsonCreator | ||
public SegmentMetadataQuery( | ||
|
@@ -147,6 +161,26 @@ public boolean hasSize() | |
return analysisTypes.contains(AnalysisType.SIZE); | ||
} | ||
|
||
public byte[] getAnalysisTypesCacheKey() | ||
{ | ||
int size = 1; | ||
List<byte[]> typeBytesList = Lists.newArrayListWithExpectedSize(analysisTypes.size()); | ||
for (AnalysisType analysisType : analysisTypes) { | ||
final byte[] bytes = analysisType.getCacheKey(); | ||
typeBytesList.add(bytes); | ||
size += bytes.length; | ||
} | ||
|
||
final ByteBuffer bytes = ByteBuffer.allocate(size); | ||
bytes.put(ANALYSIS_TYPES_CACHE_PREFIX); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does there need to be anything between the individual cache keys? Is it possible two different analysisTypes could have cache keys that yield the same cache key? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need a separator. Right now the keys are just single bytes (and probably forever… how many flags could there be!) so this is not an imminent risk. If we do find that we need a ton of types then we can use the high bit for a variable length encoding. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that would happen in this case since each analysisType's byte[] has the same fixed length |
||
for (byte[] typeBytes : typeBytesList) { | ||
bytes.put(typeBytes); | ||
} | ||
|
||
return bytes.array(); | ||
} | ||
|
||
|
||
@Override | ||
public Query<SegmentAnalysis> withOverriddenContext(Map<String, Object> contextOverride) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. Metamarkets licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package io.druid.query.metadata; | ||
|
||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.collect.ImmutableMap; | ||
import io.druid.jackson.DefaultObjectMapper; | ||
import io.druid.query.CacheStrategy; | ||
|
||
import io.druid.query.TableDataSource; | ||
import io.druid.query.metadata.metadata.ColumnAnalysis; | ||
import io.druid.query.metadata.metadata.SegmentAnalysis; | ||
import io.druid.query.metadata.metadata.SegmentMetadataQuery; | ||
import io.druid.query.spec.QuerySegmentSpecs; | ||
import io.druid.segment.column.ValueType; | ||
import org.joda.time.Interval; | ||
import org.junit.Assert; | ||
import org.junit.Test; | ||
|
||
public class SegmentMetadataQueryQueryToolChestTest | ||
{ | ||
@Test | ||
public void testCacheStrategy() throws Exception | ||
{ | ||
SegmentMetadataQuery query = new SegmentMetadataQuery( | ||
new TableDataSource("dummy"), | ||
QuerySegmentSpecs.create("2015-01-01/2015-01-02"), | ||
null, | ||
null, | ||
null, | ||
null, | ||
false | ||
); | ||
|
||
CacheStrategy<SegmentAnalysis, SegmentAnalysis, SegmentMetadataQuery> strategy = | ||
new SegmentMetadataQueryQueryToolChest(null).getCacheStrategy(query); | ||
|
||
// Test cache key generation | ||
byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x01}; | ||
byte[] actualKey = strategy.computeCacheKey(query); | ||
Assert.assertArrayEquals(expectedKey, actualKey); | ||
|
||
SegmentAnalysis result = new SegmentAnalysis( | ||
"testSegment", | ||
ImmutableList.of( | ||
new Interval("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z") | ||
), | ||
ImmutableMap.of( | ||
"placement", | ||
new ColumnAnalysis( | ||
ValueType.STRING.toString(), | ||
10881, | ||
1, | ||
null | ||
) | ||
), 71982 | ||
); | ||
|
||
Object preparedValue = strategy.prepareForCache().apply(result); | ||
|
||
ObjectMapper objectMapper = new DefaultObjectMapper(); | ||
SegmentAnalysis fromCacheValue = objectMapper.readValue( | ||
objectMapper.writeValueAsBytes(preparedValue), | ||
strategy.getCacheObjectClazz() | ||
); | ||
|
||
SegmentAnalysis fromCacheResult = strategy.pullFromCache().apply(fromCacheValue); | ||
|
||
Assert.assertEquals(result, fromCacheResult); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a bit pedantic, but if there were enough analysis types that they could conceivably serialize to a valid column name, it's possible for the caching to get confused for a query with a ListColumnIncluderator. Some kind of separator should help with that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shall I reintroduce the prefix byte, then? I could give it a value such as 0xA0 so it wouldn't be confused with part of a valid UTF-8 column name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
anything that won't appear in a valid column name would be great!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As mentioned elsewhere, 0xFF is generally what we're using as byte-array filler between UTF-8 strings.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gianm @drcrallen Added 0xFF prefix byte before analysisTypes list