Skip to content

Commit

Permalink
Merge pull request #2285 from fjy/stringformat
Browse files Browse the repository at this point in the history
fixed #1873, add ability to express CONCAT as an extractionFn
  • Loading branch information
fjy committed Jan 19, 2016
2 parents a2e327e + 3459a20 commit 1b359d6
Show file tree
Hide file tree
Showing 15 changed files with 289 additions and 64 deletions.
10 changes: 10 additions & 0 deletions docs/content/querying/dimensionspecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,16 @@ Example for chaining [regular expression extraction function](#regular-expressio
It will transform dimension values with specified extraction functions in the order named.
For example, `'/druid/prod/historical'` is transformed to `'the dru'`: the regular expression extraction function first transforms it to `'druid'`, then the javascript extraction function transforms it to `'the druid'`, and lastly the substring extraction function transforms it to `'the dru'`.

### String Format Extraction Function

Returns the dimension value formatted according to the given format string.

```json
{ "type" : "stringFormat", "format" : <sprintf_expression> }
```

For example, if you want to concatenate "[" and "]" before and after the actual dimension value, specify "[%s]" as the format string.

### Filtering DimensionSpecs

These are only valid for multi-valued dimensions. If you have a row in Druid that has a multi-valued dimension with values ["v1", "v2", "v3"] and you send a groupBy/topN query grouping by that dimension with a [query filter](filter.html) for value "v1", you will get 3 rows in the response, containing "v1", "v2" and "v3". This behavior might be unintuitive for some use cases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,44 @@

public class CascadeExtractionFn implements ExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x9;

private final ExtractionFn extractionFns[];
private final ChainedExtractionFn chainedExtractionFn;
private final ChainedExtractionFn DEFAULT_CHAINED_EXTRACTION_FN = new ChainedExtractionFn(
new ExtractionFn() {
public byte[] getCacheKey() {
new ExtractionFn()
{
public byte[] getCacheKey()
{
return new byte[0];
}

public String apply(Object value) {
public String apply(Object value)
{
return null;
}

public String apply(String value) {
public String apply(String value)
{
return null;
}

public String apply(long value) {
public String apply(long value)
{
return null;
}

public boolean preservesOrdering() {
public boolean preservesOrdering()
{
return false;
}

public ExtractionType getExtractionType() {
public ExtractionType getExtractionType()
{
return ExtractionType.MANY_TO_ONE;
}

@Override
public String toString() {
public String toString()
{
return "nullExtractionFn{}";
}
},
Expand All @@ -78,48 +84,55 @@ public CascadeExtractionFn(
this.chainedExtractionFn = DEFAULT_CHAINED_EXTRACTION_FN;
} else {
ChainedExtractionFn root = null;
for (int idx = 0; idx < extractionFns.length; idx++) {
Preconditions.checkArgument(extractionFns[idx] != null, "empty function is not allowed");
root = new ChainedExtractionFn(extractionFns[idx], root);
for (ExtractionFn fn : extractionFn) {
Preconditions.checkArgument(fn != null, "empty function is not allowed");
root = new ChainedExtractionFn(fn, root);
}
this.chainedExtractionFn = root;
}
}

@JsonProperty
public ExtractionFn[] getExtractionFns() {
public ExtractionFn[] getExtractionFns()
{
return extractionFns;
}

@Override
public byte[] getCacheKey() {
byte[] cacheKey = new byte[] {CACHE_TYPE_ID};
public byte[] getCacheKey()
{
byte[] cacheKey = new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_CASCADE};

return Bytes.concat(cacheKey, chainedExtractionFn.getCacheKey());
}

@Override
public String apply(Object value) {
public String apply(Object value)
{
return chainedExtractionFn.apply(value);
}

@Override
public String apply(String value){
public String apply(String value)
{
return chainedExtractionFn.apply(value);
}

@Override
public String apply(long value){
public String apply(long value)
{
return chainedExtractionFn.apply(value);
}

@Override
public boolean preservesOrdering(){
public boolean preservesOrdering()
{
return chainedExtractionFn.preservesOrdering();
}

@Override
public ExtractionType getExtractionType(){
public ExtractionType getExtractionType()
{
return chainedExtractionFn.getExtractionType();
}

Expand Down Expand Up @@ -152,44 +165,53 @@ public int hashCode()
}

@Override
public String toString() {
public String toString()
{
return "CascadeExtractionFn{" +
"extractionFns=[" + chainedExtractionFn.toString() + "]}";
"extractionFns=[" + chainedExtractionFn.toString() + "]}";
}

private class ChainedExtractionFn {
private class ChainedExtractionFn
{
private final ExtractionFn fn;
private final ChainedExtractionFn child;

public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child) {
public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child)
{
this.fn = fn;
this.child = child;
}

public byte[] getCacheKey() {
public byte[] getCacheKey()
{
byte[] fnCacheKey = fn.getCacheKey();

return (child != null) ? Bytes.concat(fnCacheKey, child.getCacheKey()) : fnCacheKey;
}

public String apply(Object value) {
public String apply(Object value)
{
return fn.apply((child != null) ? child.apply(value) : value);
}

public String apply(String value){
public String apply(String value)
{
return fn.apply((child != null) ? child.apply(value) : value);
}

public String apply(long value){
public String apply(long value)
{
return fn.apply((child != null) ? child.apply(value) : value);
}

public boolean preservesOrdering(){
public boolean preservesOrdering()
{
boolean childPreservesOrdering = (child == null) || child.preservesOrdering();
return fn.preservesOrdering() && childPreservesOrdering;
}

public ExtractionType getExtractionType(){
public ExtractionType getExtractionType()
{
if (child != null && child.getExtractionType() == ExtractionType.MANY_TO_ONE) {
return ExtractionType.MANY_TO_ONE;
} else {
Expand Down Expand Up @@ -227,10 +249,11 @@ public int hashCode()
return result;
}

public String toString() {
public String toString()
{
return (child != null)
? Joiner.on(",").join(child.toString(), fn.toString())
: fn.toString();
? Joiner.on(",").join(child.toString(), fn.toString())
: fn.toString();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public abstract class DimExtractionFn implements ExtractionFn
@Override
public String apply(Object value)
{
return apply(value.toString());
return apply(value == null ? null : value.toString());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.extraction;

/**
 * Central registry of the cache type ID bytes used by extraction functions.
 *
 * <p>Extraction functions write the ID for their type as the leading byte of the
 * key returned from {@code getCacheKey()} (see for example
 * {@code IdentityExtractionFn} and {@code RegexDimExtractionFn}). Keeping all IDs
 * in one place makes it easy to see which values are already taken.
 *
 * <p>NOTE(review): the IDs are presumably required to be distinct so that keys of
 * different function types cannot collide; pick an unused value when adding one.
 *
 * <p>This class only holds constants and cannot be instantiated or subclassed.
 */
public final class ExtractionCacheHelper
{
  public static final byte CACHE_TYPE_ID_TIME_DIM = 0x0;
  public static final byte CACHE_TYPE_ID_REGEX = 0x1;
  public static final byte CACHE_TYPE_ID_MATCHING_DIM = 0x2;
  public static final byte CACHE_TYPE_ID_SEARCH_QUERY = 0x3;
  public static final byte CACHE_TYPE_ID_JAVASCRIPT = 0x4;
  public static final byte CACHE_TYPE_ID_TIME_FORMAT = 0x5;
  public static final byte CACHE_TYPE_ID_IDENTITY = 0x6;
  public static final byte CACHE_TYPE_ID_LOOKUP = 0x7;
  public static final byte CACHE_TYPE_ID_SUBSTRING = 0x8;
  public static final byte CACHE_TYPE_ID_CASCADE = 0x9;
  public static final byte CACHE_TYPE_ID_STRING_FORMAT = 0xA;

  private ExtractionCacheHelper()
  {
    // Constants holder only; suppress the implicit public constructor.
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class),
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class)
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class),
@JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
Expand All @@ -56,7 +57,7 @@ public interface ExtractionFn

/**
* The "extraction" function. This should map a value into some other String value.
*
* <p>
* In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the
* empty string is considered invalid output for this method and should instead return null. This is
* a contract on the method rather than enforced at a lower level in order to eliminate a global check
Expand All @@ -74,7 +75,7 @@ public interface ExtractionFn

/**
* Offers information on whether the extraction will preserve the original ordering of the values.
* <p/>
* <p>
* Some query optimizations are possible if ordering is preserved. Null values *do* count towards
* ordering.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@

public class IdentityExtractionFn implements ExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x6;

private static final IdentityExtractionFn instance = new IdentityExtractionFn();

private IdentityExtractionFn()
Expand All @@ -35,7 +33,7 @@ private IdentityExtractionFn()
@Override
public byte[] getCacheKey()
{
return new byte[]{CACHE_TYPE_ID};
return new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_IDENTITY};
}

@Override
Expand Down Expand Up @@ -67,13 +65,13 @@ public ExtractionType getExtractionType()
{
return ExtractionType.ONE_TO_ONE;
}

@Override
public String toString()
{
return "Identity";
}

@Override
public boolean equals(Object o)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ public String apply(Object input)
};
}

private static final byte CACHE_TYPE_ID = 0x4;

private final String function;
private final Function<Object, String> fn;
private final boolean injective;
Expand Down Expand Up @@ -97,7 +95,7 @@ public byte[] getCacheKey()
{
byte[] bytes = StringUtils.toUtf8(function);
return ByteBuffer.allocate(1 + bytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_JAVASCRIPT)
.put(bytes)
.array();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@

public class LookupExtractionFn extends FunctionalExtraction
{
private static final byte CACHE_TYPE_ID = 0x7;

private final LookupExtractor lookup;
private final boolean optimize;

Expand Down Expand Up @@ -98,7 +96,7 @@ public byte[] getCacheKey()
{
try {
final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
outputStream.write(CACHE_TYPE_ID);
outputStream.write(ExtractionCacheHelper.CACHE_TYPE_ID_LOOKUP);
outputStream.write(lookup.getCacheKey());
if (getReplaceMissingValueWith() != null) {
outputStream.write(StringUtils.toUtf8(getReplaceMissingValueWith()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@
*/
public class MatchingDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x2;

private final String expr;
private final Pattern pattern;

Expand All @@ -53,7 +51,7 @@ public byte[] getCacheKey()
{
byte[] exprBytes = StringUtils.toUtf8(expr);
return ByteBuffer.allocate(1 + exprBytes.length)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_MATCHING_DIM)
.put(exprBytes)
.array();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
*/
public class RegexDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x1;
private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF;

private final String expr;
Expand Down Expand Up @@ -75,7 +74,7 @@ public byte[] getCacheKey()
totalLen += 2; // separators

return ByteBuffer.allocate(totalLen)
.put(CACHE_TYPE_ID)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_REGEX)
.put(exprBytes)
.put(CACHE_KEY_SEPARATOR)
.put(replaceStrBytes)
Expand Down
Loading

0 comments on commit 1b359d6

Please sign in to comment.