
Commit

Address PR comments
jon-wei committed Jan 25, 2017
1 parent b67e83e commit 95e2e50
Showing 47 changed files with 738 additions and 628 deletions.
@@ -87,27 +87,13 @@ public boolean apply(String input)
@Override
public DruidLongPredicate makeLongPredicate()
{
return new DruidLongPredicate()
{
@Override
public boolean applyLong(long input)
{
return false;
}
};
return DruidLongPredicate.FALSE_PREDICATE;
}

@Override
public DruidFloatPredicate makeFloatPredicate()
{
return new DruidFloatPredicate()
{
@Override
public boolean applyFloat(float input)
{
return false;
}
};
return DruidFloatPredicate.FALSE_PREDICATE;
}
},
null
@@ -627,27 +627,13 @@ public boolean apply(String input)
@Override
public DruidLongPredicate makeLongPredicate()
{
return new DruidLongPredicate()
{
@Override
public boolean applyLong(long input)
{
return false;
}
};
return DruidLongPredicate.FALSE_PREDICATE;
}

@Override
public DruidFloatPredicate makeFloatPredicate()
{
return new DruidFloatPredicate()
{
@Override
public boolean applyFloat(float input)
{
return false;
}
};
return DruidFloatPredicate.FALSE_PREDICATE;
}
};
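
Both hunks above swap a per-call anonymous predicate class for a shared constant. A minimal sketch of how such a constant could be declared on the predicate interface, assuming `FALSE_PREDICATE` is simply a singleton that always returns false (the field name comes from the diff; the declaration itself is not shown in this commit):

```java
// Hypothetical declaration of the shared "always false" long predicate.
// DruidFloatPredicate.FALSE_PREDICATE would be the analogous constant for floats.
public interface DruidLongPredicate
{
  DruidLongPredicate FALSE_PREDICATE = new DruidLongPredicate()
  {
    @Override
    public boolean applyLong(long input)
    {
      return false;
    }
  };

  boolean applyLong(long input);
}
```

Reusing one instance avoids allocating a new anonymous predicate object every time makeLongPredicate() or makeFloatPredicate() is called for a filter that can never match.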

@@ -29,16 +29,19 @@ public class BenchmarkSchemaInfo
private List<BenchmarkColumnSchema> columnSchemas;
private List<AggregatorFactory> aggs;
private Interval dataInterval;
private boolean withRollup;

public BenchmarkSchemaInfo (
List<BenchmarkColumnSchema> columnSchemas,
List<AggregatorFactory> aggs,
Interval dataInterval
Interval dataInterval,
boolean withRollup
)
{
this.columnSchemas = columnSchemas;
this.aggs = aggs;
this.dataInterval = dataInterval;
this.withRollup = withRollup;
}

public List<BenchmarkColumnSchema> getColumnSchemas()
@@ -61,4 +64,8 @@ public Interval getDataInterval()
return dataInterval;
}

public boolean isWithRollup()
{
return withRollup;
}
}
@@ -89,8 +89,71 @@ public class BenchmarkSchemas
BenchmarkSchemaInfo basicSchema = new BenchmarkSchemaInfo(
basicSchemaColumns,
basicSchemaIngestAggs,
basicSchemaDataInterval
basicSchemaDataInterval,
true
);
SCHEMA_MAP.put("basic", basicSchema);
}

static { // simple single string column and count agg schema, no rollup
List<BenchmarkColumnSchema> basicSchemaColumns = ImmutableList.of(
// dims
BenchmarkColumnSchema.makeSequential("dimSequential", ValueType.STRING, false, 1, null, 0, 1000000)
);

List<AggregatorFactory> basicSchemaIngestAggs = new ArrayList<>();
basicSchemaIngestAggs.add(new CountAggregatorFactory("rows"));

Interval basicSchemaDataInterval = new Interval(0, 1000000);

BenchmarkSchemaInfo basicSchema = new BenchmarkSchemaInfo(
basicSchemaColumns,
basicSchemaIngestAggs,
basicSchemaDataInterval,
false
);
SCHEMA_MAP.put("simple", basicSchema);
}

static { // simple single long column and count agg schema, no rollup
List<BenchmarkColumnSchema> basicSchemaColumns = ImmutableList.of(
// dims, ingest as a metric for now with rollup off, until numeric dims at ingestion are supported
BenchmarkColumnSchema.makeSequential("dimSequential", ValueType.LONG, true, 1, null, 0, 1000000)
);

List<AggregatorFactory> basicSchemaIngestAggs = new ArrayList<>();
basicSchemaIngestAggs.add(new LongSumAggregatorFactory("dimSequential", "dimSequential"));
basicSchemaIngestAggs.add(new CountAggregatorFactory("rows"));

Interval basicSchemaDataInterval = new Interval(0, 1000000);

BenchmarkSchemaInfo basicSchema = new BenchmarkSchemaInfo(
basicSchemaColumns,
basicSchemaIngestAggs,
basicSchemaDataInterval,
false
);
SCHEMA_MAP.put("simpleLong", basicSchema);
}

static { // simple single float column and count agg schema, no rollup
List<BenchmarkColumnSchema> basicSchemaColumns = ImmutableList.of(
// dims, ingest as a metric for now with rollup off, until numeric dims at ingestion are supported
BenchmarkColumnSchema.makeSequential("dimSequential", ValueType.FLOAT, true, 1, null, 0, 1000000)
);

List<AggregatorFactory> basicSchemaIngestAggs = new ArrayList<>();
basicSchemaIngestAggs.add(new DoubleSumAggregatorFactory("dimSequential", "dimSequential"));
basicSchemaIngestAggs.add(new CountAggregatorFactory("rows"));

Interval basicSchemaDataInterval = new Interval(0, 1000000);

BenchmarkSchemaInfo basicSchema = new BenchmarkSchemaInfo(
basicSchemaColumns,
basicSchemaIngestAggs,
basicSchemaDataInterval,
false
);
SCHEMA_MAP.put("simpleFloat", basicSchema);
}
}
@@ -73,6 +73,7 @@
import io.druid.segment.QueryableIndex;
import io.druid.segment.QueryableIndexSegment;
import io.druid.segment.column.ColumnConfig;
import io.druid.segment.column.ValueType;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.segment.incremental.OnheapIncrementalIndex;
@@ -237,8 +238,90 @@ private void setupQueries()

basicQueries.put("nested", queryA);
}

SCHEMA_QUERY_MAP.put("basic", basicQueries);

// simple one column schema, for testing performance difference between querying on numeric values as Strings and
// directly as longs
Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
BenchmarkSchemaInfo simpleSchema = BenchmarkSchemas.SCHEMA_MAP.get("simple");

{ // simple.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory(
"rows",
"rows"
));
GroupByQuery queryA = GroupByQuery
.builder()
.setDataSource("blah")
.setQuerySegmentSpec(intervalSpec)
.setDimensions(Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.STRING)
))
.setAggregatorSpecs(
queryAggs
)
.setGranularity(QueryGranularity.fromString(queryGranularity))
.build();

simpleQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("simple", simpleQueries);


Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
BenchmarkSchemaInfo simpleLongSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleLong");
{ // simpleLong.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleLongSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory(
"rows",
"rows"
));
GroupByQuery queryA = GroupByQuery
.builder()
.setDataSource("blah")
.setQuerySegmentSpec(intervalSpec)
.setDimensions(Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.LONG)
))
.setAggregatorSpecs(
queryAggs
)
.setGranularity(QueryGranularity.fromString(queryGranularity))
.build();

simpleLongQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);


Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
BenchmarkSchemaInfo simpleFloatSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleFloat");
{ // simpleFloat.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleFloatSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory(
"rows",
"rows"
));
GroupByQuery queryA = GroupByQuery
.builder()
.setDataSource("blah")
.setQuerySegmentSpec(intervalSpec)
.setDimensions(Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.FLOAT)
))
.setAggregatorSpecs(
queryAggs
)
.setGranularity(QueryGranularity.fromString(queryGranularity))
.build();

simpleFloatQueries.put("A", queryA);
}
SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
}

@Setup(Level.Trial)
@@ -278,7 +361,7 @@ public void setup() throws IOException
for (int i = 0; i < numSegments; i++) {
log.info("Generating rows for segment %d/%d", i + 1, numSegments);

final IncrementalIndex index = makeIncIndex();
final IncrementalIndex index = makeIncIndex(schemaInfo.isWithRollup());

for (int j = 0; j < rowsPerSegment; j++) {
final InputRow row = dataGenerator.nextRow();
@@ -393,13 +476,14 @@ public String getFormatString()
);
}

private IncrementalIndex makeIncIndex()
private IncrementalIndex makeIncIndex(boolean withRollup)
{
return new OnheapIncrementalIndex(
new IncrementalIndexSchema.Builder()
.withQueryGranularity(QueryGranularities.NONE)
.withMetrics(schemaInfo.getAggsArray())
.withDimensionsSpec(new DimensionsSpec(null, null, null))
.withRollup(withRollup)
.build(),
true,
false,
11 changes: 10 additions & 1 deletion docs/content/querying/dimensionspecs.md
@@ -15,9 +15,18 @@ The following JSON fields can be used in a query to operate on dimension values.
Returns dimension values as is and optionally renames the dimension.

```json
{ "type" : "default", "dimension" : <dimension>, "outputName": <output_name> }
{
"type" : "default",
"dimension" : <dimension>,
"outputName": <output_name>,
"outputType": <"STRING"|"LONG"|"FLOAT">
}
```

When specifying a DimensionSpec on a numeric column, the user should include the type of the column in the `outputType` field. This is necessary because it is possible for a column with a given name to have different value types in different segments: result merging may fail unless results of different types are converted to the type specified by `outputType`.

If left unspecified, the `outputType` defaults to STRING.
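
For instance, grouping on a hypothetical long-typed column named "longNumbers" might use a spec along these lines:

```json
{
  "type" : "default",
  "dimension" : "longNumbers",
  "outputName" : "longNumbers",
  "outputType" : "LONG"
}
```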

### Extraction DimensionSpec

Returns dimension values transformed using the given [extraction function](#extraction-functions).
1 change: 0 additions & 1 deletion docs/content/querying/query-context.md
@@ -21,4 +21,3 @@ The query context is used for various query configuration parameters.
|`maxResults`|500000|Maximum number of results groupBy query can process. Default value used can be changed by `druid.query.groupBy.maxResults` in druid configuration at broker and historical nodes. At query time you can only lower the value.|
|`maxIntermediateRows`|50000|Maximum number of intermediate rows while processing single segment for groupBy query. Default value used can be changed by `druid.query.groupBy.maxIntermediateRows` in druid configuration at broker and historical nodes. At query time you can only lower the value.|
|`groupByIsSingleThreaded`|false|Whether to run single threaded group By queries. Default value used can be changed by `druid.query.groupBy.singleThreaded` in druid configuration at historical nodes.|
|`typeHints`|{}| A map of column name -> column type (String, Long, Float). By default, druid returns all column values as strings within query results. If querying on a non-String column, `typeHints` must be included in a query, containing a mapping of the name of the non-String column to its desired return type. This is necessary because columns with the same name in different segments do not necessarily have the same value type, and a type must be chosen when merging results.|
18 changes: 0 additions & 18 deletions docs/content/querying/querying.md
@@ -99,21 +99,3 @@
|`Query cancelled`|The query was cancelled through the query cancellation API.|
|`Resource limit exceeded`|The query exceeded a configured resource limit (e.g. groupBy maxResults).|
|`Unknown exception`|Some other exception occurred. Check errorMessage and errorClass for details, although keep in mind that the contents of those fields are free-form and may change from release to release.|


Column Types
-----------
It is possible to run queries with non-String columns as dimensions, for example, grouping on a column previously ingested as a long metric.

When including a non-String column as a dimension, its desired return type must be specified in the query context. See `typeHints` in [Context](../querying/query-context.html) for more information.

For example, if grouping on a long column named "longNumbers", the GroupBy query should contain a `typeHints` map within its query context:

```json

"context": {
"typeHints" : {
"longNumbers": "LONG"
}
}
```