Skip to content

Commit

Permalink
Add date and date_nanos conversion to the numeric_type sort option (#…
Browse files Browse the repository at this point in the history
…40199) (#40224)

This change adds an option to convert a `date` field to nanoseconds resolution
 and a `date_nanos` field to millisecond resolution when sorting.
The resolution of the sort can be set using the `numeric_type` option of the
field sort builder. The conversion is done at the shard level and is restricted
to dates from 1970 to 2262 for the nanoseconds resolution in order to avoid
numeric overflow.
  • Loading branch information
jimczi authored Mar 20, 2019
1 parent 5eb33f2 commit 3400483
Show file tree
Hide file tree
Showing 9 changed files with 305 additions and 48 deletions.
61 changes: 59 additions & 2 deletions docs/reference/search/request/sort.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ POST /_search

For numeric fields it is also possible to cast the values from one type
to another using the `numeric_type` option.
This option accepts the following values: [`"double", "long"`] and can be useful
for cross-index search if the sort field is mapped differently on some
This option accepts the following values: [`"double", "long", "date", "date_nanos"`]
and can be useful for cross-index search if the sort field is mapped differently on some
indices.

Consider for instance these two indices:
Expand Down Expand Up @@ -175,6 +175,63 @@ but note that in this case floating points are replaced by the largest
value that is less than or equal (greater than or equal if the value
is negative) to the argument and is equal to a mathematical integer.

This option can also be used to convert a `date` field that uses millisecond
resolution to a `date_nanos` field with nanosecond resolution.
Consider for instance these two indices:

[source,js]
--------------------------------------------------
PUT /index_double
{
"mappings": {
"properties": {
"field": { "type": "date" }
}
}
}
--------------------------------------------------
// CONSOLE

[source,js]
--------------------------------------------------
PUT /index_long
{
"mappings": {
"properties": {
"field": { "type": "date_nanos" }
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

Values in these indices are stored with different resolutions so sorting on these
fields will always sort the `date` before the `date_nanos` (ascending order).
With the `numeric_type` option it is possible to set a single resolution for
the sort: setting it to `date` converts the `date_nanos` values to millisecond resolution,
while `date_nanos` converts the values in the `date` field to nanosecond resolution:

[source,js]
--------------------------------------------------
POST /index_long,index_double/_search
{
"sort" : [
{
"field" : {
"numeric_type" : "date_nanos"
}
}
]
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

[WARNING]
To avoid overflow, the conversion to `date_nanos` cannot be applied to dates before
1970 or after 2262, as nanoseconds are represented as longs.

[[nested-sorting]]
==== Sorting within nested objects.

Expand Down
24 changes: 22 additions & 2 deletions server/src/main/java/org/elasticsearch/common/time/DateUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ public static ZoneId of(String zoneId) {

private static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");

static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();

/**
* convert a java time instant to a long value which is stored in lucene
* the long value resembles the nanoseconds since the epoch
Expand Down Expand Up @@ -117,7 +119,7 @@ public static long toLong(Instant instant) {
*/
public static Instant toInstant(long nanoSecondsSinceEpoch) {
if (nanoSecondsSinceEpoch < 0) {
throw new IllegalArgumentException("nanoseconds are [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
throw new IllegalArgumentException("nanoseconds [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
"be processed in nanosecond resolution");
}
if (nanoSecondsSinceEpoch == 0) {
Expand All @@ -129,6 +131,24 @@ public static Instant toInstant(long nanoSecondsSinceEpoch) {
return Instant.ofEpochSecond(seconds, nanos);
}

/**
 * Convert a millisecond timestamp to nanoseconds.
 *
 * @param milliSecondsSinceEpoch the milliseconds since the epoch
 * @return the nanoseconds since the epoch
 * @throws IllegalArgumentException if the value is negative (before the epoch in 1970) or greater than
 *         <code>MAX_NANOSECOND_IN_MILLIS</code>, since such dates cannot be represented as nanoseconds in a long
 */
public static long toNanoSeconds(long milliSecondsSinceEpoch) {
    if (milliSecondsSinceEpoch < 0) {
        throw new IllegalArgumentException("milliSeconds [" + milliSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
            "be converted to nanoseconds");
    } else if (milliSecondsSinceEpoch > MAX_NANOSECOND_IN_MILLIS) {
        throw new IllegalArgumentException("milliSeconds [" + milliSecondsSinceEpoch + "] are after 2262-04-11T23:47:16.854775807 " +
            "and cannot be converted to nanoseconds");
    }

    // the range checks above keep the product within the bounds of a long
    return milliSecondsSinceEpoch * 1_000_000;
}

/**
* Convert a nanosecond timestamp in milliseconds
*
Expand All @@ -137,7 +157,7 @@ public static Instant toInstant(long nanoSecondsSinceEpoch) {
*/
public static long toMilliSeconds(long nanoSecondsSinceEpoch) {
if (nanoSecondsSinceEpoch < 0) {
throw new IllegalArgumentException("nanoseconds are [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and will " +
throw new IllegalArgumentException("nanoseconds are [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
"be converted to milliseconds");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,53 @@
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BitSet;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData;
import org.elasticsearch.search.MultiValueMode;

import java.io.IOException;
import java.util.function.Function;

/**
* Comparator source for long values.
*/
public class LongValuesComparatorSource extends IndexFieldData.XFieldComparatorSource {

private final IndexNumericFieldData indexFieldData;
private final Function<SortedNumericDocValues, SortedNumericDocValues> converter;

public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue, MultiValueMode sortMode,
Nested nested) {
public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue,
MultiValueMode sortMode, Nested nested) {
this(indexFieldData, missingValue, sortMode, nested, null);
}

/**
 * Creates a comparator source whose loaded doc values can be transformed before they are compared.
 *
 * @param converter function applied to the doc values loaded for each segment, or <code>null</code>
 *                  to compare the values exactly as they are loaded
 */
public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue,
                                  MultiValueMode sortMode, Nested nested,
                                  Function<SortedNumericDocValues, SortedNumericDocValues> converter) {
    super(missingValue, sortMode, nested);
    this.converter = converter;
    this.indexFieldData = indexFieldData;
}

/**
 * Reports {@link SortField.Type#LONG} as the reduced sort type of this comparator source.
 */
@Override
public SortField.Type reducedType() {
return SortField.Type.LONG;
}

/**
 * Loads the long doc values of the sort field for the given segment.
 * Field data of type {@link SortedNumericDVIndexFieldData.NanoSecondFieldData} is read through
 * <code>getLongValuesAsNanos()</code>, any other field data through <code>getLongValues()</code>.
 * The optional <code>converter</code> is then applied to the result.
 */
private SortedNumericDocValues loadDocValues(LeafReaderContext context) {
    final AtomicNumericFieldData fieldData = indexFieldData.load(context);
    final SortedNumericDocValues rawValues = fieldData instanceof SortedNumericDVIndexFieldData.NanoSecondFieldData
        ? ((SortedNumericDVIndexFieldData.NanoSecondFieldData) fieldData).getLongValuesAsNanos()
        : fieldData.getLongValues();
    if (converter == null) {
        // no conversion requested, compare the values as loaded
        return rawValues;
    }
    return converter.apply(rawValues);
}

@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
Expand All @@ -61,7 +83,7 @@ public FieldComparator<?> newComparator(String fieldname, int numHits, int sortP
return new FieldComparator.LongComparator(numHits, null, null) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
final SortedNumericDocValues values = indexFieldData.load(context).getLongValues();
final SortedNumericDocValues values = loadDocValues(context);
final NumericDocValues selectedValues;
if (nested == null) {
selectedValues = FieldData.replaceMissing(sortMode.select(values), dMissingValue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.function.LongUnaryOperator;

/**
* FieldData backed by {@link LeafReader#getSortedNumericDocValues(String)}
Expand All @@ -69,8 +70,8 @@ public SortedNumericDVIndexFieldData(Index index, String fieldNames, NumericType
* Values are casted to the provided <code>targetNumericType</code> type if it doesn't
* match the field's <code>numericType</code>.
*/
public SortField sortField(NumericType targetNumericType, Object missingValue, MultiValueMode sortMode, Nested nested,
boolean reverse) {
public SortField sortField(NumericType targetNumericType, Object missingValue, MultiValueMode sortMode,
Nested nested, boolean reverse) {
final XFieldComparatorSource source;
switch (targetNumericType) {
case HALF_FLOAT:
Expand All @@ -82,6 +83,26 @@ public SortField sortField(NumericType targetNumericType, Object missingValue, M
source = new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
break;

case DATE:
if (numericType == NumericType.DATE_NANOSECONDS) {
// converts date_nanos values to millisecond resolution
source = new LongValuesComparatorSource(this, missingValue,
sortMode, nested, dvs -> convertNanosToMillis(dvs));
} else {
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}
break;

case DATE_NANOSECONDS:
if (numericType == NumericType.DATE) {
// converts date values to nanosecond resolution
source = new LongValuesComparatorSource(this, missingValue,
sortMode, nested, dvs -> convertMillisToNanos(dvs));
} else {
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}
break;

default:
assert !targetNumericType.isFloatingPoint();
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
Expand All @@ -93,9 +114,9 @@ public SortField sortField(NumericType targetNumericType, Object missingValue, M
* returns a custom sort field otherwise.
*/
if (nested != null
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| numericType == NumericType.HALF_FLOAT
|| targetNumericType != numericType) {
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| numericType == NumericType.HALF_FLOAT
|| targetNumericType != numericType) {
return new SortField(fieldName, source, reverse);
}

Expand Down Expand Up @@ -171,29 +192,7 @@ public final class NanoSecondFieldData extends AtomicLongFieldData {

@Override
public SortedNumericDocValues getLongValues() {
final SortedNumericDocValues dv = getLongValuesAsNanos();
return new AbstractSortedNumericDocValues() {

@Override
public boolean advanceExact(int target) throws IOException {
return dv.advanceExact(target);
}

@Override
public long nextValue() throws IOException {
return DateUtils.toMilliSeconds(dv.nextValue());
}

@Override
public int docValueCount() {
return dv.docValueCount();
}

@Override
public int nextDoc() throws IOException {
return dv.nextDoc();
}
};
return convertNanosToMillis(getLongValuesAsNanos());
}

public SortedNumericDocValues getLongValuesAsNanos() {
Expand Down Expand Up @@ -463,4 +462,47 @@ public Collection<Accountable> getChildResources() {
return Collections.emptyList();
}
}

/**
 * Wraps <code>dvs</code> so that every value it returns is converted from nanosecond
 * to millisecond resolution with {@link DateUtils#toMilliSeconds(long)}.
 */
static SortedNumericDocValues convertNanosToMillis(SortedNumericDocValues dvs) {
    return convertNumeric(dvs, nanos -> DateUtils.toMilliSeconds(nanos));
}

/**
 * Convert the values in <code>values</code> from millisecond to nanosecond resolution.
 * The conversion is delegated to {@link DateUtils#toNanoSeconds(long)}, which rejects
 * values that are before the epoch or too large to be represented as nanoseconds
 * with an {@link IllegalArgumentException}.
 */
static SortedNumericDocValues convertMillisToNanos(SortedNumericDocValues values) {
    return convertNumeric(values, DateUtils::toNanoSeconds);
}

/**
 * Convert the values in <code>values</code> using the provided <code>converter</code>.
 * The returned doc values delegate iteration and counting to <code>values</code>;
 * only the result of <code>nextValue()</code> is passed through the <code>converter</code>.
 *
 * @param values    the doc values to wrap
 * @param converter the function applied to each value that is read
 * @return a view over <code>values</code> that returns converted values
 */
private static SortedNumericDocValues convertNumeric(SortedNumericDocValues values, LongUnaryOperator converter) {
    return new AbstractSortedNumericDocValues() {

        @Override
        public boolean advanceExact(int target) throws IOException {
            return values.advanceExact(target);
        }

        @Override
        public long nextValue() throws IOException {
            // the conversion is applied when a value is read
            return converter.applyAsLong(values.nextValue());
        }

        @Override
        public int docValueCount() {
            return values.docValueCount();
        }

        @Override
        public int nextDoc() throws IOException {
            return values.nextDoc();
        }
    };
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ public Builder format(String format) {
return this;
}

Builder withResolution(Resolution resolution) {
public Builder withResolution(Resolution resolution) {
this.resolution = resolution;
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,16 +304,19 @@ public String getNumericType() {
* Allowed values are <code>long</code>, <code>double</code>, <code>date</code> and <code>date_nanos</code>.
*/
public FieldSortBuilder setNumericType(String numericType) {
String upperCase = numericType.toUpperCase(Locale.ENGLISH);
switch (upperCase) {
case "LONG":
case "DOUBLE":
String lowerCase = numericType.toLowerCase(Locale.ENGLISH);
switch (lowerCase) {
case "long":
case "double":
case "date":
case "date_nanos":
break;

default:
throw new IllegalArgumentException("invalid value for [numeric_type], must be [LONG, DOUBLE], got " + numericType);
throw new IllegalArgumentException("invalid value for [numeric_type], " +
"must be [long, double, date, date_nanos], got " + lowerCase);
}
this.numericType = upperCase;
this.numericType = lowerCase;
return this;
}

Expand Down Expand Up @@ -348,6 +351,23 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
return builder;
}

/**
 * Maps a lower-case <code>numeric_type</code> option value to its {@link NumericType}.
 *
 * @param value one of <code>long</code>, <code>double</code>, <code>date</code> or <code>date_nanos</code>
 * @return the matching numeric type
 * @throws IllegalArgumentException if <code>value</code> is not one of the supported names
 */
private static NumericType resolveNumericType(String value) {
    if (value.equals("long")) {
        return NumericType.LONG;
    }
    if (value.equals("double")) {
        return NumericType.DOUBLE;
    }
    if (value.equals("date")) {
        return NumericType.DATE;
    }
    if (value.equals("date_nanos")) {
        return NumericType.DATE_NANOSECONDS;
    }
    throw new IllegalArgumentException("invalid value for [numeric_type], " +
        "must be [long, double, date, date_nanos], got " + value);
}

@Override
public SortFieldAndFormat build(QueryShardContext context) throws IOException {
if (DOC_FIELD_NAME.equals(fieldName)) {
Expand Down Expand Up @@ -404,7 +424,7 @@ public SortFieldAndFormat build(QueryShardContext context) throws IOException {
"[numeric_type] option cannot be set on a non-numeric field, got " + fieldType.typeName());
}
SortedNumericDVIndexFieldData numericFieldData = (SortedNumericDVIndexFieldData) fieldData;
NumericType resolvedType = NumericType.valueOf(numericType);
NumericType resolvedType = resolveNumericType(numericType);
field = numericFieldData.sortField(resolvedType, missing, localSortMode, nested, reverse);
} else {
field = fieldData.sortField(missing, localSortMode, nested, reverse);
Expand Down
Loading

0 comments on commit 3400483

Please sign in to comment.