Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support 'EXTRACT' as a scalar function #13463

Merged
merged 2 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,15 @@


/**
* Helper methods and constructs for datetrunc function
* Helper methods and constructs for date/time functions
*/
public class DateTimeUtils {
// Private no-arg constructor: the class cannot be instantiated from outside.
private DateTimeUtils() {
}

// Class-level SLF4J logger.
private static final Logger LOGGER = LoggerFactory.getLogger(DateTimeUtils.class);
private static final DateTimeFieldType QUARTER_OF_YEAR = new QuarterOfYearDateTimeField();
// Shared UTC ISO chronology; extract(ExtractFieldType, long) reads every calendar field through it.
private static final Chronology CHRONOLOGY_UTC = ISOChronology.getInstanceUTC();

public static DateTimeField getTimestampField(ISOChronology chronology, String unitString) {
switch (unitString.toLowerCase()) {
Expand Down Expand Up @@ -152,4 +153,41 @@ public DurationField getField(Chronology chronology) {
}
}
}

/**
 * Field selectors accepted by the EXTRACT operator. Callers resolve these by name through
 * {@code valueOf}, so the constant names are part of the contract.
 */
public enum ExtractFieldType {
  YEAR,
  // Derived from the month: (month - 1) / 3 + 1
  QUARTER,
  MONTH,
  // Week of week-year
  WEEK,
  // Day of month
  DAY,
  // Day of year
  DOY,
  // Day of week
  DOW,
  HOUR,
  MINUTE,
  SECOND
}

/**
 * Implements the SQL <code>EXTRACT</code> operator by reading one calendar field of a timestamp
 * through the shared UTC ISO chronology.
 *
 * @param extractFieldType the field to read
 * @param timestamp the instant handed to the chronology's field accessor
 * @return the value of the requested field; QUARTER is computed from the month as
 *         {@code (month - 1) / 3 + 1}
 * @throws IllegalArgumentException if the field type is not one of the handled constants
 */
public static int extract(ExtractFieldType extractFieldType, long timestamp) {
  final DateTimeField accessor;
  switch (extractFieldType) {
    case QUARTER: {
      // No direct accessor is used for quarters; map months 1-3 -> 1, 4-6 -> 2, and so on.
      int month = CHRONOLOGY_UTC.monthOfYear().get(timestamp);
      return (month - 1) / 3 + 1;
    }
    case YEAR:
      accessor = CHRONOLOGY_UTC.year();
      break;
    case MONTH:
      accessor = CHRONOLOGY_UTC.monthOfYear();
      break;
    case WEEK:
      accessor = CHRONOLOGY_UTC.weekOfWeekyear();
      break;
    case DAY:
      accessor = CHRONOLOGY_UTC.dayOfMonth();
      break;
    case DOY:
      accessor = CHRONOLOGY_UTC.dayOfYear();
      break;
    case DOW:
      accessor = CHRONOLOGY_UTC.dayOfWeek();
      break;
    case HOUR:
      accessor = CHRONOLOGY_UTC.hourOfDay();
      break;
    case MINUTE:
      accessor = CHRONOLOGY_UTC.minuteOfHour();
      break;
    case SECOND:
      accessor = CHRONOLOGY_UTC.secondOfMinute();
      break;
    default:
      throw new IllegalArgumentException("Unsupported FIELD type");
  }
  // Every non-quarter field is a plain read of the selected accessor.
  return accessor.get(timestamp);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1259,4 +1259,9 @@ public static long[] timestampDiffMVReverse(String unit, long timestamp1, long[]
}
return results;
}

/**
 * Scalar-function form of the SQL <code>EXTRACT</code> operator.
 *
 * @param interval name of the field to extract; matched against
 *                 {@link DateTimeUtils.ExtractFieldType} ignoring case (e.g. "YEAR" or "year")
 * @param timestamp the instant to extract the field from, passed through to
 *                  {@link DateTimeUtils#extract}
 * @return the value of the requested field
 * @throws IllegalArgumentException if {@code interval} does not name a supported field
 */
@ScalarFunction
public static int extract(String interval, long timestamp) {
  // Enum.valueOf is case-sensitive; normalise with a fixed locale so lower/mixed-case names resolve
  // the same way regardless of the JVM's default locale.
  DateTimeUtils.ExtractFieldType fieldType =
      DateTimeUtils.ExtractFieldType.valueOf(interval.toUpperCase(java.util.Locale.ROOT));
  return DateTimeUtils.extract(fieldType, timestamp);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -263,32 +263,25 @@ public void testQuotedStrings() {
@Test
public void testExtract() {
{
// Case 1 -- Year and date format ('2017-06-15')
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15')");
Function function = pinotQuery.getSelectList().get(0).getFunctionCall();
Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "YEAR");
Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15");
// Case 1 -- Year
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM 1719573611000)");
// The CompileTimeFunctionsInvoker will rewrite the query to replace the function call with the resultant literal
// value
Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getIntValue(), 2024);
}
{
// Case 2 -- date format ('2017-06-15 09:34:21')
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15 09:34:21')");
Function function = pinotQuery.getSelectList().get(0).getFunctionCall();
Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "YEAR");
Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15 09:34:21");
// Case 2 -- Month
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(MONTH FROM '1719573611000')");
// The CompileTimeFunctionsInvoker will rewrite the query to replace the function call with the resultant literal
// value
Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getIntValue(), 6);
}
{
// Case 3 -- Month
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(MONTH FROM '2017-06-15')");
Function function = pinotQuery.getSelectList().get(0).getFunctionCall();
Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "MONTH");
Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15");
}
{
// Case 4 -- Day
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(DAY FROM '2017-06-15')");
Function function = pinotQuery.getSelectList().get(0).getFunctionCall();
Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "DAY");
Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15");
// Case 3 -- Day
PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(DAY FROM 1719573611000)");
// The CompileTimeFunctionsInvoker will rewrite the query to replace the function call with the resultant literal
// value
Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getIntValue(), 28);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,18 @@

import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;
import org.apache.pinot.common.function.DateTimeUtils;
import org.apache.pinot.core.operator.ColumnContext;
import org.apache.pinot.core.operator.blocks.ValueBlock;
import org.apache.pinot.core.operator.transform.TransformResultMetadata;
import org.joda.time.Chronology;
import org.joda.time.DateTimeField;
import org.joda.time.chrono.ISOChronology;
import org.roaringbitmap.RoaringBitmap;


public class ExtractTransformFunction extends BaseTransformFunction {
public static final String FUNCTION_NAME = "extract";
private TransformFunction _mainTransformFunction;
protected Field _field;
protected Chronology _chronology = ISOChronology.getInstanceUTC();

private enum Field {
YEAR, QUARTER, MONTH, WEEK, DAY, DOY, DOW, HOUR, MINUTE, SECOND
}
private DateTimeUtils.ExtractFieldType _field;

@Override
public String getName() {
Expand All @@ -51,7 +45,7 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c
throw new IllegalArgumentException("Exactly 2 arguments are required for EXTRACT transform function");
}

_field = Field.valueOf(((LiteralTransformFunction) arguments.get(0)).getStringLiteral());
_field = DateTimeUtils.ExtractFieldType.valueOf(((LiteralTransformFunction) arguments.get(0)).getStringLiteral());
_mainTransformFunction = arguments.get(1);
}

Expand All @@ -65,60 +59,10 @@ public int[] transformToIntValuesSV(ValueBlock valueBlock) {
int numDocs = valueBlock.getNumDocs();
initIntValuesSV(numDocs);
long[] timestamps = _mainTransformFunction.transformToLongValuesSV(valueBlock);
convert(timestamps, numDocs, _intValuesSV);
IntStream.range(0, numDocs).forEach(i -> _intValuesSV[i] = DateTimeUtils.extract(_field, timestamps[i]));
return _intValuesSV;
}

/**
 * Writes the configured field of each of the first {@code numDocs} timestamps into {@code output}.
 *
 * <p>The field accessor depends only on {@code _field}, so it is chosen once up front rather than
 * once per row.
 *
 * @param timestamps input instants; entries {@code [0, numDocs)} are read
 * @param numDocs number of entries to convert
 * @param output destination array; entries {@code [0, numDocs)} are overwritten
 * @throws IllegalArgumentException if {@code _field} is not one of the handled constants
 */
private void convert(long[] timestamps, int numDocs, int[] output) {
  // Nothing to convert: keep this a no-op, exactly as an empty loop would be.
  if (numDocs <= 0) {
    return;
  }

  final DateTimeField accessor;
  boolean asQuarter = false;
  switch (_field) {
    case YEAR:
      accessor = _chronology.year();
      break;
    case QUARTER:
      // Quarter is derived from the month of year below.
      accessor = _chronology.monthOfYear();
      asQuarter = true;
      break;
    case MONTH:
      accessor = _chronology.monthOfYear();
      break;
    case WEEK:
      accessor = _chronology.weekOfWeekyear();
      break;
    case DAY:
      accessor = _chronology.dayOfMonth();
      break;
    case DOY:
      accessor = _chronology.dayOfYear();
      break;
    case DOW:
      accessor = _chronology.dayOfWeek();
      break;
    case HOUR:
      accessor = _chronology.hourOfDay();
      break;
    case MINUTE:
      accessor = _chronology.minuteOfHour();
      break;
    case SECOND:
      accessor = _chronology.secondOfMinute();
      break;
    default:
      throw new IllegalArgumentException("Unsupported FIELD type");
  }

  if (asQuarter) {
    for (int i = 0; i < numDocs; i++) {
      // Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4
      output[i] = (accessor.get(timestamps[i]) - 1) / 3 + 1;
    }
  } else {
    for (int i = 0; i < numDocs; i++) {
      output[i] = accessor.get(timestamps[i]);
    }
  }
}

@Override
public RoaringBitmap getNullBitmap(ValueBlock valueBlock) {
return _mainTransformFunction.getNullBitmap(valueBlock);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,12 @@ public static Object[][] testCases() {
return new Object[][]{
//@formatter:off
{"year", (LongToIntFunction) DateTimeFunctions::year},
{"quarter", (LongToIntFunction) timestamp -> (DateTimeFunctions.monthOfYear(timestamp) - 1) / 3 + 1},
{"month", (LongToIntFunction) DateTimeFunctions::monthOfYear},
{"week", (LongToIntFunction) DateTimeFunctions::weekOfYear},
{"day", (LongToIntFunction) DateTimeFunctions::dayOfMonth},
{"doy", (LongToIntFunction) DateTimeFunctions::dayOfYear},
{"dow", (LongToIntFunction) DateTimeFunctions::dayOfWeek},
{"hour", (LongToIntFunction) DateTimeFunctions::hour},
{"minute", (LongToIntFunction) DateTimeFunctions::minute},
{"second", (LongToIntFunction) DateTimeFunctions::second},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Objects;
import javax.annotation.Nullable;
import org.apache.calcite.avatica.util.ByteString;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.logical.LogicalFilter;
Expand Down Expand Up @@ -233,6 +234,8 @@ private static Object getLiteralValue(RexLiteral rexLiteral) {
} else if (value instanceof ByteString) {
// BYTES
return ((ByteString) value).getBytes();
} else if (value instanceof TimeUnitRange) {
return ((TimeUnitRange) value).name();
} else {
return value;
}
Expand Down
Loading