diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index d5ec3f250982..c2c1dd85306f 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -54,6 +54,8 @@ public class Schema implements Serializable { private static final Joiner NEWLINE = Joiner.on('\n'); private static final String ALL_COLUMNS = "*"; private static final int DEFAULT_SCHEMA_ID = 0; + private static final Map MIN_FORMAT_VERSIONS = + ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3); private final StructType struct; private final int schemaId; @@ -573,4 +575,27 @@ private List reassignIds(List columns, TypeUtil.GetID }); return res.asStructType().fields(); } + + /** + * Check the compatibility of the schema with a format version. + * + *

This validates that the schema does not contain types that were released in later format + * versions. + * + * @param schema a Schema + * @param formatVersion table format version + */ + public static void checkCompatibility(Schema schema, int formatVersion) { + // check the type in each field + for (NestedField field : schema.lazyIdToField().values()) { + Integer minFormatVersion = MIN_FORMAT_VERSIONS.get(field.type().typeId()); + Preconditions.checkState( + minFormatVersion == null || formatVersion >= minFormatVersion, + "Invalid type in v%s schema: %s %s is not supported until v%s", + formatVersion, + schema.findColumnName(field.fieldId()), + field.type(), + minFormatVersion); + } + } } diff --git a/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java b/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java index 02dc31c6a6c5..127d46e6a48f 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java +++ b/api/src/main/java/org/apache/iceberg/expressions/BoundLiteralPredicate.java @@ -31,6 +31,7 @@ public class BoundLiteralPredicate extends BoundPredicate { Type.TypeID.LONG, Type.TypeID.DATE, Type.TypeID.TIME, + Type.TypeID.TIMESTAMP_NANO, Type.TypeID.TIMESTAMP); private static long toLong(Literal lit) { diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 3708dafc4126..4d56529e62db 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -36,6 +36,7 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; /** Expression utility methods. 
*/ public class ExpressionUtil { @@ -43,6 +44,7 @@ public class ExpressionUtil { Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get()); private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); private static final long FIVE_MINUTES_IN_MICROS = TimeUnit.MINUTES.toMicros(5); + private static final long FIVE_MINUTES_IN_NANOS = TimeUnit.MINUTES.toNanos(5); private static final long THREE_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(3); private static final long NINETY_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(90); private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); @@ -52,6 +54,12 @@ public class ExpressionUtil { private static final Pattern TIMESTAMPTZ = Pattern.compile( "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)"); + private static final Pattern TIMESTAMPNS = + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?"); + private static final Pattern TIMESTAMPTZNS = + Pattern.compile( + "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)"); + static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10; private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5; @@ -515,6 +523,8 @@ private static String sanitize(Type type, Object value, long now, int today) { return "(time)"; case TIMESTAMP: return sanitizeTimestamp((long) value, now); + case TIMESTAMP_NANO: + return sanitizeTimestamp(DateTimeUtil.nanosToMicros((long) value), now); case STRING: return sanitizeString((CharSequence) value, now, today); case BOOLEAN: @@ -536,6 +546,9 @@ private static String sanitize(Literal literal, long now, int today) { return sanitizeDate(((Literals.DateLiteral) literal).value(), today); } else if (literal instanceof Literals.TimestampLiteral) { return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now); + } else if (literal instanceof Literals.TimestampNanoLiteral) { + return sanitizeTimestamp( +
DateTimeUtil.nanosToMicros(((Literals.TimestampNanoLiteral) literal).value()), now); } else if (literal instanceof Literals.TimeLiteral) { return "(time)"; } else if (literal instanceof Literals.IntegerLiteral) { @@ -594,6 +607,12 @@ private static String sanitizeString(CharSequence value, long now, int today) { if (DATE.matcher(value).matches()) { Literal date = Literal.of(value).to(Types.DateType.get()); return sanitizeDate(date.value(), today); + } else if (TIMESTAMPNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampNanoType.withoutZone()); + return sanitizeTimestamp(DateTimeUtil.nanosToMicros(ts.value()), now); + } else if (TIMESTAMPTZNS.matcher(value).matches()) { + Literal ts = Literal.of(value).to(Types.TimestampNanoType.withZone()); + return sanitizeTimestamp(DateTimeUtil.nanosToMicros(ts.value()), now); } else if (TIMESTAMP.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone()); return sanitizeTimestamp(ts.value(), now); diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index 79d7190c49df..ee47035b1e72 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -24,7 +24,6 @@ import java.nio.ByteBuffer; import java.time.Instant; import java.time.LocalDate; -import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; @@ -40,6 +39,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.NaNUtil; class Literals { @@ -299,6 +299,9 @@ public Literal to(Type type) { return (Literal) new TimeLiteral(value()); case TIMESTAMP: return (Literal) new TimestampLiteral(value()); + case TIMESTAMP_NANO: + // assume 
micros and convert to nanos to match the behavior in the timestamp case above + return new TimestampLiteral(value()).to(type); case DATE: if ((long) Integer.MAX_VALUE < value()) { return aboveMax(); @@ -437,11 +440,9 @@ public Literal to(Type type) { case TIMESTAMP: return (Literal) this; case DATE: - return (Literal) - new DateLiteral( - (int) - ChronoUnit.DAYS.between( - EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); + return (Literal) new DateLiteral(DateTimeUtil.microsToDays(value())); + case TIMESTAMP_NANO: + return (Literal) new TimestampNanoLiteral(DateTimeUtil.microsToNanos(value())); default: } return null; @@ -453,6 +454,32 @@ protected Type.TypeID typeId() { } } + static class TimestampNanoLiteral extends ComparableLiteral { + TimestampNanoLiteral(Long value) { + super(value); + } + + @Override + @SuppressWarnings("unchecked") + public Literal to(Type type) { + switch (type.typeId()) { + case DATE: + return (Literal) new DateLiteral(DateTimeUtil.nanosToDays(value())); + case TIMESTAMP: + return (Literal) new TimestampLiteral(DateTimeUtil.nanosToMicros(value())); + case TIMESTAMP_NANO: + return (Literal) this; + default: + } + return null; + } + + @Override + protected Type.TypeID typeId() { + return Type.TypeID.TIMESTAMP_NANO; + } + } + static class DecimalLiteral extends ComparableLiteral { DecimalLiteral(BigDecimal value) { super(value); @@ -502,19 +529,21 @@ public Literal to(Type type) { case TIMESTAMP: if (((Types.TimestampType) type).shouldAdjustToUTC()) { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + long timestampMicros = DateTimeUtil.isoTimestamptzToMicros(value().toString()); return (Literal) new TimestampLiteral(timestampMicros); } else { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); + long timestampMicros = 
DateTimeUtil.isoTimestampToMicros(value().toString()); return (Literal) new TimestampLiteral(timestampMicros); } + case TIMESTAMP_NANO: + if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { + return (Literal) + new TimestampNanoLiteral(DateTimeUtil.isoTimestamptzToNanos(value())); + } else { + return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); + } + case STRING: return (Literal) this; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 912bcd271725..0e4e782cc110 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -33,6 +33,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.BucketUtil; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; class Bucket implements Transform, Serializable { @@ -63,6 +64,8 @@ static & SerializableFunction> B get( case FIXED: case BINARY: return (B) new BucketByteBuffer(numBuckets); + case TIMESTAMP_NANO: + return (B) new BucketTimestampNano(numBuckets); case UUID: return (B) new BucketUUID(numBuckets); default: @@ -107,6 +110,7 @@ public boolean canTransform(Type type) { case DATE: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: case STRING: case BINARY: case FIXED: @@ -214,6 +218,20 @@ protected int hash(Long value) { } } + // In order to bucket TimestampNano the same as Timestamp, convert to micros before hashing. 
+ private static class BucketTimestampNano extends Bucket + implements SerializableFunction { + + private BucketTimestampNano(int numBuckets) { + super(numBuckets); + } + + @Override + protected int hash(Long nanos) { + return BucketUtil.hash(DateTimeUtil.nanosToMicros(nanos)); + } + } + private static class BucketString extends Bucket implements SerializableFunction { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Dates.java b/api/src/main/java/org/apache/iceberg/transforms/Dates.java index 3d26b542be7b..88db16797867 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Dates.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Dates.java @@ -97,6 +97,10 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); } + ChronoUnit granularity() { + return granularity; + } + @Override public boolean preservesOrder() { return true; @@ -109,11 +113,11 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Dates) { - // test the granularity, in days. 
day(ts) => 1 day, months(ts) => 30 days, and day satisfies - // the order of months - Dates otherTransform = (Dates) other; - return granularity.getDuration().toDays() - <= otherTransform.granularity.getDuration().toDays(); + return TransformUtil.satisfiesOrderOf(granularity, ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity, ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity, ((TimeTransform) other).granularity()); } return false; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index f69d5d6110ed..e2b829b86662 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,38 +32,19 @@ static Days get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.DAY; - case TIMESTAMP: - return (Transform) Timestamps.DAY; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.DAYS; } @Override - public Type getResultType(Type sourceType) { - return Types.DateType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.DAY, Timestamps.MICROS_TO_DAY, Timestamps.NANOS_TO_DAY); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - return Timestamps.DAY.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return 
Dates.DAY.satisfiesOrderOf(other); - } else if (other instanceof Days || other instanceof Months || other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.DateType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index afc14516f3cd..2ff79f6a66a7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -30,19 +31,21 @@ static Hours get() { return (Hours) INSTANCE; } + @Override + protected ChronoUnit granularity() { + return ChronoUnit.HOURS; + } + @Override @SuppressWarnings("unchecked") protected Transform toEnum(Type type) { - if (type.typeId() == Type.TypeID.TIMESTAMP) { - return (Transform) Timestamps.HOUR; - } - - throw new IllegalArgumentException("Unsupported type: " + type); + return (Transform) + fromSourceType(type, null, Timestamps.MICROS_TO_HOUR, Timestamps.NANOS_TO_HOUR); } @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override @@ -50,24 +53,6 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); } - @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - return other == Timestamps.HOUR; - } else if (other instanceof Hours - || other instanceof Days - || other instanceof Months - || other instanceof Years) { - return true; - } - - return false; - } - @Override public String toHumanString(Type alwaysInt, Integer value) { return 
value != null ? TransformUtil.humanHour(value) : "null"; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index 8fa4d42385f7..73ec50e5dd9a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,38 +32,19 @@ static Months get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.MONTH; - case TIMESTAMP: - return (Transform) Timestamps.MONTH; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.MONTHS; } @Override - public Type getResultType(Type sourceType) { - return Types.IntegerType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.MONTH, Timestamps.MICROS_TO_MONTH, Timestamps.NANOS_TO_MONTH); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - return Timestamps.MONTH.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.MONTH.satisfiesOrderOf(other); - } else if (other instanceof Months || other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.IntegerType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java index e4796478bf28..0d80ef88a296 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java +++ 
b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java @@ -122,16 +122,23 @@ static R visit(Schema schema, PartitionField field, PartitionSpecVisitor int width = ((Truncate) transform).width(); return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR + || transform == Timestamps.MICROS_TO_YEAR + || transform == Timestamps.NANOS_TO_YEAR || transform instanceof Years) { return visitor.year(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH + || transform == Timestamps.MICROS_TO_MONTH + || transform == Timestamps.NANOS_TO_MONTH || transform instanceof Months) { return visitor.month(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Dates.DAY || transform == Timestamps.DAY || transform instanceof Days) { + } else if (transform == Dates.DAY + || transform == Timestamps.MICROS_TO_DAY + || transform == Timestamps.NANOS_TO_DAY + || transform instanceof Days) { return visitor.day(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Timestamps.HOUR || transform instanceof Hours) { + } else if (transform == Timestamps.MICROS_TO_HOUR + || transform == Timestamps.NANOS_TO_HOUR + || transform instanceof Hours) { return visitor.hour(field.fieldId(), sourceName, field.sourceId()); } else if (transform instanceof VoidTransform) { return visitor.alwaysNull(field.fieldId(), sourceName, field.sourceId()); diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index 680e095270fb..62cc9d3cdb33 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -85,21 +85,26 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { visitor.truncate( sourceName, 
field.sourceId(), width, field.direction(), field.nullOrder())); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR + || transform == Timestamps.MICROS_TO_YEAR + || transform == Timestamps.NANOS_TO_YEAR || transform instanceof Years) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH + || transform == Timestamps.MICROS_TO_MONTH + || transform == Timestamps.NANOS_TO_MONTH || transform instanceof Months) { results.add( visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.DAY - || transform == Timestamps.DAY + || transform == Timestamps.MICROS_TO_DAY + || transform == Timestamps.NANOS_TO_DAY || transform instanceof Days) { results.add( visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Timestamps.HOUR || transform instanceof Hours) { + } else if (transform == Timestamps.MICROS_TO_HOUR + || transform == Timestamps.NANOS_TO_HOUR + || transform instanceof Hours) { results.add( visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform instanceof UnknownTransform) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java index 01ea8130aa60..c348fda52b02 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg.transforms; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundTransform; import org.apache.iceberg.expressions.UnboundPredicate; @@ -25,6 +26,24 @@ import org.apache.iceberg.util.SerializableFunction; abstract class TimeTransform implements Transform { + 
protected static R fromSourceType(Type type, R dateResult, R microsResult, R nanosResult) { + switch (type.typeId()) { + case DATE: + if (dateResult != null) { + return dateResult; + } + break; + case TIMESTAMP: + return microsResult; + case TIMESTAMP_NANO: + return nanosResult; + } + + throw new IllegalArgumentException("Unsupported type: " + type); + } + + protected abstract ChronoUnit granularity(); + protected abstract Transform toEnum(Type type); @Override @@ -37,9 +56,29 @@ public boolean preservesOrder() { return true; } + @Override + public boolean satisfiesOrderOf(Transform other) { + if (this == other) { + return true; + } + + if (other instanceof Dates) { + return TransformUtil.satisfiesOrderOf(granularity(), ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity(), ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf( + granularity(), ((TimeTransform) other).granularity()); + } + + return false; + } + @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.DATE || type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.DATE + || type.typeId() == Type.TypeID.TIMESTAMP + || type.typeId() == Type.TypeID.TIMESTAMP_NANO; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index b5b50e9d42b2..8b8c2ca0a96b 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -32,53 +32,29 @@ import org.apache.iceberg.util.SerializableFunction; enum Timestamps implements Transform { - YEAR(ChronoUnit.YEARS, "year"), - MONTH(ChronoUnit.MONTHS, "month"), - DAY(ChronoUnit.DAYS, "day"), - HOUR(ChronoUnit.HOURS, "hour"); + MICROS_TO_YEAR(ChronoUnit.YEARS, "year", MicrosToYears.INSTANCE), + 
MICROS_TO_MONTH(ChronoUnit.MONTHS, "month", MicrosToMonths.INSTANCE), + MICROS_TO_DAY(ChronoUnit.DAYS, "day", MicrosToDays.INSTANCE), + MICROS_TO_HOUR(ChronoUnit.HOURS, "hour", MicrosToHours.INSTANCE), - @Immutable - static class Apply implements SerializableFunction { - private final ChronoUnit granularity; - - Apply(ChronoUnit granularity) { - this.granularity = granularity; - } - - @Override - public Integer apply(Long timestampMicros) { - if (timestampMicros == null) { - return null; - } - - switch (granularity) { - case YEARS: - return DateTimeUtil.microsToYears(timestampMicros); - case MONTHS: - return DateTimeUtil.microsToMonths(timestampMicros); - case DAYS: - return DateTimeUtil.microsToDays(timestampMicros); - case HOURS: - return DateTimeUtil.microsToHours(timestampMicros); - default: - throw new UnsupportedOperationException("Unsupported time unit: " + granularity); - } - } - } + NANOS_TO_YEAR(ChronoUnit.YEARS, "year", NanosToYears.INSTANCE), + NANOS_TO_MONTH(ChronoUnit.MONTHS, "month", NanosToMonths.INSTANCE), + NANOS_TO_DAY(ChronoUnit.DAYS, "day", NanosToDays.INSTANCE), + NANOS_TO_HOUR(ChronoUnit.HOURS, "hour", NanosToHours.INSTANCE); private final ChronoUnit granularity; private final String name; - private final Apply apply; + private final SerializableFunction apply; - Timestamps(ChronoUnit granularity, String name) { - this.granularity = granularity; + Timestamps(ChronoUnit granularity, String name, SerializableFunction apply) { this.name = name; - this.apply = new Apply(granularity); + this.granularity = granularity; + this.apply = apply; } @Override - public Integer apply(Long timestampMicros) { - return apply.apply(timestampMicros); + public Integer apply(Long timestamp) { + return apply.apply(timestamp); } @Override @@ -89,7 +65,7 @@ public SerializableFunction bind(Type type) { @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == 
Type.TypeID.TIMESTAMP_NANO; } @Override @@ -100,6 +76,10 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); } + ChronoUnit granularity() { + return granularity; + } + @Override public boolean preservesOrder() { return true; @@ -111,12 +91,12 @@ public boolean satisfiesOrderOf(Transform other) { return true; } - if (other instanceof Timestamps) { - // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies - // the order of day - Timestamps otherTransform = (Timestamps) other; - return granularity.getDuration().toHours() - <= otherTransform.granularity.getDuration().toHours(); + if (other instanceof Dates) { + return TransformUtil.satisfiesOrderOf(granularity, ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity, ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity, ((TimeTransform) other).granularity()); } return false; @@ -197,4 +177,116 @@ public String toString() { public String dedupName() { return "time"; } + + @Immutable + static class MicrosToYears implements SerializableFunction { + static final MicrosToYears INSTANCE = new MicrosToYears(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToYears(micros); + } + } + + @Immutable + static class MicrosToMonths implements SerializableFunction { + static final MicrosToMonths INSTANCE = new MicrosToMonths(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToMonths(micros); + } + } + + @Immutable + static class MicrosToDays implements SerializableFunction { + static final MicrosToDays INSTANCE = new MicrosToDays(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return 
DateTimeUtil.microsToDays(micros); + } + } + + @Immutable + static class MicrosToHours implements SerializableFunction { + static final MicrosToHours INSTANCE = new MicrosToHours(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToHours(micros); + } + } + + @Immutable + static class NanosToYears implements SerializableFunction { + static final NanosToYears INSTANCE = new NanosToYears(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToYears(nanos); + } + } + + @Immutable + static class NanosToMonths implements SerializableFunction { + static final NanosToMonths INSTANCE = new NanosToMonths(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToMonths(nanos); + } + } + + @Immutable + static class NanosToDays implements SerializableFunction { + static final NanosToDays INSTANCE = new NanosToDays(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToDays(nanos); + } + } + + @Immutable + static class NanosToHours implements SerializableFunction { + static final NanosToHours INSTANCE = new NanosToHours(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToHours(nanos); + } + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java index 5a56b672b1b1..78312b58b12f 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transform.java @@ -181,6 +181,12 @@ default String toHumanString(Type type, T value) { } else { return TransformUtil.humanTimestampWithoutZone((Long) value); } + case TIMESTAMP_NANO: + if (((Types.TimestampNanoType) 
type).shouldAdjustToUTC()) { + return TransformUtil.humanTimestampNanoWithZone((Long) value); + } else { + return TransformUtil.humanTimestampNanoWithoutZone((Long) value); + } case FIXED: case BINARY: if (value instanceof ByteBuffer) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 53bc23a49888..dd7f97e950e8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -26,6 +26,7 @@ import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; import java.util.Base64; +import org.apache.iceberg.util.DateTimeUtil; class TransformUtil { @@ -55,11 +56,19 @@ static String humanTime(Long microsFromMidnight) { } static String humanTimestampWithZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); + return DateTimeUtil.microsToIsoTimestamptz(timestampMicros); } static String humanTimestampWithoutZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + return DateTimeUtil.microsToIsoTimestamp(timestampMicros); + } + + static String humanTimestampNanoWithZone(Long timestampNanos) { + return DateTimeUtil.nanosToIsoTimestamptz(timestampNanos); + } + + static String humanTimestampNanoWithoutZone(Long timestampNanos) { + return DateTimeUtil.nanosToIsoTimestamp(timestampNanos); } static String humanHour(int hourOrdinal) { @@ -73,4 +82,10 @@ static String base64encode(ByteBuffer buffer) { // use direct encoding because all of the encoded bytes are in ASCII return StandardCharsets.ISO_8859_1.decode(Base64.getEncoder().encode(buffer)).toString(); } + + static boolean satisfiesOrderOf(ChronoUnit leftGranularity, ChronoUnit rightGranularity) { + // test the granularity, in hours. 
hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies + // the order of day + return leftGranularity.getDuration().toHours() <= rightGranularity.getDuration().toHours(); + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index a1ce33ddd6da..11282efdefb1 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -23,7 +23,6 @@ import java.util.regex.Pattern; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; /** @@ -68,6 +67,8 @@ private Transforms() {} return new UnknownTransform<>(transform); } + /** @deprecated use {@link #identity()} instead; will be removed in 2.0.0 */ + @Deprecated public static Transform fromString(Type type, String transform) { Matcher widthMatcher = HAS_WIDTH.matcher(transform); if (widthMatcher.matches()) { @@ -80,22 +81,20 @@ private Transforms() {} } } - if (transform.equalsIgnoreCase("identity")) { - return Identity.get(type); - } - - try { - if (type.typeId() == Type.TypeID.TIMESTAMP) { - return Timestamps.valueOf(transform.toUpperCase(Locale.ENGLISH)); - } else if (type.typeId() == Type.TypeID.DATE) { - return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); - } - } catch (IllegalArgumentException ignored) { - // fall through to return unknown transform - } - - if (transform.equalsIgnoreCase("void")) { - return VoidTransform.get(); + String lowerTransform = transform.toLowerCase(Locale.ENGLISH); + switch (lowerTransform) { + case "identity": + return Identity.get(type); + case "year": + return Years.get().toEnum(type); + case "month": + return Months.get().toEnum(type); + case "day": + return Days.get().toEnum(type); + case "hour": + return Hours.get().toEnum(type); + case "void": + return 
VoidTransform.get(); } return new UnknownTransform<>(transform); @@ -125,14 +124,7 @@ public static Transform identity(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform year(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.YEAR; - case TIMESTAMP: - return (Transform) Timestamps.YEAR; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by year"); - } + return (Transform) Years.get().toEnum(type); } /** @@ -146,14 +138,7 @@ public static Transform year(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform month(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.MONTH; - case TIMESTAMP: - return (Transform) Timestamps.MONTH; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by month"); - } + return (Transform) Months.get().toEnum(type); } /** @@ -167,14 +152,7 @@ public static Transform month(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform day(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.DAY; - case TIMESTAMP: - return (Transform) Timestamps.DAY; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by day"); - } + return (Transform) Days.get().toEnum(type); } /** @@ -188,9 +166,7 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - Preconditions.checkArgument( - type.typeId() == Type.TypeID.TIMESTAMP, "Cannot partition type %s by hour", type); - return (Transform) Timestamps.HOUR; + return (Transform) Hours.get().toEnum(type); } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 6c1eee578506..2920a37dc692 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ 
b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,38 +32,19 @@ static Years get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.YEAR; - case TIMESTAMP: - return (Transform) Timestamps.YEAR; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.YEARS; } @Override - public Type getResultType(Type sourceType) { - return Types.IntegerType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.YEAR, Timestamps.MICROS_TO_YEAR, Timestamps.NANOS_TO_YEAR); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - return Timestamps.YEAR.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.YEAR.satisfiesOrderOf(other); - } else if (other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.IntegerType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java b/api/src/main/java/org/apache/iceberg/types/Comparators.java index d09d9f5395ce..36664d3121cf 100644 --- a/api/src/main/java/org/apache/iceberg/types/Comparators.java +++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java @@ -41,6 +41,8 @@ private Comparators() {} .put(Types.TimeType.get(), Comparator.naturalOrder()) .put(Types.TimestampType.withZone(), Comparator.naturalOrder()) .put(Types.TimestampType.withoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampNanoType.withZone(), Comparator.naturalOrder()) + 
.put(Types.TimestampNanoType.withoutZone(), Comparator.naturalOrder()) .put(Types.StringType.get(), Comparators.charSequences()) .put(Types.UUIDType.get(), Comparator.naturalOrder()) .put(Types.BinaryType.get(), Comparators.unsignedBytes()) diff --git a/api/src/main/java/org/apache/iceberg/types/Conversions.java b/api/src/main/java/org/apache/iceberg/types/Conversions.java index 1d2539514954..e18c7b4362e6 100644 --- a/api/src/main/java/org/apache/iceberg/types/Conversions.java +++ b/api/src/main/java/org/apache/iceberg/types/Conversions.java @@ -97,6 +97,7 @@ public static ByteBuffer toByteBuffer(Type.TypeID typeId, Object value) { case LONG: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: return ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0, (long) value); case FLOAT: return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(0, (float) value); @@ -146,6 +147,7 @@ private static Object internalFromByteBuffer(Type type, ByteBuffer buffer) { case LONG: case TIME: case TIMESTAMP: + case TIMESTAMP_NANO: if (tmp.remaining() < 8) { // type was later promoted to long return (long) tmp.getInt(); diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 5062b54d10e1..571bf9a14e43 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -37,6 +37,7 @@ enum TypeID { DATE(Integer.class), TIME(Long.class), TIMESTAMP(Long.class), + TIMESTAMP_NANO(Long.class), STRING(CharSequence.class), UUID(java.util.UUID.class), FIXED(ByteBuffer.class), diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 07d06dcc5a89..1a5d336abf19 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -496,6 +496,7 @@ private static int estimateSize(Type type) { case DOUBLE: case 
TIME: case TIMESTAMP: + case TIMESTAMP_NANO: // longs and doubles occupy 8 bytes // times and timestamps are internally represented as longs return 8; diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index ce6caa4721df..2352b9b52f13 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -49,6 +49,8 @@ private Types() {} .put(TimeType.get().toString(), TimeType.get()) .put(TimestampType.withZone().toString(), TimestampType.withZone()) .put(TimestampType.withoutZone().toString(), TimestampType.withoutZone()) + .put(TimestampNanoType.withZone().toString(), TimestampNanoType.withZone()) + .put(TimestampNanoType.withoutZone().toString(), TimestampNanoType.withoutZone()) .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) @@ -259,6 +261,59 @@ public int hashCode() { } } + public static class TimestampNanoType extends PrimitiveType { + private static final TimestampNanoType INSTANCE_WITH_ZONE = new TimestampNanoType(true); + private static final TimestampNanoType INSTANCE_WITHOUT_ZONE = new TimestampNanoType(false); + + public static TimestampNanoType withZone() { + return INSTANCE_WITH_ZONE; + } + + public static TimestampNanoType withoutZone() { + return INSTANCE_WITHOUT_ZONE; + } + + private final boolean adjustToUTC; + + private TimestampNanoType(boolean adjustToUTC) { + this.adjustToUTC = adjustToUTC; + } + + public boolean shouldAdjustToUTC() { + return adjustToUTC; + } + + @Override + public TypeID typeId() { + return TypeID.TIMESTAMP_NANO; + } + + @Override + public String toString() { + if (shouldAdjustToUTC()) { + return "timestamptz_ns"; + } else { + return "timestamp_ns"; + } + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof 
TimestampNanoType)) { + return false; + } + + return adjustToUTC == ((TimestampNanoType) other).adjustToUTC; + } + + @Override + public int hashCode() { + return Objects.hash(TimestampNanoType.class, adjustToUTC); + } + } + public static class StringType extends PrimitiveType { private static final StringType INSTANCE = new StringType(); diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index a2f5301f44a9..3c312486be00 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -35,6 +35,15 @@ private DateTimeUtil() {} public static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); public static final long MICROS_PER_MILLIS = 1000L; public static final long MICROS_PER_SECOND = 1_000_000L; + private static final long NANOS_PER_SECOND = 1_000_000_000L; + private static final long NANOS_PER_MICRO = 1_000L; + + private static final DateTimeFormatter FORMATTER = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HH:MM:ss", "+00:00") + .toFormatter(); public static LocalDate dateFromDays(int daysFromEpoch) { return ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); @@ -60,6 +69,10 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } + public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); + } + public static long microsFromInstant(Instant instant) { return ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } @@ -68,6 +81,10 @@ public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } + public static long nanosFromTimestamp(LocalDateTime dateTime) { + 
return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); + } + public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. @@ -75,6 +92,14 @@ public static long microsToMillis(long micros) { return Math.floorDiv(micros, MICROS_PER_MILLIS); } + public static long nanosToMicros(long nanos) { + return Math.floorDiv(nanos, NANOS_PER_MICRO); + } + + public static long microsToNanos(long micros) { + return Math.multiplyExact(micros, NANOS_PER_MICRO); + } + public static OffsetDateTime timestamptzFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch); } @@ -83,6 +108,10 @@ public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } + public static long nanosFromTimestamptz(OffsetDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime); + } + public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -97,13 +126,12 @@ public static String microsToIsoTime(long micros) { public static String microsToIsoTimestamptz(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); - DateTimeFormatter zeroOffsetFormatter = - new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .appendOffset("+HH:MM:ss", "+00:00") - .toFormatter(); - return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); + return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); + } + + public static String nanosToIsoTimestamptz(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); } public static String microsToIsoTimestamp(long micros) { @@ -111,6 +139,11 @@ 
public static String microsToIsoTimestamp(long micros) { return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } + public static String nanosToIsoTimestamp(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -124,6 +157,11 @@ public static long isoTimestamptzToMicros(String timestampString) { OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } + public static long isoTimestamptzToNanos(CharSequence timestampString) { + return nanosFromTimestamptz( + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); + } + public static boolean isUTCTimestamptz(String timestampString) { OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME); @@ -135,6 +173,11 @@ public static long isoTimestampToMicros(String timestampString) { LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); } + public static long isoTimestampToNanos(CharSequence timestampString) { + return nanosFromTimestamp( + LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + } + public static int daysToYears(int days) { return convertDays(days, ChronoUnit.YEARS); } @@ -185,6 +228,36 @@ private static int convertMicros(long micros, ChronoUnit granularity) { } } + public static int nanosToYears(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.YEARS)); + } + + public static int nanosToMonths(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.MONTHS)); + } + + public static int nanosToDays(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.DAYS)); + } + + public static int nanosToHours(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.HOURS)); + } + + private 
static long convertNanos(long nanos, ChronoUnit granularity) { + if (nanos >= 0) { + long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); + return granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + } else { + // add 1 nano to the whole value (seconds and nanos) to account for the case where there is + // exactly 1 unit between the timestamp and epoch because the result is always decremented. + long epochSecond = Math.floorDiv(nanos + 1, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos + 1, NANOS_PER_SECOND); + return granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + } + } + private static OffsetDateTime toOffsetDateTime(long epochSecond, long nanoAdjustment) { return Instant.ofEpochSecond(epochSecond, nanoAdjustment).atOffset(ZoneOffset.UTC); } diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java index 5e4ca1fb11be..11f2cb353880 100644 --- a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java +++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java @@ -34,7 +34,8 @@ public class PartitionSpecTestBase { Types.NestedField.required(7, "s", Types.StringType.get()), Types.NestedField.required(8, "u", Types.UUIDType.get()), Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), - Types.NestedField.required(10, "b", Types.BinaryType.get())); + Types.NestedField.required(10, "b", Types.BinaryType.get()), + Types.NestedField.required(11, "tsn", Types.TimestampNanoType.withoutZone())); // a spec with all of the allowed transform/type pairs public static final PartitionSpec[] SPECS = @@ -49,6 +50,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).identity("u").build(), PartitionSpec.builderFor(SCHEMA).identity("f").build(), PartitionSpec.builderFor(SCHEMA).identity("b").build(), +
PartitionSpec.builderFor(SCHEMA).identity("tsn").build(), PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(), @@ -59,6 +61,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("tsn", 128).build(), PartitionSpec.builderFor(SCHEMA).year("d").build(), PartitionSpec.builderFor(SCHEMA).month("d").build(), PartitionSpec.builderFor(SCHEMA).day("d").build(), @@ -66,6 +69,10 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).month("ts").build(), PartitionSpec.builderFor(SCHEMA).day("ts").build(), PartitionSpec.builderFor(SCHEMA).hour("ts").build(), + PartitionSpec.builderFor(SCHEMA).year("tsn").build(), + PartitionSpec.builderFor(SCHEMA).month("tsn").build(), + PartitionSpec.builderFor(SCHEMA).day("tsn").build(), + PartitionSpec.builderFor(SCHEMA).hour("tsn").build(), PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(), diff --git a/api/src/test/java/org/apache/iceberg/TestAccessors.java b/api/src/test/java/org/apache/iceberg/TestAccessors.java index 332556e474c7..7b4feb845f12 100644 --- a/api/src/test/java/org/apache/iceberg/TestAccessors.java +++ b/api/src/test/java/org/apache/iceberg/TestAccessors.java @@ -180,6 +180,8 @@ public void testTime() { public void testTimestamp() { assertAccessorReturns(Types.TimestampType.withoutZone(), 123L); assertAccessorReturns(Types.TimestampType.withZone(), 123L); + assertAccessorReturns(Types.TimestampNanoType.withoutZone(), 123L); + assertAccessorReturns(Types.TimestampNanoType.withZone(), 123L); } @Test diff --git 
a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java index 5455415da015..31a6c486bf6e 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java @@ -54,6 +54,44 @@ public void testPartitionPath() { .isEqualTo("ts_hour=2017-12-01-10/id_bucket=" + idBucket); } + @Test + public void testPartitionPathWithNanoseconds() { + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("ts").bucket("id", 10).build(); + + Transform hour = Transforms.hour(); + Transform bucket = Transforms.bucket(10); + + Literal ts = + Literal.of("2017-12-01T10:12:55.038194789").to(Types.TimestampNanoType.withoutZone()); + Object tsHour = hour.bind(Types.TimestampNanoType.withoutZone()).apply(ts.value()); + Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); + + Row partition = Row.of(tsHour, idBucket); + + assertThat(spec.partitionToPath(partition)) + .as("Should produce expected partition key") + .isEqualTo("ts_hour=2017-12-01-10/id_bucket=" + idBucket); + } + + @Test + public void testPartitionPathWithNanosecondsTz() { + PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).hour("ts").bucket("id", 10).build(); + + Transform hour = Transforms.hour(); + Transform bucket = Transforms.bucket(10); + + Literal ts = + Literal.of("2017-12-01T10:12:55.038194789-08:00").to(Types.TimestampNanoType.withZone()); + Object tsTzHour = hour.bind(Types.TimestampNanoType.withZone()).apply(ts.value()); + Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); + + Row partition = Row.of(tsTzHour, idBucket); + + assertThat(spec.partitionToPath(partition)) + .as("Should produce expected partition key") + .isEqualTo("ts_hour=2017-12-01-18/id_bucket=" + idBucket); + } + @Test public void testEscapedStrings() { PartitionSpec spec = diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java 
b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java index 894989f38e7d..10d3b6d0adfa 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java @@ -42,11 +42,12 @@ public class TestExpressionUtil { Types.NestedField.required(2, "val", Types.IntegerType.get()), Types.NestedField.required(3, "val2", Types.IntegerType.get()), Types.NestedField.required(4, "ts", Types.TimestampType.withoutZone()), - Types.NestedField.required(5, "date", Types.DateType.get()), - Types.NestedField.required(6, "time", Types.DateType.get()), - Types.NestedField.optional(7, "data", Types.StringType.get()), - Types.NestedField.optional(8, "measurement", Types.DoubleType.get()), - Types.NestedField.optional(9, "test", Types.IntegerType.get())); + Types.NestedField.required(5, "tsns", Types.TimestampNanoType.withoutZone()), + Types.NestedField.required(6, "date", Types.DateType.get()), + Types.NestedField.required(7, "time", Types.DateType.get()), + Types.NestedField.optional(8, "data", Types.StringType.get()), + Types.NestedField.optional(9, "measurement", Types.DoubleType.get()), + Types.NestedField.optional(10, "test", Types.IntegerType.get())); private static final Types.StructType STRUCT = SCHEMA.asStruct(); @@ -460,7 +461,9 @@ public void testSanitizeTimestamp() { "2022-04-29T23:49:51", "2022-04-29T23:49:51.123456", "2022-04-29T23:49:51-07:00", - "2022-04-29T23:49:51.123456+01:00")) { + "2022-04-29T23:49:51.123456+01:00", + "2022-04-29T23:49:51.123456789", + "2022-04-29T23:49:51.123456789+01:00")) { assertEquals( Expressions.equal("test", "(timestamp)"), ExpressionUtil.sanitize(Expressions.equal("test", timestamp))); @@ -496,6 +499,13 @@ public void testSanitizeTimestampAboutNow() { Expression.Operation.EQ, "test", Literal.of(nowLocal).to(Types.TimestampType.withoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-about-now)"), + 
ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(nowLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -522,6 +532,13 @@ public void testSanitizeTimestampPast() { Expression.Operation.EQ, "test", Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.withoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -548,6 +565,13 @@ public void testSanitizeTimestampLastWeek() { Expression.Operation.EQ, "test", Literal.of(lastWeekLocal).to(Types.TimestampType.withoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-7-days-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(lastWeekLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -574,6 +598,13 @@ public void testSanitizeTimestampFuture() { Expression.Operation.EQ, "test", Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.withoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-from-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampNanoType.withoutZone())))); assertThat( ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowLocal))) @@ 
-597,6 +628,13 @@ public void testSanitizeTimestamptzAboutNow() { Expression.Operation.EQ, "test", Literal.of(nowUtc).to(Types.TimestampType.withZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-about-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(nowUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -618,6 +656,13 @@ public void testSanitizeTimestamptzPast() { Expression.Operation.EQ, "test", Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.withZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -639,6 +684,13 @@ public void testSanitizeTimestamptzLastWeek() { Expression.Operation.EQ, "test", Literal.of(lastWeekUtc).to(Types.TimestampType.withZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-7-days-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(lastWeekUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -660,6 +712,13 @@ public void testSanitizeTimestamptzFuture() { Expression.Operation.EQ, "test", Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.withZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-from-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + 
"test", + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampNanoType.withZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowUtc))) .as("Sanitized string should be identical except for descriptive literal") diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java index d5aa251ffb50..24fc458b37b4 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java @@ -38,8 +38,10 @@ public void testLiterals() throws Exception { Literal.of(8.75D), Literal.of("2017-11-29").to(Types.DateType.get()), Literal.of("11:30:07").to(Types.TimeType.get()), - Literal.of("2017-11-29T11:30:07.123").to(Types.TimestampType.withoutZone()), - Literal.of("2017-11-29T11:30:07.123+01:00").to(Types.TimestampType.withZone()), + Literal.of("2017-11-29T11:30:07.123456").to(Types.TimestampType.withoutZone()), + Literal.of("2017-11-29T11:30:07.123456+01:00").to(Types.TimestampType.withZone()), + Literal.of("2017-11-29T11:30:07.123456789").to(Types.TimestampNanoType.withoutZone()), + Literal.of("2017-11-29T11:30:07.123456789+01:00").to(Types.TimestampNanoType.withZone()), Literal.of("abc"), Literal.of(UUID.randomUUID()), Literal.of(new byte[] {1, 2, 3}).to(Types.FixedType.ofLength(3)), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java index f8d2cd49d969..e2611ddb281f 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java @@ -42,7 +42,9 @@ public void testIdentityConversions() { Pair.of(Literal.of("34.55"), Types.DecimalType.of(9, 2)), Pair.of(Literal.of("2017-08-18"), 
Types.DateType.get()), Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()), - Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampType.withoutZone()), + Pair.of(Literal.of("2017-08-18T14:21:01.919432"), Types.TimestampType.withoutZone()), + Pair.of( + Literal.of("2017-08-18T14:21:01.919432755"), Types.TimestampNanoType.withoutZone()), Pair.of(Literal.of("abc"), Types.StringType.get()), Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()), Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)), @@ -62,6 +64,22 @@ public void testIdentityConversions() { } } + @Test + public void testTimestampWithMicrosecondsToDate() { + final Literal micros = + Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampType.withoutZone()); + final Literal dateOfNanos = micros.to(Types.DateType.get()); + assertThat(dateOfNanos).isEqualTo(Literal.of("2017-08-18").to(Types.DateType.get())); + } + + @Test + public void testTimestampWithNanosecondsToDate() { + final Literal nanos = + Literal.of("2017-08-18T14:21:01.919432755").to(Types.TimestampNanoType.withoutZone()); + final Literal dateOfNanos = nanos.to(Types.DateType.get()); + assertThat(dateOfNanos).isEqualTo(Literal.of("2017-08-18").to(Types.DateType.get())); + } + @Test public void testBinaryToFixed() { Literal lit = Literal.of(ByteBuffer.wrap(new byte[] {0, 1, 2})); @@ -101,6 +119,8 @@ public void testInvalidBooleanConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -116,6 +136,8 @@ public void testInvalidIntegerConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -144,6 +166,8 
@@ public void testInvalidFloatConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -161,6 +185,8 @@ public void testInvalidDoubleConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -179,6 +205,8 @@ public void testInvalidDateConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -198,6 +226,8 @@ public void testInvalidTimeConversions() { Types.DateType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -206,9 +236,26 @@ public void testInvalidTimeConversions() { } @Test - public void testInvalidTimestampConversions() { + public void testInvalidTimestampMicrosConversions() { testInvalidConversions( - Literal.of("2017-08-18T14:21:01.919").to(Types.TimestampType.withoutZone()), + Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.withoutZone()), + Types.BooleanType.get(), + Types.IntegerType.get(), + Types.LongType.get(), + Types.FloatType.get(), + Types.DoubleType.get(), + Types.TimeType.get(), + Types.DecimalType.of(9, 4), + Types.StringType.get(), + Types.UUIDType.get(), + Types.FixedType.ofLength(1), + Types.BinaryType.get()); + } + + @Test + public void testInvalidTimestampNanosConversions() { + testInvalidConversions( + 
Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampNanoType.withoutZone()), Types.BooleanType.get(), Types.IntegerType.get(), Types.LongType.get(), @@ -233,8 +280,10 @@ public void testInvalidDecimalConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -267,8 +316,10 @@ public void testInvalidUUIDConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.FixedType.ofLength(1), @@ -286,8 +337,10 @@ public void testInvalidFixedConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -307,6 +360,8 @@ public void testInvalidBinaryConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index 1dc2225b3805..45d6654f6aaf 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ 
b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -132,6 +132,51 @@ public void testStringToTimestampLiteral() { .isEqualTo(avroValue); } + @Test + public void testStringToTimestampLiteralWithMicrosecondPrecisionFromNanoseconds() { + // use Avro's timestamp conversion to validate the result + Schema avroSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + TimeConversions.TimestampMicrosConversion avroConversion = + new TimeConversions.TimestampMicrosConversion(); + + Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); + Literal timestamp = timestampStr.to(Types.TimestampType.withoutZone()); + long avroValue = + avroConversion.toLong( + LocalDateTime.of(2017, 8, 18, 14, 21, 1, 123456000).toInstant(ZoneOffset.UTC), + avroSchema, + avroSchema.getLogicalType()); + + assertThat((long) timestamp.value()) + .as("Timestamp without zone should match UTC") + .isEqualTo(avroValue); + } + + @Test + public void testStringToTimestampLiteralWithNanosecondPrecisionFromNanoseconds() { + Literal timestampStr = Literal.of("2017-08-18T14:21:01.123456789"); + Literal timestamp = timestampStr.to(Types.TimestampNanoType.withoutZone()); + + // Not only using Avro's timestamp conversion as it has no timestampNanos(). 
+ long expected = 1503066061123456789L; + assertThat((long) timestamp.value()) + .as("Timestamp without zone should match UTC") + .isEqualTo(expected); + + // use Avro's timestamp conversion to validate the result within one microsecond + Schema avroSchema = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + TimeConversions.TimestampMicrosConversion avroConversion = + new TimeConversions.TimestampMicrosConversion(); + long avroValue = + avroConversion.toLong( + LocalDateTime.of(2017, 8, 18, 14, 21, 1, 123456000).toInstant(ZoneOffset.UTC), + avroSchema, + avroSchema.getLogicalType()); + assertThat(timestamp.value() - avroValue * 1000) + .as("Timestamp without zone should match UTC") + .isEqualTo(789L); + } + @Test public void testNegativeStringToTimestampLiteral() { // use Avro's timestamp conversion to validate the result @@ -181,8 +226,13 @@ public void testNegativeStringToTimestampLiteral() { @Test public void testTimestampWithZoneWithoutZoneInLiteral() { // Zone must be present in literals when converting to timestamp with zone - Literal timestampStr = Literal.of("2017-08-18T14:21:01.919"); - assertThatThrownBy(() -> timestampStr.to(Types.TimestampType.withZone())) + assertThatThrownBy( + () -> Literal.of("2017-08-18T14:21:01.919").to(Types.TimestampType.withZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampNanoType.withZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } @@ -190,8 +240,14 @@ public void testTimestampWithZoneWithoutZoneInLiteral() { @Test public void testTimestampWithoutZoneWithZoneInLiteral() { // Zone must not be present in literals when converting to timestamp without zone - Literal timestampStr = Literal.of("2017-08-18T14:21:01.919+07:00"); - assertThatThrownBy(() -> timestampStr.to(Types.TimestampType.withoutZone())) + 
assertThatThrownBy( + () -> Literal.of("2017-08-18T14:21:01.919+07:00").to(Types.TimestampType.withoutZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123456+07:00") + .to(Types.TimestampNanoType.withoutZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java new file mode 100644 index 000000000000..379ad4db5e97 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.expressions; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.time.format.DateTimeParseException; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; +import org.junit.jupiter.api.Test; + +public class TestTimestampLiteralConversions { + @Test + public void testTimestampToTimestampNanoConversion() { + Literal timestamp = + Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1510842668000000L); + + Literal timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(1510842668000000000L); + + timestamp = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(0L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(0L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1000L); + } + + @Test + public void testTimestampToDateConversion() { + Literal ts = + Literal.of("2017-11-16T14:31:08.000001").to(Types.TimestampType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000001").to(Types.TimestampType.withoutZone()); + 
dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + + ts = Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0).isEqualTo(DateTimeUtil.isoDateToDays("1970-01-01")); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0).isEqualTo(DateTimeUtil.isoDateToDays("1970-01-01")); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1).isEqualTo(DateTimeUtil.isoDateToDays("1969-12-31")); + } + + @Test + public void testTimestampMicrosToDateConversion() { + Literal ts = + Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = 
Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + } + + @Test + public void testTimestampNanoToTimestampConversion() { + Literal timestamp = + Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1510842668000000001L); + + Literal timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(1510842668000000L); + + timestamp = + Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = + Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1L); + + timestamp = + Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1000L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1L); + } + + @Test + public void testTimestampNanosToDateConversion() { + Literal ts = + Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = 
Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + } + + @Test + public void testTimestampNanosWithZoneConversion() { + Literal isoTimestampNanosWithZoneOffset = + Literal.of("2017-11-16T14:31:08.000000001+00:00"); + + assertThatThrownBy(() -> isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampNanosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withZone()).value()) + .isEqualTo(1510842668000000L); + + assertThat(isoTimestampNanosWithZoneOffset.to(Types.TimestampNanoType.withZone()).value()) + .isEqualTo(1510842668000000001L); + } + + @Test + public void testTimestampMicrosWithZoneConversion() { + Literal isoTimestampMicrosWithZoneOffset = + Literal.of("2017-11-16T14:31:08.000001+00:00"); + + assertThatThrownBy(() -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withZone()).value()) + 
.isEqualTo(1510842668000001L); + + assertThat(isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withZone()).value()) + .isEqualTo(1510842668000001000L); + } + + @Test + public void testTimestampNanosWithoutZoneConversion() { + Literal isoTimestampNanosWithoutZoneOffset = + Literal.of("2017-11-16T14:31:08.000000001"); + + assertThatThrownBy(() -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) + .isEqualTo(1510842668000000L); + + assertThat(isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + .isEqualTo(1510842668000000001L); + } + + @Test + public void testTimestampMicrosWithoutZoneConversion() { + Literal isoTimestampMicrosWithoutZoneOffset = + Literal.of("2017-11-16T14:31:08.000001"); + + assertThatThrownBy(() -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) + .isEqualTo(1510842668000001L); + + assertThat( + isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + .isEqualTo(1510842668000001000L); + } +} diff --git 
a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index f397f94f0ce5..585694afd762 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -165,6 +165,62 @@ public void testLong() { .isEqualTo(hashBytes(buffer.array())); } + @Test + public void testTimestampNanoPromotion() { + Types.TimestampType tsType = Types.TimestampType.withoutZone(); + Types.TimestampNanoType tsNsType = Types.TimestampNanoType.withoutZone(); + Bucket tsNsBucket = Bucket.get(tsNsType, 1); + Bucket tsBucket = Bucket.get(tsType, 1); + + // Values from spec Appendix B: 32-bit Hash Requirements + assertThat(tsBucket.hash(Literal.of("2017-11-16T22:31:08").to(tsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + assertThat(tsNsBucket.hash(Literal.of("2017-11-16T22:31:08").to(tsNsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + + assertThat(tsBucket.hash(Literal.of("2017-11-16T22:31:08.000001").to(tsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); + assertThat(tsNsBucket.hash(Literal.of("2017-11-16T22:31:08.000001001").to(tsNsType).value())) + .as( + "Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); + } + + @Test + public void testTimestampTzNanoPromotion() { + Types.TimestampType tsTzType = Types.TimestampType.withZone(); + Types.TimestampNanoType tsTzNsType = Types.TimestampNanoType.withZone(); + Bucket tsTzNsBucket = Bucket.get(tsTzNsType, 1); + Bucket tsTzBucket = Bucket.get(tsTzType, 1); + + // Values from 
spec Appendix B: 32-bit Hash Requirements + assertThat(tsTzBucket.hash(Literal.of("2017-11-16T14:31:08-08:00").to(tsTzType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + assertThat(tsTzNsBucket.hash(Literal.of("2017-11-16T14:31:08-08:00").to(tsTzNsType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441 for Timestamp and TimestampNano should match") + .isEqualTo(-2047944441); + + assertThat(tsTzBucket.hash(Literal.of("2017-11-16T14:31:08.000001-08:00").to(tsTzType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); + assertThat( + tsTzNsBucket.hash( + Literal.of("2017-11-16T14:31:08.000001001-08:00").to(tsTzNsType).value())) + .as( + "Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810 for Timestamp and TimestampNano should match") + .isEqualTo(-1207196810); + } + @Test public void testIntegerTypePromotion() { int randomInt = testRandom.nextInt(); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java index b9c380244666..c899b4cfa1cb 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java @@ -26,6 +26,75 @@ import org.junit.jupiter.api.Test; public class TestDates { + @Test + public void testSatisfiesOrderOfDates() { + assertThat(Dates.DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + 
assertThat(Dates.YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimestamps() { + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimestampNanos() { + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + 
assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimeTransforms() { + assertThat(Dates.DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + @Test @SuppressWarnings("deprecation") public void testDeprecatedDateTransform() { diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index 6101fdf0986d..93d3281411f3 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -106,7 +106,7 @@ public void testTimestampWithZoneHumanString() { // value will always be in UTC assertThat(identity.toHumanString(timestamptz, ts.value())) .as("Should produce timestamp with time zone adjusted to UTC") - .isEqualTo("2017-12-01T18:12:55.038194Z"); + .isEqualTo("2017-12-01T18:12:55.038194+00:00"); } @Test diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java new 
file mode 100644 index 000000000000..2a161f9bc822 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestTimeTransforms { + @Test + public void testMicrosSatisfiesOrderOfDates() { + assertThat(Hours.get().satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Dates.DAY)).isFalse(); + 
assertThat(Years.get().satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestamps() { + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestampNanos() { + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + 
assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimeTransforms() { + assertThat(Hours.get().satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Months.get())).isFalse(); + 
assertThat(Years.get().satisfiesOrderOf(Years.get())).isTrue(); + } + + @Test + public void testHoursToEnum() { + Hours hours = Hours.get(); + Types.DateType type = Types.DateType.get(); + assertThatThrownBy(() -> hours.toEnum(type)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported type: date"); + } +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 3c37e643eb95..78b0e67c686b 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -26,13 +26,222 @@ import org.junit.jupiter.api.Test; public class TestTimestamps { + @Test + public void testMicrosSatisfiesOrderOfDates() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestamps() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + 
assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestampNanos() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimeTransforms() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Months.get())).isTrue(); + 
assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfDates() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimestamps() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimestampNanos() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimeTransforms() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Years.get())).isTrue(); 
+ } + @Test @SuppressWarnings("deprecation") public void testDeprecatedTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999").to(type); + + Transform years = Transforms.year(type); + assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); + assertThat((int) years.apply(pts.value())).as("Should produce 1970 - 1970 = 0").isZero(); + assertThat((int) years.apply(nts.value())).as("Should produce 1969 - 1970 = -1").isEqualTo(-1); + + Transform months = Transforms.month(type); + assertThat((int) months.apply(ts.value())) + .as("Should produce 47 * 12 + 11 = 575") + .isEqualTo(575); + assertThat((int) months.apply(pts.value())).as("Should produce 0 * 12 + 0 = 0").isZero(); + assertThat((int) months.apply(nts.value())).isEqualTo(-1); + + Transform days = Transforms.day(type); + assertThat((int) days.apply(ts.value())).as("Should produce 17501").isEqualTo(17501); + assertThat((int) days.apply(pts.value())).as("Should produce 0 * 365 + 0 = 0").isZero(); + assertThat((int) days.apply(nts.value())).isEqualTo(-1); + + Transform hours = Transforms.hour(type); + assertThat((int) hours.apply(ts.value())) + .as("Should produce 17501 * 24 + 10") + .isEqualTo(420034); + assertThat((int) hours.apply(pts.value())).as("Should produce 0 * 24 + 0 = 0").isZero(); + assertThat((int) hours.apply(nts.value())).isEqualTo(-1); + } + + @Test + @SuppressWarnings("deprecation") + public void testDeprecatedTimestampNanoTransform() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); + Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); + Literal nts = 
Literal.of("1969-12-31T23:59:59.999999999").to(type); Transform years = Transforms.year(type); assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); @@ -64,7 +273,51 @@ public void testTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999").to(type); + + Transform years = Transforms.year(); + assertThat((int) years.bind(type).apply(ts.value())) + .as("Should produce 2017 - 1970 = 47") + .isEqualTo(47); + assertThat((int) years.bind(type).apply(pts.value())) + .as("Should produce 1970 - 1970 = 0") + .isZero(); + assertThat((int) years.bind(type).apply(nts.value())) + .as("Should produce 1969 - 1970 = -1") + .isEqualTo(-1); + + Transform months = Transforms.month(); + assertThat((int) months.bind(type).apply(ts.value())) + .as("Should produce 47 * 12 + 11 = 575") + .isEqualTo(575); + assertThat((int) months.bind(type).apply(pts.value())) + .as("Should produce 0 * 12 + 0 = 0") + .isZero(); + assertThat((int) months.bind(type).apply(nts.value())).isEqualTo(-1); + + Transform days = Transforms.day(); + assertThat((int) days.bind(type).apply(ts.value())).as("Should produce 17501").isEqualTo(17501); + assertThat((int) days.bind(type).apply(pts.value())) + .as("Should produce 0 * 365 + 0 = 0") + .isZero(); + assertThat((int) days.bind(type).apply(nts.value())).isEqualTo(-1); + + Transform hours = Transforms.hour(); + assertThat((int) hours.bind(type).apply(ts.value())) + .as("Should produce 17501 * 24 + 10") + .isEqualTo(420034); + assertThat((int) hours.bind(type).apply(pts.value())) + .as("Should produce 0 * 24 + 0 = 0") + .isZero(); + assertThat((int) hours.bind(type).apply(nts.value())).isEqualTo(-1); + } + + @Test + public void 
testTimestampNanoTransform() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); + Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999999").to(type); Transform years = Transforms.year(); assertThat((int) years.bind(type).apply(ts.value())) @@ -123,6 +376,26 @@ public void testTimestampWithoutZoneToHumanString() { .isEqualTo("2017-12-01-10"); } + @Test + public void testTimestampNanoWithoutZoneToHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("2017-12-01T10:12:55.038194789").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("2017"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("2017-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("2017-12-01"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("2017-12-01-10"); + } + @Test public void testNegativeTimestampWithoutZoneToHumanString() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -143,6 +416,26 @@ public void testNegativeTimestampWithoutZoneToHumanString() { .isEqualTo("1969-12-30-10"); } + @Test + public void testNegativeTimestampNanoWithoutZoneToHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("1969-12-30T10:12:55.038194789").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, 
month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-30"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-30-10"); + } + @Test public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -163,6 +456,26 @@ public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { .isEqualTo("1969-12-30-00"); } + @Test + public void testNegativeTimestampNanoWithoutZoneToHumanStringLowerBound() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("1969-12-30T00:00:00.000000000").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-30"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-30-00"); + } + @Test public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { Types.TimestampType type = Types.TimestampType.withoutZone(); @@ -183,6 +496,26 @@ public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { .isEqualTo("1969-12-31-23"); } + @Test + public void testNegativeTimestampNanoWithoutZoneToHumanStringUpperBound() { + Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); + Literal date = Literal.of("1969-12-31T23:59:59.999999999").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, 
year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-31"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-31-23"); + } + @Test public void testTimestampWithZoneToHumanString() { Types.TimestampType type = Types.TimestampType.withZone(); @@ -205,7 +538,28 @@ public void testTimestampWithZoneToHumanString() { } @Test - public void testNullHumanString() { + public void testTimestampNanoWithZoneToHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); + Literal date = Literal.of("2017-12-01T10:12:55.038194789-08:00").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("2017"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("2017-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("2017-12-01"); + + // the hour is 18 because the value is always UTC + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("2017-12-01-18"); + } + + @Test + public void testTimestampNullHumanString() { Types.TimestampType type = Types.TimestampType.withZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") @@ -221,6 +575,23 @@ public void testNullHumanString() { .isEqualTo("null"); } + @Test + public void testTimestampNanoNullHumanString() { + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); + 
assertThat(Transforms.year().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.month().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.day().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.hour().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + } + @Test public void testTimestampsReturnType() { Types.TimestampType type = Types.TimestampType.withZone(); @@ -241,4 +612,25 @@ public void testTimestampsReturnType() { Type hourResultType = hour.getResultType(type); assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } + + @Test + public void testTimestampNanosReturnType() { + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); + + Transform year = Transforms.year(); + Type yearResultType = year.getResultType(type); + assertThat(yearResultType).isEqualTo(Types.IntegerType.get()); + + Transform month = Transforms.month(); + Type monthResultType = month.getResultType(type); + assertThat(monthResultType).isEqualTo(Types.IntegerType.get()); + + Transform day = Transforms.day(); + Type dayResultType = day.getResultType(type); + assertThat(dayResultType).isEqualTo(Types.DateType.get()); + + Transform hour = Transforms.hour(); + Type hourResultType = hour.getResultType(type); + assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); + } } diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index 165d96c029cc..07653ba3c8a8 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -79,6 +79,12 @@ public void testTimestamp() { assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); } + @Test + 
public void testTimestampNanos() { + assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withZone()), 111, 222); + } + @Test public void testString() { assertComparesCorrectly(Comparators.forType(Types.StringType.get()), "a", "b"); diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 6c7a884a5839..e207cfd8d59a 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -37,6 +37,7 @@ import org.apache.iceberg.types.Types.LongType; import org.apache.iceberg.types.Types.StringType; import org.apache.iceberg.types.Types.TimeType; +import org.apache.iceberg.types.Types.TimestampNanoType; import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.types.Types.UUIDType; import org.junit.jupiter.api.Test; @@ -93,7 +94,7 @@ public void testByteBufferConversions() { assertThat(Literal.of(10000L).to(TimeType.get()).toByteBuffer().array()) .isEqualTo(new byte[] {16, 39, 0, 0, 0, 0, 0, 0}); - // timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte + // timestamps are stored as micro|nanoseconds from 1970-01-01 00:00:00 in an 8-byte // little-endian long // 400000L is 0...110|00011010|10000000 in binary // 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0 @@ -103,6 +104,16 @@ public void testByteBufferConversions() { .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + // values passed to assertConversion and Literal.of differ because Literal.of(...) 
assumes + // the value is in micros, which gets converted when to(TimestampNanoType) is called + assertConversion( + 400000000L, TimestampNanoType.withoutZone(), new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); + assertConversion( + 400000000L, TimestampNanoType.withZone(), new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampNanoType.withoutZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampNanoType.withZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); // strings are stored as UTF-8 bytes (without length) // 'A' -> 65, 'B' -> 66, 'C' -> 67 diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index 7f5948bd5838..2d02da5346a7 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -39,6 +39,8 @@ public class TestReadabilityChecks { Types.TimeType.get(), Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(3), diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index d981b5a26789..96c330d6eb43 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -41,6 +41,8 @@ public void testIdentityTypes() throws Exception { Types.TimeType.get(), Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampNanoType.withoutZone(), + Types.TimestampNanoType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get() 
diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index 5db7ca7cd1bc..226c53f1e9ce 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -31,6 +31,11 @@ public void fromPrimitiveString() { assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); assertThat(Types.fromPrimitiveString("timestamp")).isSameAs(Types.TimestampType.withoutZone()); + assertThat(Types.fromPrimitiveString("timestamptz")).isSameAs(Types.TimestampType.withZone()); + assertThat(Types.fromPrimitiveString("timestamp_ns")) + .isSameAs(Types.TimestampNanoType.withoutZone()); + assertThat(Types.fromPrimitiveString("timestamptz_ns")) + .isSameAs(Types.TimestampNanoType.withZone()); assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")).isEqualTo(Types.FixedType.ofLength(3)); diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 42da3a3dd774..6088fe51b57a 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -20,19 +20,72 @@ import static org.assertj.core.api.Assertions.assertThat; -import java.time.ZonedDateTime; +import org.apache.iceberg.transforms.Transforms; +import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; public class TestDateTimeUtil { + @Test + public void nanosToMicros() { + assertThat(DateTimeUtil.nanosToMicros(1510871468000001001L)).isEqualTo(1510871468000001L); + assertThat(DateTimeUtil.nanosToMicros(-1510871468000001001L)).isEqualTo(-1510871468000002L); + } + + @Test + public void microsToNanos() { + assertThat(DateTimeUtil.microsToNanos(1510871468000001L)).isEqualTo(1510871468000001000L); + assertThat(DateTimeUtil.microsToNanos(-1510871468000001L)).isEqualTo(-1510871468000001000L); + } + + 
@Test + public void isoTimestampToNanos() { + assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T22:31:08.000001001")) + .isEqualTo(1510871468000001001L); + assertThat(DateTimeUtil.isoTimestampToNanos("1922-02-15T01:28:51.999998999")) + .isEqualTo(-1510871468000001001L); + } + + @Test + public void isoTimestamptzToNanos() { + assertThat(DateTimeUtil.isoTimestamptzToNanos("2017-11-16T14:31:08.000001001-08:00")) + .isEqualTo(1510871468000001001L); + assertThat(DateTimeUtil.isoTimestamptzToNanos("1922-02-15T01:28:51.999998999+00:00")) + .isEqualTo(-1510871468000001001L); + } + + @Test + public void convertNanos() { + assertThat( + Transforms.identity() + .toHumanString(Types.TimestampNanoType.withoutZone(), 1510871468000001001L)) + .isEqualTo("2017-11-16T22:31:08.000001001"); + assertThat(DateTimeUtil.nanosToYears(1510871468000001001L)).isEqualTo(47); + assertThat(Transforms.year().toHumanString(Types.IntegerType.get(), 47)).isEqualTo("2017"); + assertThat(DateTimeUtil.nanosToMonths(1510871468000001001L)).isEqualTo(574); + assertThat(Transforms.month().toHumanString(Types.IntegerType.get(), 574)).isEqualTo("2017-11"); + assertThat(DateTimeUtil.nanosToDays(1510871468000001001L)).isEqualTo(17486); + assertThat(Transforms.day().toHumanString(Types.IntegerType.get(), 17486)) + .isEqualTo("2017-11-16"); + assertThat(DateTimeUtil.nanosToHours(1510871468000001001L)).isEqualTo(419686); + assertThat(Transforms.hour().toHumanString(Types.IntegerType.get(), 419686)) + .isEqualTo("2017-11-16-22"); + } @Test - public void formatTimestampMillis() { - String timestamp = "1970-01-01T00:00:00.001+00:00"; - assertThat(DateTimeUtil.formatTimestampMillis(1L)).isEqualTo(timestamp); - assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()).isEqualTo(1L); - - timestamp = "1970-01-01T00:16:40+00:00"; - assertThat(DateTimeUtil.formatTimestampMillis(1000000L)).isEqualTo(timestamp); - assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()).isEqualTo(1000000L); + 
public void convertNanosNegative() { /* Negative counterpart of convertNanos: every ordinal computed for the 1922 value is fed back through the matching transform, so the human-readable form is verified for the negative ordinal itself rather than for the positive 2017 ordinals. */ + assertThat( + Transforms.identity() + .toHumanString(Types.TimestampNanoType.withZone(), -1510871468000001001L)) + .isEqualTo("1922-02-15T01:28:51.999998999+00:00"); + assertThat(DateTimeUtil.nanosToYears(-1510871468000001001L)).isEqualTo(-48); + assertThat(Transforms.year().toHumanString(Types.IntegerType.get(), -48)).isEqualTo("1922"); + assertThat(DateTimeUtil.nanosToMonths(-1510871468000001001L)).isEqualTo(-575); + assertThat(Transforms.month().toHumanString(Types.IntegerType.get(), -575)).isEqualTo("1922-02"); + assertThat(DateTimeUtil.nanosToDays(-1510871468000001001L)).isEqualTo(-17487); + assertThat(Transforms.day().toHumanString(Types.IntegerType.get(), -17487)) + .isEqualTo("1922-02-15"); + assertThat(DateTimeUtil.nanosToHours(-1510871468000001001L)).isEqualTo(-419687); + assertThat(Transforms.hour().toHumanString(Types.IntegerType.get(), -419687)) + .isEqualTo("1922-02-15-01"); } } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index a7edddecad52..12336fd99879 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -51,7 +51,7 @@ public class TableMetadata implements Serializable { static final long INITIAL_SEQUENCE_NUMBER = 0; static final long INVALID_SEQUENCE_NUMBER = -1; static final int DEFAULT_TABLE_FORMAT_VERSION = 2; - static final int SUPPORTED_TABLE_FORMAT_VERSION = 2; + static final int SUPPORTED_TABLE_FORMAT_VERSION = 3; static final int INITIAL_SPEC_ID = 0; static final int INITIAL_SORT_ORDER_ID = 1; static final int INITIAL_SCHEMA_ID = 0; @@ -1489,6 +1489,8 @@ private int addSchemaInternal(Schema schema, int newLastColumnId) { newLastColumnId, lastColumnId); + Schema.checkCompatibility(schema, formatVersion); + int newSchemaId = reuseOrCreateNewSchemaId(schema); boolean schemaFound = schemasById.containsKey(newSchemaId); if (schemaFound && 
newLastColumnId == lastColumnId) { diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 8075372d09c1..b4a9124baede 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -61,6 +61,7 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.JsonUtil; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -1627,4 +1628,57 @@ public void buildReplacementKeepsSnapshotLog() throws Exception { .hasSize(2) .containsExactlyElementsOf(metadata.snapshotLog()); } + + @Test + public void testConstructV3Metadata() { /* Building metadata with format version 3 must complete without throwing. */ + TableMetadata.newTableMetadata( + TEST_SCHEMA, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + 3); + } + + @Test + public void testV3TimestampNanoTypeSupport() { /* A nested timestamptz_ns field must be rejected with the expected message for format versions 1 and 2, and accepted for format version 3. */ + Schema v3Schema = + new Schema( + Types.NestedField.required(3, "id", Types.LongType.get()), + Types.NestedField.required(4, "data", Types.StringType.get()), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of( + Types.NestedField.optional( + 6, "ts_nanos", Types.TimestampNanoType.withZone())))); + + for (int unsupportedFormatVersion : ImmutableList.of(1, 2)) { /* The thrown exception is captured and its message compared explicitly: a String passed as the third argument of assertThrows is only the failure description and is never matched against the exception. */ + IllegalStateException thrown = + Assertions.assertThrows( + IllegalStateException.class, + () -> + TableMetadata.newTableMetadata( + v3Schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + unsupportedFormatVersion)); + Assertions.assertEquals( + String.format( + "Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is not supported until v3", + unsupportedFormatVersion), + thrown.getMessage()); + } + + // should be allowed in v3 + TableMetadata.newTableMetadata( + v3Schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + TEST_LOCATION, + ImmutableMap.of(), + 3); + } } diff --git 
a/core/src/test/resources/TableMetadataUnsupportedVersion.json b/core/src/test/resources/TableMetadataUnsupportedVersion.json index 0633a71d24d5..c40a0c9cd5ae 100644 --- a/core/src/test/resources/TableMetadataUnsupportedVersion.json +++ b/core/src/test/resources/TableMetadataUnsupportedVersion.json @@ -1,5 +1,5 @@ { - "format-version": 3, + "format-version": 4, "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c", "location": "s3://bucket/test/location", "last-updated-ms": 1602638573874, @@ -33,4 +33,4 @@ "properties": {}, "current-snapshot-id": -1, "snapshots": [] -} \ No newline at end of file +} diff --git a/format/spec.md b/format/spec.md index e590a8b95116..9b447db56cb4 100644 --- a/format/spec.md +++ b/format/spec.md @@ -1050,10 +1050,10 @@ The 32-bit hash implementation is 32-bit Murmur3 hash, x86 variant, seeded with | **`time`** | `hashLong(microsecsFromMidnight(v))` | `22:31:08` → `-662762989` | | **`timestamp`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001` → `-1207196810` | | **`timestamptz`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001-08:00` → `-1207196810` | -| **`timestamp_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-737750069`
`2017-11-16T22:31:08.000001` → `-976603392`
`2017-11-16T22:31:08.000000001` → `-160215926` | -| **`timestamptz_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-737750069`
`2017-11-16T14:31:08.000001-08:00` → `-976603392`
`2017-11-16T14:31:08.000000001-08:00` → `-160215926` | +| **`timestamp_ns`** | `hashLong(microsecsFromUnixEpoch(v))` [3] | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001001` → `-1207196810` | +| **`timestamptz_ns`** | `hashLong(microsecsFromUnixEpoch(v))` [3]| `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001001-08:00` → `-1207196810` | | **`string`** | `hashBytes(utf8Bytes(v))` | `iceberg` → `1210000089` | -| **`uuid`** | `hashBytes(uuidBytes(v))` [3] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | +| **`uuid`** | `hashBytes(uuidBytes(v))` [4] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | | **`fixed(L)`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | | **`binary`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | @@ -1062,17 +1062,18 @@ The types below are not currently valid for bucketing, and so are not hashed. Ho | Primitive type | Hash specification | Test value | |--------------------|-------------------------------------------|--------------------------------------------| | **`boolean`** | `false: hashInt(0)`, `true: hashInt(1)` | `true` → `1392991556` | -| **`float`** | `hashLong(doubleToLongBits(double(v))` [4]| `1.0F` → `-142385009`, `0.0F` → `1669671676`, `-0.0F` → `1669671676` | -| **`double`** | `hashLong(doubleToLongBits(v))` [4]| `1.0D` → `-142385009`, `0.0D` → `1669671676`, `-0.0D` → `1669671676` | +| **`float`** | `hashLong(doubleToLongBits(double(v)))` [5]| `1.0F` → `-142385009`, `0.0F` → `1669671676`, `-0.0F` → `1669671676` | +| **`double`** | `hashLong(doubleToLongBits(v))` [5]| `1.0D` → `-142385009`, `0.0D` → `1669671676`, `-0.0D` → `1669671676` | Notes: 1. Integer and long hash results must be identical for all integer values. This ensures that schema evolution does not change bucket partition values if integer types are promoted. 2. Decimal values are hashed using the minimum number of bytes required to hold the unscaled value as a two’s complement big-endian; this representation does not include padding bytes required for storage in a fixed-length array. Hash results are not dependent on decimal scale, which is part of the type, not the data value. -3. UUIDs are encoded using big endian. The test UUID for the example above is: `f79c3e09-677c-4bbd-a479-3f349cb785e7`. 
This UUID encoded as a byte array is: +3. Nanosecond timestamps must be converted to microsecond precision before hashing, so that a nanosecond timestamp produces the same hash value as the equal timestamp stored with microsecond precision. +4. UUIDs are encoded using big endian. The test UUID for the example above is: `f79c3e09-677c-4bbd-a479-3f349cb785e7`. This UUID encoded as a byte array is: `F7 9C 3E 09 67 7C 4B BD A4 79 3F 34 9C B7 85 E7` -4. `doubleToLongBits` must give the IEEE 754 compliant bit representation of the double value. All `NaN` bit patterns must be canonicalized to `0x7ff8000000000000L`. Negative zero (`-0.0`) must be canonicalized to positive zero (`0.0`). Float hash values are the result of hashing the float cast to double to ensure that schema evolution does not change hash values if float types are promoted. +5. `doubleToLongBits` must give the IEEE 754 compliant bit representation of the double value. All `NaN` bit patterns must be canonicalized to `0x7ff8000000000000L`. Negative zero (`-0.0`) must be canonicalized to positive zero (`0.0`). Float hash values are the result of hashing the float cast to double to ensure that schema evolution does not change hash values if float types are promoted. ## Appendix C: JSON serialization