From e8951ef829560db22d849579f48c335a63629c4c Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 30 Oct 2024 10:18:55 -0700 Subject: [PATCH 1/6] API: Add compatibility checks for default values. --- .../main/java/org/apache/iceberg/Schema.java | 39 ++++-- .../java/org/apache/iceberg/TestSchema.java | 114 ++++++++++++++++++ 2 files changed, 145 insertions(+), 8 deletions(-) create mode 100644 api/src/test/java/org/apache/iceberg/TestSchema.java diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 9bcf691f5a03..96ac381e9205 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -27,6 +27,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import java.util.stream.Collectors; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -54,6 +55,7 @@ public class Schema implements Serializable { private static final Joiner NEWLINE = Joiner.on('\n'); private static final String ALL_COLUMNS = "*"; private static final int DEFAULT_SCHEMA_ID = 0; + private static final Integer DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; private static final Map MIN_FORMAT_VERSIONS = ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3); @@ -586,16 +588,37 @@ private List reassignIds(List columns, TypeUtil.GetID * @param formatVersion table format version */ public static void checkCompatibility(Schema schema, int formatVersion) { - // check the type in each field + // accumulate errors as a treemap to keep them in a reasonable order + TreeMap problems = Maps.newTreeMap(); + + // check each field's type and defaults for (NestedField field : schema.lazyIdToField().values()) { Integer minFormatVersion = MIN_FORMAT_VERSIONS.get(field.type().typeId()); - Preconditions.checkState( - minFormatVersion == null || formatVersion >= minFormatVersion, - "Invalid type in v%s schema: %s %s is not supported until v%s", - formatVersion, - schema.findColumnName(field.fieldId()), - field.type(), - minFormatVersion); + if (minFormatVersion != null && formatVersion < minFormatVersion) { + problems.put( + field.fieldId(), + String.format( + "Invalid type for %s: %s is not supported until v%s", + schema.findColumnName(field.fieldId()), field.type(), minFormatVersion)); + } + + if (field.initialDefault() != null && formatVersion < DEFAULT_VALUES_MIN_FORMAT_VERSION) { + problems.put( + field.fieldId(), + String.format( + "Invalid initial default for %s: non-null default (%s) is not supported until v%s", + schema.findColumnName(field.fieldId()), + field.initialDefault(), + DEFAULT_VALUES_MIN_FORMAT_VERSION)); + } + } + + // throw if there are any compatibility problems + if (!problems.isEmpty()) { + throw new IllegalStateException( + String.format( + "Invalid schema for v%s:\n- %s", + formatVersion, Joiner.on("\n- ").join(problems.values()))); } } } diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java new file mode 100644 index 000000000000..7cf27920cb54 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -0,0 +1,114 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestSchema { + private static final Schema TS_NANO_CASES = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "ts", Types.TimestampNanoType.withZone()), + Types.NestedField.optional( + 3, "arr", Types.ListType.ofRequired(4, Types.TimestampNanoType.withoutZone())), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of( + Types.NestedField.optional(6, "inner_ts", Types.TimestampNanoType.withZone()), + Types.NestedField.required(7, "data", Types.StringType.get()))), + Types.NestedField.optional( + 8, + "struct_arr", + Types.StructType.of( + Types.NestedField.optional(9, "ts", Types.TimestampNanoType.withoutZone())))); + + private static final Schema INITIAL_DEFAULT_SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.required("has_default") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("--") + .withWriteDefault("--") + .build()); + + private static final Schema WRITE_DEFAULT_SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.required("has_default") + .withId(2) + .ofType(Types.StringType.get()) + .withWriteDefault("--") + .build()); + + @ParameterizedTest + @ValueSource(ints = {1, 2}) + public void testUnsupportedTimestampNano(int formatVersion) { + Assertions.assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, formatVersion)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Invalid schema for v%s:\n" + + "- Invalid type for ts: timestamptz_ns is not supported until v3\n" + + "- Invalid type for arr.element: timestamp_ns is not supported until v3\n" + + "- Invalid type for struct.inner_ts: timestamptz_ns is not supported until v3\n" + + "- Invalid type for struct_arr.ts: timestamp_ns is not supported until v3", + formatVersion); + } + + @Test + public void testSupportedTimestampNano() { + Assertions.assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)) + .doesNotThrowAnyException(); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2}) + public void testUnsupportedInitialDefault(int formatVersion) { + Assertions.assertThatThrownBy( + () -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Invalid schema for v%s:\n" + + "- Invalid initial default for has_default: " + + "non-null default (--) is not supported until v3", + formatVersion); + } + + @Test + public void testSupportedInitialDefault() { + Assertions.assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3)) + .doesNotThrowAnyException(); + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3}) + public void testSupportedInitialDefault(int formatVersion) { + // only the initial default is a forward-incompatible change + Assertions.assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion)) + .doesNotThrowAnyException(); + } +} From 24ebc85821a35b251bb68e9a3d661f74bded1966 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 30 Oct 2024 10:23:20 -0700 Subject: [PATCH 2/6] Apply spotless. --- .../java/org/apache/iceberg/TestSchema.java | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index 7cf27920cb54..d381824455be 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg; import org.apache.iceberg.types.Types; From 986007036902fce7d269f8e16600c2e91530a8d7 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 30 Oct 2024 11:43:56 -0700 Subject: [PATCH 3/6] Fix test failures and method name. --- api/src/test/java/org/apache/iceberg/TestSchema.java | 2 +- core/src/test/java/org/apache/iceberg/TestTableMetadata.java | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index d381824455be..2bacb83d3e99 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -103,7 +103,7 @@ public void testSupportedInitialDefault() { @ParameterizedTest @ValueSource(ints = {1, 2, 3}) - public void testSupportedInitialDefault(int formatVersion) { + public void testSupportedWriteDefault(int formatVersion) { // only the initial default is a forward-incompatible change Assertions.assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion)) .doesNotThrowAnyException(); diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 5ada35765773..71254b3abb1b 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1672,7 +1672,8 @@ public void testV3TimestampNanoTypeSupport() { unsupportedFormatVersion)) .isInstanceOf(IllegalStateException.class) .hasMessage( - "Invalid type in v%s schema: struct.ts_nanos timestamptz_ns is not supported until v3", + "Invalid schema for v%s:\n" + + "- Invalid type for struct.ts_nanos: timestamptz_ns is not supported until v3", unsupportedFormatVersion); } From 4c9a8302ee775ebd27f327b94b2e6724df446196 Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Wed, 30 Oct 2024 14:43:04 -0500 Subject: [PATCH 4/6] Fix CheckStyle Issues --- api/src/main/java/org/apache/iceberg/Schema.java | 3 +-- .../test/java/org/apache/iceberg/TestSchema.java | 14 ++++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 96ac381e9205..722c912ff1e7 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -27,7 +27,6 @@ import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.TreeMap; import java.util.stream.Collectors; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -589,7 +588,7 @@ private List reassignIds(List columns, TypeUtil.GetID */ public static void checkCompatibility(Schema schema, int formatVersion) { // accumulate errors as a treemap to keep them in a reasonable order - TreeMap problems = Maps.newTreeMap(); + Map problems = Maps.newTreeMap(); // check each field's type and defaults for (NestedField field : schema.lazyIdToField().values()) { diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index 2bacb83d3e99..e8b9609a865b 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -19,11 +19,13 @@ package org.apache.iceberg; import org.apache.iceberg.types.Types; -import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + public class TestSchema { private static final Schema TS_NANO_CASES = new Schema( @@ -65,7 +67,7 @@ public class TestSchema { @ParameterizedTest @ValueSource(ints = {1, 2}) public void testUnsupportedTimestampNano(int formatVersion) { - Assertions.assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, formatVersion)) + assertThatThrownBy(() -> Schema.checkCompatibility(TS_NANO_CASES, formatVersion)) .isInstanceOf(IllegalStateException.class) .hasMessage( "Invalid schema for v%s:\n" @@ -78,14 +80,14 @@ public void testUnsupportedTimestampNano(int formatVersion) { @Test public void testSupportedTimestampNano() { - Assertions.assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)) + assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)) .doesNotThrowAnyException(); } @ParameterizedTest @ValueSource(ints = {1, 2}) public void testUnsupportedInitialDefault(int formatVersion) { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) .isInstanceOf(IllegalStateException.class) .hasMessage( @@ -97,7 +99,7 @@ public void testUnsupportedInitialDefault(int formatVersion) { @Test public void testSupportedInitialDefault() { - Assertions.assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3)) + assertThatCode(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, 3)) .doesNotThrowAnyException(); } @@ -105,7 +107,7 @@ public void testSupportedInitialDefault() { @ValueSource(ints = {1, 2, 3}) public void testSupportedWriteDefault(int formatVersion) { // only the initial default is a forward-incompatible change - Assertions.assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion)) + assertThatCode(() -> Schema.checkCompatibility(WRITE_DEFAULT_SCHEMA, formatVersion)) .doesNotThrowAnyException(); } } From 55b9b1e10967cfeaae0483d14d98a6e3338f8ced Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Wed, 30 Oct 2024 14:51:36 -0500 Subject: [PATCH 5/6] Spotless Apply --- api/src/test/java/org/apache/iceberg/TestSchema.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index e8b9609a865b..fec7343c5cbc 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -18,14 +18,14 @@ */ package org.apache.iceberg; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -import static org.assertj.core.api.Assertions.assertThatCode; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - public class TestSchema { private static final Schema TS_NANO_CASES = new Schema( @@ -80,15 +80,13 @@ public void testUnsupportedTimestampNano(int formatVersion) { @Test public void testSupportedTimestampNano() { - assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)) - .doesNotThrowAnyException(); + assertThatCode(() -> Schema.checkCompatibility(TS_NANO_CASES, 3)).doesNotThrowAnyException(); } @ParameterizedTest @ValueSource(ints = {1, 2}) public void testUnsupportedInitialDefault(int formatVersion) { - assertThatThrownBy( - () -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) + assertThatThrownBy(() -> Schema.checkCompatibility(INITIAL_DEFAULT_SCHEMA, formatVersion)) .isInstanceOf(IllegalStateException.class) .hasMessage( "Invalid schema for v%s:\n" From 534cdab018c36c43638e428b501df45ef7732434 Mon Sep 17 00:00:00 2001 From: Russell Spitzer Date: Wed, 30 Oct 2024 14:58:42 -0500 Subject: [PATCH 6/6] Update api/src/main/java/org/apache/iceberg/Schema.java Co-authored-by: Fokko Driesprong --- api/src/main/java/org/apache/iceberg/Schema.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 722c912ff1e7..44f65ff56a54 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -54,7 +54,7 @@ public class Schema implements Serializable { private static final Joiner NEWLINE = Joiner.on('\n'); private static final String ALL_COLUMNS = "*"; private static final int DEFAULT_SCHEMA_ID = 0; - private static final Integer DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; + private static final int DEFAULT_VALUES_MIN_FORMAT_VERSION = 3; private static final Map MIN_FORMAT_VERSIONS = ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3);