From 1b255c065b46b6e18b1a9a86ce5e544b8bfc2a02 Mon Sep 17 00:00:00 2001 From: Steve Lessard Date: Fri, 28 Jun 2024 13:48:01 -0700 Subject: [PATCH] OTF-920 - fix NullPointerException Handle case where the VectorHolder contains a null value --- .../GenericArrowVectorAccessorFactory.java | 3 +- .../arrow/vectorized/ArrowReaderTest.java | 51 +++++++++++ ...GenericArrowVectorAccessorFactoryTest.java | 86 +++++++++++++++++++ 3 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 arrow/src/test/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactoryTest.java diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactory.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactory.java index a988516bc6df..46e9ff141fa7 100644 --- a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactory.java +++ b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactory.java @@ -221,7 +221,8 @@ private ArrowVectorAccessor getPlai return new FixedSizeBinaryAccessor<>( (FixedSizeBinaryVector) vector, stringFactorySupplier.get()); } - throw new UnsupportedOperationException("Unsupported vector: " + vector.getClass()); + String vectorName = (vector == null) ? "null" : vector.getClass().toString(); + throw new UnsupportedOperationException("Unsupported vector: " + vectorName); } private static boolean isDecimal(PrimitiveType primitive) { diff --git a/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/ArrowReaderTest.java b/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/ArrowReaderTest.java index 9cd9c8cc5abf..1c33eaa02aec 100644 --- a/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/ArrowReaderTest.java +++ b/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/ArrowReaderTest.java @@ -20,6 +20,7 @@ import static org.apache.iceberg.Files.localInput; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; @@ -59,6 +60,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.iceberg.AppendFiles; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; @@ -263,6 +265,55 @@ public void testReadColumnFilter2() throws Exception { scan, NUM_ROWS_PER_MONTH, 12 * NUM_ROWS_PER_MONTH, ImmutableList.of("timestamp")); } + @Test + public void testThrowsUOEWhenNewColumnHasNoValue() throws Exception { + rowsWritten = Lists.newArrayList(); + tables = new HadoopTables(); + + Schema schema = + new Schema( + Types.NestedField.required(1, "a", Types.IntegerType.get()), + Types.NestedField.optional(2, "b", Types.StringType.get()), + Types.NestedField.required(3, "c", Types.DecimalType.of(12, 3))); + + PartitionSpec spec = PartitionSpec.builderFor(schema).build(); + Table table1 = tables.create(schema, spec, tableLocation); + + // Add one record to the table + GenericRecord rec = GenericRecord.create(schema); + rec.setField("a", 1); + rec.setField("b", "san diego"); + rec.setField("c", new BigDecimal("1024.025")); + List genericRecords = Lists.newArrayList(); + genericRecords.add(rec); + + AppendFiles appendFiles = table1.newAppend(); + appendFiles.appendFile(writeParquetFile(table1, genericRecords)); + appendFiles.commit(); + + // Alter the table schema by adding a new, optional column. + // Do not add any data for this new column in the one existing row in the table + // and do not insert any new rows into the table. + Table table = tables.load(tableLocation); + table.updateSchema().addColumn("a1", Types.IntegerType.get()).commit(); + + // Select all columns, all rows from the table + TableScan scan = table.newScan().select("*"); + + assertThatThrownBy( + () -> { + // Read the data. + try (VectorizedTableScanIterable itr = + new VectorizedTableScanIterable(scan, 1000, false)) { + for (ColumnarBatch batch : itr) { + // no-op + } + } + }) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unsupported vector: null"); + } + /** * The test asserts that {@link CloseableIterator#hasNext()} returned by the {@link ArrowReader} * is idempotent. diff --git a/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactoryTest.java b/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactoryTest.java new file mode 100644 index 000000000000..5712688e68d6 --- /dev/null +++ b/arrow/src/test/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactoryTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.arrow.vectorized; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.math.BigDecimal; +import java.util.function.Supplier; +import org.apache.arrow.vector.IntVector; +import org.apache.iceberg.types.Types; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.schema.PrimitiveType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +class GenericArrowVectorAccessorFactoryTest { + @Mock + Supplier> decimalFactorySupplier; + + @Mock Supplier> stringFactorySupplier; + + @Mock + Supplier> + structChildFactorySupplier; + + @Mock + Supplier> arrayFactorySupplier; + + @InjectMocks GenericArrowVectorAccessorFactory genericArrowVectorAccessorFactory; + + @BeforeEach + void before() { + MockitoAnnotations.openMocks(this); + } + + @Test + void testGetVectorAccessorWithIntVector() { + IntVector vector = mock(IntVector.class); + when(vector.get(0)).thenReturn(88); + + Types.NestedField nestedField = Types.NestedField.optional(0, "a1", Types.IntegerType.get()); + ColumnDescriptor columnDescriptor = + new ColumnDescriptor( + new String[] {nestedField.name()}, PrimitiveType.PrimitiveTypeName.INT32, 0, 1); + NullabilityHolder nullabilityHolder = new NullabilityHolder(10000); + VectorHolder vectorHolder = + new VectorHolder(columnDescriptor, vector, false, null, nullabilityHolder, nestedField); + ArrowVectorAccessor actual = genericArrowVectorAccessorFactory.getVectorAccessor(vectorHolder); + assertThat(actual).isNotNull(); + assertThat(actual).isInstanceOf(ArrowVectorAccessor.class); + int intValue = actual.getInt(0); + assertThat(intValue).isEqualTo(88); + } + + @Test + void testGetVectorAccessorWithNullVector() { + assertThatThrownBy( + () -> { + genericArrowVectorAccessorFactory.getVectorAccessor(VectorHolder.dummyHolder(1)); + }) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unsupported vector: null"); + } +}