Skip to content

Commit 1b255c0

Browse files
slessardsl255051
authored and committed
OTF-920 - fix NullPointerException
Handle case where the VectorHolder contains a null value
1 parent 66f589d commit 1b255c0

File tree

3 files changed

+139
-1
lines changed

3 files changed

+139
-1
lines changed

arrow/src/main/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@ private ArrowVectorAccessor<DecimalT, Utf8StringT, ArrayT, ChildVectorT> getPlai
221221
return new FixedSizeBinaryAccessor<>(
222222
(FixedSizeBinaryVector) vector, stringFactorySupplier.get());
223223
}
224-
throw new UnsupportedOperationException("Unsupported vector: " + vector.getClass());
224+
String vectorName = (vector == null) ? "null" : vector.getClass().toString();
225+
throw new UnsupportedOperationException("Unsupported vector: " + vectorName);
225226
}
226227

227228
private static boolean isDecimal(PrimitiveType primitive) {

arrow/src/test/java/org/apache/iceberg/arrow/vectorized/ArrowReaderTest.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import static org.apache.iceberg.Files.localInput;
2222
import static org.assertj.core.api.Assertions.assertThat;
23+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
2324

2425
import java.io.File;
2526
import java.io.IOException;
@@ -59,6 +60,7 @@
5960
import org.apache.arrow.vector.types.pojo.ArrowType;
6061
import org.apache.arrow.vector.types.pojo.Field;
6162
import org.apache.arrow.vector.types.pojo.FieldType;
63+
import org.apache.iceberg.AppendFiles;
6264
import org.apache.iceberg.DataFile;
6365
import org.apache.iceberg.DataFiles;
6466
import org.apache.iceberg.FileFormat;
@@ -263,6 +265,55 @@ public void testReadColumnFilter2() throws Exception {
263265
scan, NUM_ROWS_PER_MONTH, 12 * NUM_ROWS_PER_MONTH, ImmutableList.of("timestamp"));
264266
}
265267

268+
@Test
269+
public void testThrowsUOEWhenNewColumnHasNoValue() throws Exception {
270+
rowsWritten = Lists.newArrayList();
271+
tables = new HadoopTables();
272+
273+
Schema schema =
274+
new Schema(
275+
Types.NestedField.required(1, "a", Types.IntegerType.get()),
276+
Types.NestedField.optional(2, "b", Types.StringType.get()),
277+
Types.NestedField.required(3, "c", Types.DecimalType.of(12, 3)));
278+
279+
PartitionSpec spec = PartitionSpec.builderFor(schema).build();
280+
Table table1 = tables.create(schema, spec, tableLocation);
281+
282+
// Add one record to the table
283+
GenericRecord rec = GenericRecord.create(schema);
284+
rec.setField("a", 1);
285+
rec.setField("b", "san diego");
286+
rec.setField("c", new BigDecimal("1024.025"));
287+
List<GenericRecord> genericRecords = Lists.newArrayList();
288+
genericRecords.add(rec);
289+
290+
AppendFiles appendFiles = table1.newAppend();
291+
appendFiles.appendFile(writeParquetFile(table1, genericRecords));
292+
appendFiles.commit();
293+
294+
// Alter the table schema by adding a new, optional column.
295+
// Do not add any data for this new column in the one existing row in the table
296+
// and do not insert any new rows into the table.
297+
Table table = tables.load(tableLocation);
298+
table.updateSchema().addColumn("a1", Types.IntegerType.get()).commit();
299+
300+
// Select all columns, all rows from the table
301+
TableScan scan = table.newScan().select("*");
302+
303+
assertThatThrownBy(
304+
() -> {
305+
// Read the data.
306+
try (VectorizedTableScanIterable itr =
307+
new VectorizedTableScanIterable(scan, 1000, false)) {
308+
for (ColumnarBatch batch : itr) {
309+
// no-op
310+
}
311+
}
312+
})
313+
.isInstanceOf(UnsupportedOperationException.class)
314+
.hasMessage("Unsupported vector: null");
315+
}
316+
266317
/**
267318
* The test asserts that {@link CloseableIterator#hasNext()} returned by the {@link ArrowReader}
268319
* is idempotent.
arrow/src/test/java/org/apache/iceberg/arrow/vectorized/GenericArrowVectorAccessorFactoryTest.java

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.iceberg.arrow.vectorized;
20+
21+
import static org.assertj.core.api.Assertions.assertThat;
22+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
23+
import static org.mockito.Mockito.mock;
24+
import static org.mockito.Mockito.when;
25+
26+
import java.math.BigDecimal;
27+
import java.util.function.Supplier;
28+
import org.apache.arrow.vector.IntVector;
29+
import org.apache.iceberg.types.Types;
30+
import org.apache.parquet.column.ColumnDescriptor;
31+
import org.apache.parquet.schema.PrimitiveType;
32+
import org.junit.jupiter.api.BeforeEach;
33+
import org.junit.jupiter.api.Test;
34+
import org.mockito.InjectMocks;
35+
import org.mockito.Mock;
36+
import org.mockito.MockitoAnnotations;
37+
38+
class GenericArrowVectorAccessorFactoryTest {
39+
@Mock
40+
Supplier<GenericArrowVectorAccessorFactory.DecimalFactory<BigDecimal>> decimalFactorySupplier;
41+
42+
@Mock Supplier<GenericArrowVectorAccessorFactory.StringFactory<String>> stringFactorySupplier;
43+
44+
@Mock
45+
Supplier<GenericArrowVectorAccessorFactory.StructChildFactory<Integer>>
46+
structChildFactorySupplier;
47+
48+
@Mock
49+
Supplier<GenericArrowVectorAccessorFactory.ArrayFactory<Integer, Integer[]>> arrayFactorySupplier;
50+
51+
@InjectMocks GenericArrowVectorAccessorFactory genericArrowVectorAccessorFactory;
52+
53+
@BeforeEach
54+
void before() {
55+
MockitoAnnotations.openMocks(this);
56+
}
57+
58+
@Test
59+
void testGetVectorAccessorWithIntVector() {
60+
IntVector vector = mock(IntVector.class);
61+
when(vector.get(0)).thenReturn(88);
62+
63+
Types.NestedField nestedField = Types.NestedField.optional(0, "a1", Types.IntegerType.get());
64+
ColumnDescriptor columnDescriptor =
65+
new ColumnDescriptor(
66+
new String[] {nestedField.name()}, PrimitiveType.PrimitiveTypeName.INT32, 0, 1);
67+
NullabilityHolder nullabilityHolder = new NullabilityHolder(10000);
68+
VectorHolder vectorHolder =
69+
new VectorHolder(columnDescriptor, vector, false, null, nullabilityHolder, nestedField);
70+
ArrowVectorAccessor actual = genericArrowVectorAccessorFactory.getVectorAccessor(vectorHolder);
71+
assertThat(actual).isNotNull();
72+
assertThat(actual).isInstanceOf(ArrowVectorAccessor.class);
73+
int intValue = actual.getInt(0);
74+
assertThat(intValue).isEqualTo(88);
75+
}
76+
77+
@Test
78+
void testGetVectorAccessorWithNullVector() {
79+
assertThatThrownBy(
80+
() -> {
81+
genericArrowVectorAccessorFactory.getVectorAccessor(VectorHolder.dummyHolder(1));
82+
})
83+
.isInstanceOf(UnsupportedOperationException.class)
84+
.hasMessage("Unsupported vector: null");
85+
}
86+
}

0 commit comments

Comments
 (0)