From 35548e6d30211cf155a366da2ad736d1281367bf Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 1 Feb 2018 07:55:30 +0000 Subject: [PATCH] The change for map support. --- .../vectorized/WritableColumnVector.java | 1 + .../spark/sql/vectorized/ColumnVector.java | 4 ++-- .../spark/sql/vectorized/ColumnarRow.java | 1 - .../vectorized/ColumnarBatchSuite.scala | 24 +++++++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index 54b527f74ea5e..5275e4a91eac0 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -624,6 +624,7 @@ public final ColumnarArray getArray(int rowId) { // second child column vector, and puts the offsets and lengths in the current column vector. @Override public final ColumnarMap getMap(int rowId) { + if (isNullAt(rowId)) return null; return new ColumnarMap(getChild(0), getChild(1), getArrayOffset(rowId), getArrayLength(rowId)); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java index 834c26b705bd6..a02a600676f1c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java @@ -233,13 +233,13 @@ public final ColumnarRow getStruct(int rowId) { public abstract ColumnarArray getArray(int rowId); /** - * Returns the map type value for rowId. + * Returns the map type value for rowId. If the slot for rowId is null, it should return null. * * In Spark, map type value is basically a key data array and a value data array. A key from the * key array with a index and a value from the value array with the same index contribute to * an entry of this map type value. * - * To support map type, implementations must construct an {@link ColumnarMap} and return it in + * To support map type, implementations must construct a {@link ColumnarMap} and return it in * this method. {@link ColumnarMap} requires a {@link ColumnVector} that stores the data of all * the keys of all the maps in this vector, and another {@link ColumnVector} that stores the data * of all the values of all the maps in this vector, and a pair of offset and length which diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java index 32433fdb7b9ff..62631efc85b14 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java @@ -150,7 +150,6 @@ public ColumnarArray getArray(int ordinal) { @Override public ColumnarMap getMap(int ordinal) { - if (data.getChild(ordinal).isNullAt(rowId)) return null; return data.getChild(ordinal).getMap(rowId); } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index 06715a5cd01d2..ec1e51ad78e7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -1373,4 +1373,28 @@ class ColumnarBatchSuite extends SparkFunSuite { column.putByteArray(idx, "Hello".getBytes(StandardCharsets.UTF_8)) assert(column.getBinary(idx) != null) } + + testVector("getMap should return null for null slot", 4, + new MapType(IntegerType, IntegerType, false)) { column => + assert(column.numNulls() == 0) + + var idx = 0 + column.putNull(idx) + assert(column.getBinary(idx) == null) + idx += 1 + column.putNull(idx) + assert(column.getBinary(idx) == null) + assert(column.numNulls() == 2) + + idx += 1 + val keyCol = column.getChild(0) + keyCol.putInt(0, 0) + keyCol.putInt(1, 1) + val valueCol = column.getChild(1) + valueCol.putInt(0, 0) + valueCol.putInt(1, 2) + + column.putArray(idx, 0, 2) + assert(column.getMap(idx) != null) + } }