Skip to content

Commit

Permalink
[GLUTEN-6961][VL][feat] Add decimal write support for ArrowWritableColumnVector (apache#6962)
Browse files Browse the repository at this point in the history

Closes apache#6961
  • Loading branch information
jinchengchenghh authored and shamirchen committed Oct 14, 2024
1 parent 7625006 commit 9b6d857
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gluten.vectorized;

import org.apache.spark.sql.execution.vectorized.MutableColumnarRow;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.TaskResources$;
import org.junit.Assert;
import org.junit.Test;

/**
 * Tests that values written into {@link ArrowWritableColumnVector} columns through Spark's
 * {@link MutableColumnarRow} can be read back unchanged.
 */
public class ArrowColumnVectorTest {

  /**
   * Writes a {@code decimal(20, 1)} value into row 0 of an Arrow-backed column via
   * {@link MutableColumnarRow#setDecimal} and checks that reading the same slot back yields an
   * equal {@link Decimal}.
   */
  @Test
  public void testWriteByMutableColumnarRow() {
    // The body runs inside TaskResources.runUnsafe, as the column allocation is done there.
    TaskResources$.MODULE$.runUnsafe(
        () -> {
          final ArrowWritableColumnVector[] columns = newArrowColumns("a decimal(20, 1)", 20);
          MutableColumnarRow row = new MutableColumnarRow(columns);
          Decimal decimal = new Decimal();
          decimal.set(234, 20, 1);
          row.setDecimal(0, decimal, 20);
          // JUnit's contract is assertEquals(expected, actual): the value we wrote is the
          // expectation and the value read back is the actual, so failure messages are labelled
          // correctly.
          Assert.assertEquals(decimal, row.getDecimal(0, 20, 1));
          return null;
        });
  }

  /**
   * Allocates one {@link ArrowWritableColumnVector} per field of {@code schema} and sets the value
   * count of each column to {@code numRows}.
   *
   * @param schema DDL string describing the columns, e.g. {@code "a decimal(20, 1)"}
   * @param numRows capacity passed to the allocation and value count set on every column
   * @return the allocated columns, in schema order as returned by {@code allocateColumns}
   */
  private static ArrowWritableColumnVector[] newArrowColumns(String schema, int numRows) {
    ArrowWritableColumnVector[] columns =
        ArrowWritableColumnVector.allocateColumns(numRows, StructType.fromDDL(schema));
    for (ArrowWritableColumnVector col : columns) {
      col.setValueCount(numRows);
    }
    return columns;
  }
}
50 changes: 49 additions & 1 deletion cpp/velox/tests/VeloxRowToColumnarTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,58 @@ TEST_F(VeloxRowToColumnarTest, allTypes) {
makeNullableFlatVector<bool>(
{std::nullopt, true, false, std::nullopt, true, true, false, true, std::nullopt, std::nullopt}),
makeFlatVector<velox::StringView>(
{"alice0", "bob1", "alice2", "bob3", "Alice4", "Bob5", "AlicE6", "boB7", "ALICE8", "BOB9"}),
{"alice0",
"bob1",
"alice2",
"bob3",
"Alice4",
"Bob5123456789098766notinline",
"AlicE6",
"boB7",
"ALICE8",
"BOB9"}),
makeNullableFlatVector<velox::StringView>(
{"alice", "bob", std::nullopt, std::nullopt, "Alice", "Bob", std::nullopt, "alicE", std::nullopt, "boB"}),
});
testRowVectorEqual(vector);
}

// Builds a row vector holding a single nullable int64_t column (ten entries, four of them null)
// and passes it to testRowVectorEqual.
TEST_F(VeloxRowToColumnarTest, bigint) {
  auto bigintColumn =
      makeNullableFlatVector<int64_t>({1, 2, 3, std::nullopt, 4, std::nullopt, 5, 6, std::nullopt, 7});
  auto rowVector = makeRowVector({bigintColumn});
  testRowVectorEqual(rowVector);
}

// Builds a row vector with two nullable decimal columns and passes it to testRowVectorEqual:
//   - an int128_t-backed column typed DECIMAL(38, 2), including one value built with
//     HugeInt::build(1045, 1789);
//   - an int64_t-backed column typed DECIMAL(12, 3).
// Both columns carry nulls at the same positions.
TEST_F(VeloxRowToColumnarTest, decimal) {
  auto wideDecimalColumn = makeNullableFlatVector<int128_t>(
      {123456, HugeInt::build(1045, 1789), 3678, std::nullopt, 4, std::nullopt, 5, 687987, std::nullopt, 7},
      DECIMAL(38, 2));
  auto narrowDecimalColumn = makeNullableFlatVector<int64_t>(
      {178987, 2, 3, std::nullopt, 4, std::nullopt, 5, 6, std::nullopt, 7}, DECIMAL(12, 3));

  auto rowVector = makeRowVector({wideDecimalColumn, narrowDecimalColumn});
  testRowVectorEqual(rowVector);
}

// Builds a row vector with one nullable Timestamp column and passes it to testRowVectorEqual.
// The entries use negative, zero and positive first constructor arguments (always with 0 as the
// second argument), repeat one value three times, and end with a null.
TEST_F(VeloxRowToColumnarTest, timestamp) {
  auto timestampColumn = makeNullableFlatVector<Timestamp>(
      {Timestamp(-946684800, 0),
       Timestamp(-7266, 0),
       Timestamp(0, 0),
       Timestamp(946684800, 0),
       Timestamp(9466848000, 0),
       Timestamp(94668480000, 0),
       Timestamp(946729316, 0),
       Timestamp(946729316, 0),
       Timestamp(946729316, 0),
       Timestamp(7266, 0),
       Timestamp(-50049331200, 0),
       Timestamp(253405036800, 0),
       Timestamp(-62480037600, 0),
       std::nullopt});
  auto rowVector = makeRowVector({timestampColumn});
  testRowVectorEqual(rowVector);
}
} // namespace gluten
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,11 @@ void setNull(int rowId) {
}

/**
 * Marks the slot at {@code rowId} as non-null. Intentionally a no-op.
 *
 * <p>The Arrow Java library does not usually expose such an API on its vectors, so this method
 * does nothing rather than throwing an exception that would fail the caller.
 *
 * @param rowId index of the slot; unused
 */
void setNotNull(int rowId) {
  // Doing nothing is acceptable here: Spark sets the value right after this method returns, and
  // Arrow Java updates the validity buffer as part of that write. In effect the call to
  // `setNotNull` is merely deferred until the value is written.
}

void setNulls(int rowId, int count) {
Expand Down Expand Up @@ -1745,6 +1749,14 @@ final void setLong(int rowId, long value) {
/**
 * Stores {@code value} at {@code rowId} by delegating to {@code writer.setSafe}.
 *
 * @param rowId index of the slot to write
 * @param value decimal value to store
 */
final void setBytes(int rowId, BigDecimal value) {
writer.setSafe(rowId, value);
}

/**
 * Stores the bytes of {@code src} at {@code rowId} via {@code writer.setBigEndianSafe}.
 *
 * <p>Only the whole-array case is supported: {@code count} must equal {@code src.length} and
 * {@code srcIndex} must be 0.
 *
 * @param rowId index of the slot to write
 * @param count number of bytes to take from {@code src}; must be {@code src.length}
 * @param src source bytes, handed unchanged to {@code setBigEndianSafe}
 * @param srcIndex start offset within {@code src}; must be 0
 * @throws UnsupportedOperationException if a sub-range of {@code src} is requested
 */
final void setBytes(int rowId, int count, byte[] src, int srcIndex) {
  final boolean coversWholeArray = count == src.length && srcIndex == 0;
  if (!coversWholeArray) {
    // Writing a slice of the source array is not implemented.
    throw new UnsupportedOperationException();
  }
  writer.setBigEndianSafe(rowId, src);
}
}

private static class StringWriter extends ArrowVectorWriter {
Expand Down

0 comments on commit 9b6d857

Please sign in to comment.