From 451e5b0519de64d929da4f2534b13432193b66ce Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sat, 3 Sep 2022 11:28:00 -0700 Subject: [PATCH] address comments --- cpp/src/lance/encodings/binary.h | 2 +- cpp/src/lance/encodings/plain.cc | 4 ++-- cpp/src/lance/encodings/plain_test.cc | 7 ++++--- cpp/src/lance/io/exec/filter.cc | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/cpp/src/lance/encodings/binary.h b/cpp/src/lance/encodings/binary.h index 1173c47b4d..befdd5a04f 100644 --- a/cpp/src/lance/encodings/binary.h +++ b/cpp/src/lance/encodings/binary.h @@ -103,7 +103,7 @@ ::arrow::Result> VarBinaryDecoder::ToArray( return ::arrow::Status::IndexError( fmt::format("VarBinaryDecoder::ToArray: out of range: start={} length={} page_length={}\n", start, - length.value_or(length_), + length.value_or(-1), length_)); } diff --git a/cpp/src/lance/encodings/plain.cc b/cpp/src/lance/encodings/plain.cc index bc19a2a216..bb6ccb0a3b 100644 --- a/cpp/src/lance/encodings/plain.cc +++ b/cpp/src/lance/encodings/plain.cc @@ -134,9 +134,9 @@ class PlainDecoderImpl : public Decoder { length.value_or(-1), length_)); } - auto byte_length = type_->byte_width(); + auto byte_width = type_->byte_width(); ARROW_ASSIGN_OR_RAISE(auto buf, - infile_->ReadAt(position_ + start * byte_length, len * byte_length)); + infile_->ReadAt(position_ + start * byte_width, len * byte_width)); return std::make_shared(type_, len, buf); } diff --git a/cpp/src/lance/encodings/plain_test.cc b/cpp/src/lance/encodings/plain_test.cc index 0e73fc58b7..52b35f410f 100644 --- a/cpp/src/lance/encodings/plain_test.cc +++ b/cpp/src/lance/encodings/plain_test.cc @@ -46,8 +46,8 @@ TEST_CASE("Test Write Int32 array") { } } -TEST_CASE("Read not full batch") { - auto arr = lance::arrow::ToArray({1, 2, 3, 4, 5, 6, 7, 8}).ValueOrDie(); +TEST_CASE("Do not read full batch") { + auto arr = lance::arrow::ToArray({10, 20, 30, 40, 50, 60, 70, 80}).ValueOrDie(); CHECK(arr->length() == 8); auto sink = arrow::io::BufferOutputStream::Create().ValueOrDie(); @@ -60,8 +60,9 @@ TEST_CASE("Read not full batch") { CHECK(decoder.Init().ok()); decoder.Reset(offset, arr->length()); + // Decode arr[6:8] auto values = decoder.ToArray(6, 8).ValueOrDie(); - CHECK(values->Equals(lance::arrow::ToArray({7, 8}).ValueOrDie())); + CHECK(values->Equals(lance::arrow::ToArray({70, 80}).ValueOrDie())); } TEST_CASE("Test take plain values") { diff --git a/cpp/src/lance/io/exec/filter.cc b/cpp/src/lance/io/exec/filter.cc index 2f438a519e..9396277b8b 100644 --- a/cpp/src/lance/io/exec/filter.cc +++ b/cpp/src/lance/io/exec/filter.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "filter.h" +#include "lance/io/exec/filter.h" #include #include @@ -45,7 +45,6 @@ ::arrow::Result Filter::Next() { } ARROW_ASSIGN_OR_RAISE(auto indices_and_values, Apply(*batch.batch)); auto [indices, values] = indices_and_values; - assert(indices->length() == values->num_rows()); ARROW_ASSIGN_OR_RAISE(auto values_arr, values->ToStructArray()); ARROW_ASSIGN_OR_RAISE( auto struct_arr, @@ -71,6 +70,7 @@ Filter::Apply(const ::arrow::RecordBatch& batch) const { auto indices = std::static_pointer_cast<::arrow::Int32Array>(indices_datum.make_array()); auto values_arr = values.make_array(); ARROW_ASSIGN_OR_RAISE(auto result_batch, ::arrow::RecordBatch::FromStructArray(values_arr)); + assert(indices->length() == result_batch->num_rows()); return std::make_tuple(indices, result_batch); }