-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] [parquet] Refactor parquet reader using spark code. #4982
base: master
Are you sure you want to change the base?
Conversation
3df041e
to
0254422
Compare
|
||
@Override | ||
public VectorizedColumnBatch getBatch() { | ||
return new VectorizedColumnBatch(children); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use batch from constructor
import org.apache.paimon.data.columnar.MapColumnVector; | ||
|
||
/** Wrap for MapColumnVector. */ | ||
public class WrapMapColumnVector implements MapColumnVector { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CastedMapColumnVector
?
import org.apache.paimon.data.columnar.ColumnarArray; | ||
|
||
/** Wrap for ArrayColumnVector. */ | ||
public class WrapArrayColumnVector implements ArrayColumnVector { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CastedArrayColumnVector
import org.apache.paimon.data.columnar.VectorizedColumnBatch; | ||
|
||
/** Wrap for RowColumnVector. */ | ||
public class WrapRowColumnVector implements RowColumnVector { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CastedRowColumnVector
implements WritableColumnVector { | ||
|
||
protected ColumnVector[] children; | ||
protected long[] offsets; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should introduce an AbstractArrayBasedVector
for array and map to contain offsets and lengths.
columnarRow.setRowId(i); | ||
return columnarRow; | ||
} | ||
|
||
@Override | ||
public VectorizedColumnBatch getBatch() { | ||
return new VectorizedColumnBatch(fields); | ||
return new VectorizedColumnBatch(children); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use vectorizedColumnBatch
import org.apache.paimon.data.columnar.ColumnVector; | ||
import org.apache.paimon.data.columnar.ColumnarArray; | ||
|
||
/** Wrap for ArrayColumnVector. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add detailed comments.
import org.apache.paimon.data.columnar.ColumnarMap; | ||
import org.apache.paimon.data.columnar.MapColumnVector; | ||
|
||
/** Wrap for MapColumnVector. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add detailed comments.
import org.apache.paimon.data.columnar.RowColumnVector; | ||
import org.apache.paimon.data.columnar.VectorizedColumnBatch; | ||
|
||
/** Test for RowColumnVector. */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add detailed comments.
@@ -46,4 +46,10 @@ public interface WritableIntVector extends WritableColumnVector, IntColumnVector | |||
|
|||
/** Fill the column vector with the provided value. */ | |||
void fill(int value); | |||
|
|||
@SuppressWarnings("unused") | |||
int appendInt(int v); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
void appendInt
int appendInt(int v); | ||
|
||
@SuppressWarnings("unused") | ||
int appendInts(int count, int v); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
void appendInts
@@ -204,6 +213,16 @@ public void fill(int value) { | |||
} | |||
} | |||
|
|||
@Override | |||
public int appendInt(int v) { | |||
return 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (vector instanceof WritableBytesVector) {
xxxxxx
}
|
||
@Override | ||
public int appendInts(int count, int v) { | ||
return 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto
@@ -262,7 +275,7 @@ void testContinuousRepetition(int rowGroupSize) throws IOException { | |||
@ParameterizedTest | |||
@MethodSource("parameters") | |||
void testLargeValue(int rowGroupSize) throws IOException { | |||
int number = 10000; | |||
int number = 1000; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Revert the test code?
@@ -569,15 +569,9 @@ abstract class InsertOverwriteTableTestBase extends PaimonSparkTestBase { | |||
s"CREATE TABLE t (i INT, s STRUCT<f1: INT, f2: INT>) TBLPROPERTIES ('file.format' = '$format')") | |||
sql( | |||
"INSERT INTO t VALUES (1, STRUCT(1, 1)), (2, null), (3, STRUCT(1, null)), (4, STRUCT(null, null))") | |||
if (format.equals("parquet")) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Zouxxyy Fix the bug here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix #4785
Purpose
This pull request is not ready.
This closes #4785 too
Tests
API and Format
Documentation