Skip to content

Commit

Permalink
[Minor] Small refactor of raw index creator constructor to be more clear (#13093)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jackie-Jiang authored May 7, 2024
1 parent e07b576 commit b4dfd04
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,17 @@ public MultiValueFixedByteRawIndexCreator(File indexFile, ChunkCompressionType c
throws IOException {
// Store the length followed by the values
int totalMaxLength = Integer.BYTES + (maxNumberOfMultiValueElements * valueType.getStoredType().size());
int numDocsPerChunk = deriveNumDocsPerChunk ? Math.max(
targetMaxChunkSizeBytes / (totalMaxLength + VarByteChunkForwardIndexWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE),
1) : targetDocsPerChunk;
// For columns with very small max value, target chunk size should also be capped to reduce memory during read
int dynamicTargetChunkSize =
ForwardIndexUtils.getDynamicTargetChunkSize(totalMaxLength, targetDocsPerChunk, targetMaxChunkSizeBytes);
_indexWriter =
writerVersion < VarByteChunkForwardIndexWriterV4.VERSION ? new VarByteChunkForwardIndexWriter(indexFile,
compressionType, totalDocs, numDocsPerChunk, totalMaxLength, writerVersion)
: new VarByteChunkForwardIndexWriterV4(indexFile, compressionType, dynamicTargetChunkSize);
if (writerVersion < VarByteChunkForwardIndexWriterV4.VERSION) {
int numDocsPerChunk = deriveNumDocsPerChunk ? Math.max(targetMaxChunkSizeBytes / (totalMaxLength
+ VarByteChunkForwardIndexWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE), 1) : targetDocsPerChunk;
_indexWriter =
new VarByteChunkForwardIndexWriter(indexFile, compressionType, totalDocs, numDocsPerChunk, totalMaxLength,
writerVersion);
} else {
int chunkSize =
ForwardIndexUtils.getDynamicTargetChunkSize(totalMaxLength, targetDocsPerChunk, targetMaxChunkSizeBytes);
_indexWriter = new VarByteChunkForwardIndexWriterV4(indexFile, compressionType, chunkSize);
}
_valueType = valueType;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,20 @@ public MultiValueVarByteRawIndexCreator(File baseIndexDir, ChunkCompressionType
int totalDocs, DataType valueType, int writerVersion, int maxRowLengthInBytes, int maxNumberOfElements,
int targetMaxChunkSizeBytes, int targetDocsPerChunk)
throws IOException {
//we will prepend the actual content with numElements and length array containing length of each element
int totalMaxLength = getTotalRowStorageBytes(maxNumberOfElements, maxRowLengthInBytes);

File file = new File(baseIndexDir, column + Indexes.RAW_MV_FORWARD_INDEX_FILE_EXTENSION);
int numDocsPerChunk = Math.max(
targetMaxChunkSizeBytes / (totalMaxLength + VarByteChunkForwardIndexWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE),
1);
// For columns with very small max value, target chunk size should also be capped to reduce memory during read
int dynamicTargetChunkSize =
ForwardIndexUtils.getDynamicTargetChunkSize(totalMaxLength, targetDocsPerChunk, targetMaxChunkSizeBytes);
_indexWriter = writerVersion < VarByteChunkForwardIndexWriterV4.VERSION ? new VarByteChunkForwardIndexWriter(file,
compressionType, totalDocs, numDocsPerChunk, totalMaxLength, writerVersion)
: new VarByteChunkForwardIndexWriterV4(file, compressionType, dynamicTargetChunkSize);
// We will prepend the actual content with numElements and length array containing length of each element
int totalMaxLength = getTotalRowStorageBytes(maxNumberOfElements, maxRowLengthInBytes);
if (writerVersion < VarByteChunkForwardIndexWriterV4.VERSION) {
int numDocsPerChunk = Math.max(targetMaxChunkSizeBytes / (totalMaxLength
+ VarByteChunkForwardIndexWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE), 1);
_indexWriter =
new VarByteChunkForwardIndexWriter(file, compressionType, totalDocs, numDocsPerChunk, totalMaxLength,
writerVersion);
} else {
int chunkSize =
ForwardIndexUtils.getDynamicTargetChunkSize(totalMaxLength, targetDocsPerChunk, targetMaxChunkSizeBytes);
_indexWriter = new VarByteChunkForwardIndexWriterV4(file, compressionType, chunkSize);
}
_valueType = valueType;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,16 @@ public SingleValueVarByteRawIndexCreator(File baseIndexDir, ChunkCompressionType
int targetMaxChunkSizeBytes, int targetDocsPerChunk)
throws IOException {
File file = new File(baseIndexDir, column + V1Constants.Indexes.RAW_SV_FORWARD_INDEX_FILE_EXTENSION);
int numDocsPerChunk =
deriveNumDocsPerChunk ? getNumDocsPerChunk(maxLength, targetMaxChunkSizeBytes) : targetDocsPerChunk;

// For columns with very small max value, target chunk size should also be capped to reduce memory during read
int dynamicTargetChunkSize =
ForwardIndexUtils.getDynamicTargetChunkSize(maxLength, targetDocsPerChunk, targetMaxChunkSizeBytes);
_indexWriter = writerVersion < VarByteChunkForwardIndexWriterV4.VERSION ? new VarByteChunkForwardIndexWriter(file,
compressionType, totalDocs, numDocsPerChunk, maxLength, writerVersion)
: new VarByteChunkForwardIndexWriterV4(file, compressionType, dynamicTargetChunkSize);
if (writerVersion < VarByteChunkForwardIndexWriterV4.VERSION) {
int numDocsPerChunk =
deriveNumDocsPerChunk ? getNumDocsPerChunk(maxLength, targetMaxChunkSizeBytes) : targetDocsPerChunk;
_indexWriter = new VarByteChunkForwardIndexWriter(file, compressionType, totalDocs, numDocsPerChunk, maxLength,
writerVersion);
} else {
int chunkSize =
ForwardIndexUtils.getDynamicTargetChunkSize(maxLength, targetDocsPerChunk, targetMaxChunkSizeBytes);
_indexWriter = new VarByteChunkForwardIndexWriterV4(file, compressionType, chunkSize);
}
_valueType = valueType;
}

Expand Down

0 comments on commit b4dfd04

Please sign in to comment.