Skip to content

Commit

Permalink
Avoid a few allocations while reading PBC files
Browse files Browse the repository at this point in the history
This reduces extra short-lived allocations while reading PBC files
by using faststring buffers instead of heap-allocated arrays.
For short reads (<32 bytes) these are stack-allocated. Since PBC files
use length prefixes for records, this avoids a heap allocation per record.

This, combined with the prior patch to reduce temporary vector allocations,
sped up startup on a real host with 11M blocks a few more percent:

Before:
I0907 17:27:13.600186 14550 fs_manager.cc:335] Time spent opening block manager: real 13.401s user 0.000s sys 0.002s

After:
I0907 17:43:46.364558 21480 fs_manager.cc:335] Time spent opening block manager: real 12.377s user 0.000s sys 0.001s

This also sped up the LBM startup time benchmark a little bit:

Before:
I0907 17:07:40.230087  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.495s      user 0.032s     sys 0.001s
I0907 17:07:41.642105  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.412s      user 0.044s     sys 0.001s
I0907 17:07:43.082852  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.441s      user 0.040s     sys 0.001s
I0907 17:07:44.512603  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.430s      user 0.043s     sys 0.000s
I0907 17:07:45.956848  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.444s      user 0.041s     sys 0.000s
I0907 17:07:47.386735  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.430s      user 0.037s     sys 0.002s
I0907 17:07:48.787317  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.401s      user 0.041s     sys 0.002s
I0907 17:07:50.244407  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.457s      user 0.038s     sys 0.001s
I0907 17:07:51.682209  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.438s      user 0.037s     sys 0.001s
I0907 17:07:53.116209  9782 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.434s      user 0.037s     sys 0.000s

After:
I0907 17:40:07.984983 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.429s      user 0.032s     sys 0.001s
I0907 17:40:09.359199 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.374s      user 0.037s     sys 0.000s
I0907 17:40:10.767251 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.408s      user 0.038s     sys 0.001s
I0907 17:40:12.160768 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.394s      user 0.037s     sys 0.001s
I0907 17:40:13.539149 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.378s      user 0.035s     sys 0.001s
I0907 17:40:14.932631 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.393s      user 0.037s     sys 0.000s
I0907 17:40:16.316269 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.384s      user 0.037s     sys 0.000s
I0907 17:40:17.714495 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.398s      user 0.038s     sys 0.000s
I0907 17:40:19.077134 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.363s      user 0.036s     sys 0.001s
I0907 17:40:20.487521 10204 log_block_manager-test.cc:799] Time spent reopening block manager: real 1.410s      user 0.037s     sys 0.001s

Change-Id: I228f26416b750c5a30ec6cc0763257c7d8b8d56f
Reviewed-on: http://gerrit.cloudera.org:8080/8009
Tested-by: Todd Lipcon <[email protected]>
Reviewed-by: Adar Dembo <[email protected]>
  • Loading branch information
toddlipcon committed Sep 19, 2017
1 parent 142464e commit adedbc7
Showing 1 changed file with 15 additions and 28 deletions.
43 changes: 15 additions & 28 deletions src/kudu/util/pb_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ bool IsSupportedContainerVersion(uint32_t version) {
return false;
}

// Reads exactly 'length' bytes from the container file into 'scratch',
// Reads exactly 'length' bytes from the container file into 'result',
// validating that there is sufficient data in the file to read this length
// before attempting to do so, and validating that it has read that length
// after performing the read.
Expand All @@ -189,13 +189,11 @@ bool IsSupportedContainerVersion(uint32_t version) {
// Status::Incomplete.
// If there is an unexpected short read, returns Status::Corruption.
//
// A Slice of the bytes read into 'buf' is returned in 'result'.
//
// NOTE: the data in 'buf' may be modified even in the case of a failed read.
// NOTE: the data in 'result' may be modified even in the case of a failed read.
template<typename ReadableFileType>
Status ValidateAndReadData(ReadableFileType* reader, uint64_t file_size,
uint64_t* offset, uint64_t length,
Slice* result, unique_ptr<uint8_t[]>* scratch) {
faststring* result) {
// Validate the read length using the file size.
if (*offset + length > file_size) {
return Status::Incomplete("File size not large enough to be valid",
Expand All @@ -207,17 +205,9 @@ Status ValidateAndReadData(ReadableFileType* reader, uint64_t file_size,
}

// Perform the read.
unique_ptr<uint8_t[]> local_scratch(new uint8_t[length]);
Slice s(local_scratch.get(), length);
RETURN_NOT_OK(reader->Read(*offset, s));
CHECK_EQ(length, s.size()) // Should never trigger due to contract with reader APIs.
<< Substitute("Unexpected short read: Proto container file $0: Tried to read $1 bytes "
"but only read $2 bytes",
reader->filename(), length, s.size());

result->resize(length);
RETURN_NOT_OK(reader->Read(*offset, Slice(*result)));
*offset += length;
*result = s;
scratch->swap(local_scratch);
return Status::OK();
}

Expand Down Expand Up @@ -264,17 +254,16 @@ Status ReadPBStartingAt(ReadableFileType* reader, int version, uint64_t* offset,
// Version 2+ includes a checksum for the length field.
uint64_t length_buflen = (version == 1) ? sizeof(uint32_t)
: sizeof(uint32_t) + kPBContainerChecksumLen;
Slice len_and_cksum_slice;
unique_ptr<uint8_t[]> length_scratch;
faststring length_and_cksum_buf;
RETURN_NOT_OK_PREPEND(ValidateAndReadData(reader, file_size, &tmp_offset, length_buflen,
&len_and_cksum_slice, &length_scratch),
&length_and_cksum_buf),
Substitute("Could not read data length from proto container file $0 "
"at offset $1", reader->filename(), *offset));
Slice length(len_and_cksum_slice.data(), sizeof(uint32_t));
Slice length(length_and_cksum_buf.data(), sizeof(uint32_t));

// Versions >= 2 have an individual checksum for the data length.
if (version >= 2) {
Slice length_checksum(len_and_cksum_slice.data() + sizeof(uint32_t), kPBContainerChecksumLen);
Slice length_checksum(length_and_cksum_buf.data() + sizeof(uint32_t), kPBContainerChecksumLen);
RETURN_NOT_OK_PREPEND(ParseAndCompareChecksum(length_checksum.data(), { length }),
CHECKSUM_ERR_MSG("Data length checksum does not match",
reader->filename(), tmp_offset - kPBContainerChecksumLen));
Expand All @@ -283,15 +272,14 @@ Status ReadPBStartingAt(ReadableFileType* reader, int version, uint64_t* offset,

// Read body and checksum into buffer for checksum & parsing.
uint64_t data_and_cksum_buflen = data_length + kPBContainerChecksumLen;
Slice body_and_cksum_slice;
unique_ptr<uint8_t[]> body_scratch;
faststring body_and_cksum_buf;
RETURN_NOT_OK_PREPEND(ValidateAndReadData(reader, file_size, &tmp_offset, data_and_cksum_buflen,
&body_and_cksum_slice, &body_scratch),
&body_and_cksum_buf),
Substitute("Could not read PB message data from proto container file $0 "
"at offset $1",
reader->filename(), tmp_offset));
Slice body(body_and_cksum_slice.data(), data_length);
Slice record_checksum(body_and_cksum_slice.data() + data_length, kPBContainerChecksumLen);
Slice body(body_and_cksum_buf.data(), data_length);
Slice record_checksum(body_and_cksum_buf.data() + data_length, kPBContainerChecksumLen);

// Version 1 has a single checksum for length, body.
// Version 2+ has individual checksums for length and body, respectively.
Expand Down Expand Up @@ -350,10 +338,9 @@ Status ParsePBFileHeader(ReadableFileType* reader, uint64_t* offset, int* versio
// additional 4 bytes required by a V2+ header (vs V1) is still less than the
// minimum number of bytes required for a V1 format data record.
uint64_t tmp_offset = *offset;
Slice header;
unique_ptr<uint8_t[]> scratch;
faststring header;
RETURN_NOT_OK_PREPEND(ValidateAndReadData(reader, file_size, &tmp_offset, kPBContainerV2HeaderLen,
&header, &scratch),
&header),
Substitute("Could not read header for proto container file $0",
reader->filename()));
Slice magic_and_version(header.data(), kPBContainerMagicLen + sizeof(uint32_t));
Expand Down

0 comments on commit adedbc7

Please sign in to comment.