Skip to content

Commit

Permalink
limit max bytes that can be read/written per pread/write syscall
Browse files Browse the repository at this point in the history
Summary:
BlockBasedTable sst files can grow very large when universal
compaction is used. When the index block exceeds 2G, pread seems to fail,
returning truncated data and causing a "truncated block" error. I tried to use
```
  #define _FILE_OFFSET_BITS 64
```
But the problem persisted. Splitting a large read/write into smaller
batches seems to solve the problem.

Test Plan:
Successfully ran a compaction whose resulting sst file is ~90G (2.1G
index block size).

Reviewers: yhchiang, igor, sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D22569
  • Loading branch information
Lei Jin committed Aug 30, 2014
1 parent d20b8cf commit 7e9f28c
Showing 1 changed file with 35 additions and 13 deletions.
48 changes: 35 additions & 13 deletions util/env_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,23 @@ class PosixRandomAccessFile: public RandomAccessFile {
char* scratch) const {
Status s;
ssize_t r = -1;
do {
r = pread(fd_, scratch, n, static_cast<off_t>(offset));
} while (r < 0 && errno == EINTR);
IOSTATS_ADD_IF_POSITIVE(bytes_read, r);
*result = Slice(scratch, (r < 0) ? 0 : r);
size_t left = n;
char* ptr = scratch;
while (left > 0) {
r = pread(fd_, ptr, left, static_cast<off_t>(offset));
if (r <= 0) {
if (errno == EINTR) {
continue;
}
break;
}
ptr += r;
offset += r;
left -= r;
}

IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left);
*result = Slice(scratch, (r < 0) ? 0 : n - left);
if (r < 0) {
// An error: return a non-ok status
s = IOError(filename_, errno);
Expand Down Expand Up @@ -907,9 +919,23 @@ class PosixRandomRWFile : public RandomRWFile {
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
Status s;
ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
IOSTATS_ADD_IF_POSITIVE(bytes_read, r);
*result = Slice(scratch, (r < 0) ? 0 : r);
ssize_t r = -1;
size_t left = n;
char* ptr = scratch;
while (left > 0) {
r = pread(fd_, ptr, left, static_cast<off_t>(offset));
if (r <= 0) {
if (errno == EINTR) {
continue;
}
break;
}
ptr += r;
offset += r;
left -= r;
}
IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left);
*result = Slice(scratch, (r < 0) ? 0 : n - left);
if (r < 0) {
s = IOError(filename_, errno);
}
Expand Down Expand Up @@ -1018,15 +1044,12 @@ class PosixFileLock : public FileLock {
std::string filename;
};


namespace {
// Checks the return code of a pthread call. A zero `result` is a no-op;
// any other value is printed to stderr (prefixed with `label`, which names
// the call) via strerror, and the process exits with status 1.
void PthreadCall(const char* label, int result) {
  if (result == 0) {
    return;
  }
  fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
  exit(1);
}
}

class PosixEnv : public Env {
public:
Expand Down Expand Up @@ -1724,12 +1747,11 @@ unsigned int PosixEnv::GetThreadPoolQueueLen(Priority pri) const {
return thread_pools_[pri].GetQueueLen();
}

namespace {
// Bundles a callback with its argument so both can be passed through a
// single void*; StartThreadWrapper casts its argument back to this type and
// calls user_function(arg).
struct StartThreadState {
// Function that StartThreadWrapper invokes.
void (*user_function)(void*);
// Opaque pointer handed to user_function as its only argument.
void* arg;
};
}

static void* StartThreadWrapper(void* arg) {
StartThreadState* state = reinterpret_cast<StartThreadState*>(arg);
state->user_function(state->arg);
Expand Down

0 comments on commit 7e9f28c

Please sign in to comment.