bees: use helper function for readahead

There seem to be multiple ways to do readahead in Linux, and only some of them work. Hopefully reading the actual data is one of them. This is an attempt to avoid page-by-page reads in the generic dedupe code. We load both extents into the VFS cache (read sequentially) and hope they are still there by the time we call dedupe on them. We also call readahead(2) and hopefully that either helps or does nothing.

Signed-off-by: Zygo Blaxell <[email protected]>
Zygo · Jun 12, 2021 · 20b8f8a · kakra · Jun 22, 2021 · Zygo
1 parent 0afd285
commit 20b8f8a
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 5 deletions.
diff --git a/src/bees-context.cc b/src/bees-context.cc
@@ -344,7 +344,7 @@ BeesContext::scan_one_extent(const BeesFileRange &bfr, const Extent &e)
 	}
 
 	// OK we need to read extent now
-	readahead(bfr.fd(), bfr.begin(), bfr.size());
+	bees_readahead(bfr.fd(), bfr.begin(), bfr.size());
 
 	map<off_t, pair<BeesHash, BeesAddress>> insert_map;
 	set<off_t> noinsert_set;

diff --git a/src/bees-types.cc b/src/bees-types.cc
@@ -385,8 +385,8 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
 	BEESTRACE("e_second " << e_second);
 
 	// Preread entire extent
-	readahead(second.fd(), e_second.begin(), e_second.size());
-	readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
+	bees_readahead(second.fd(), e_second.begin(), e_second.size());
+	bees_readahead(first.fd(), e_second.begin() + first.begin() - second.begin(), e_second.size());
 
 	auto hash_table = ctx->hash_table();
 
@@ -405,7 +405,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
 				BEESCOUNT(pairbackward_hole);
 				break;
 			}
-			readahead(second.fd(), e_second.begin(), e_second.size());
+			bees_readahead(second.fd(), e_second.begin(), e_second.size());
 #else
 			// This tends to repeatedly process extents that were recently processed.
 			// We tend to catch duplicate blocks early since we scan them forwards.
@@ -514,7 +514,7 @@ BeesRangePair::grow(shared_ptr<BeesContext> ctx, bool constrained)
 				BEESCOUNT(pairforward_hole);
 				break;
 			}
-			readahead(second.fd(), e_second.begin(), e_second.size());
+			bees_readahead(second.fd(), e_second.begin(), e_second.size());
 		}
 		BEESCOUNT(pairforward_try);
 

diff --git a/src/bees.cc b/src/bees.cc
@@ -371,6 +371,31 @@ bees_sync(int fd)
 	BEESCOUNTADD(sync_ms, sync_timer.age() * 1000);
 }
 
+// Best-effort preload of [offset, offset + size) of fd into the page cache: a readahead(2)
+// hint, then pread() in BEES_READAHEAD_SIZE chunks.  pread() failures are not reported.
+void
+bees_readahead(int const fd, off_t offset, size_t size)
+{
+	Timer readahead_timer;
+	BEESNOTE("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+	BEESTOOLONG("readahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+	// This might not do anything?
+	DIE_IF_NON_ZERO(readahead(fd, offset, size));
+	// Make sure this data is in page cache.  Note spelling: readahead vs read ahead
+	BEESNOTE("read ahead " << name_fd(fd) << " offset " << to_hex(offset) << " len " << pretty(size));
+	while (size) {
+		static uint8_t dummy[BEES_READAHEAD_SIZE];	// scratch; contents are never inspected
+		const size_t this_read_size = min(size, sizeof(dummy));
+		// Errors and short reads are tolerated (size is inaccurate); count only bytes actually read.
+		const ssize_t bytes_read = pread(fd, dummy, this_read_size, offset);
+		BEESCOUNT(readahead_count);
+		if (bytes_read > 0) { BEESCOUNTADD(readahead_bytes, static_cast<size_t>(bytes_read)); }
+		// Advance by the requested stride even after a short read, as before.
+		offset += this_read_size;
+		size -= this_read_size;
+	}
+	BEESCOUNTADD(readahead_ms, readahead_timer.age() * 1000);
+
 BeesStringFile::BeesStringFile(Fd dir_fd, string name, size_t limit) :
 	m_dir_fd(dir_fd),
 	m_name(name),

diff --git a/src/bees.h b/src/bees.h
@@ -119,6 +119,9 @@ const bool BEES_SERIALIZE_RESOLVE = false;
 // Workaround for tree mod log bugs
 const bool BEES_SERIALIZE_BALANCE = false;
 
+// Chunk (buffer) size for the pread() loop in bees_readahead; workaround for silly dedupe / ineffective readahead behavior
+const size_t BEES_READAHEAD_SIZE = 1024 * 1024;
+
 // Flags
 const int FLAGS_OPEN_COMMON   = O_NOFOLLOW | O_NONBLOCK | O_CLOEXEC | O_NOATIME | O_LARGEFILE | O_NOCTTY;
 const int FLAGS_OPEN_DIR      = FLAGS_OPEN_COMMON | O_RDONLY | O_DIRECTORY;
@@ -880,6 +883,7 @@ extern const char *BEES_USAGE;
 extern const char *BEES_VERSION;
 string pretty(double d);
 void bees_sync(int fd);
+void bees_readahead(int fd, off_t offset, size_t size);
 string format_time(time_t t);
 
 #endif