From 106db9be28ce7a4d8f28f87ce7ea6fac3d6346af Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Mon, 11 Jul 2022 09:01:59 +0000 Subject: [PATCH 01/56] update version after 7.1.15 release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67d8270651b..508bdb478a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() project(foundationdb - VERSION 7.1.15 + VERSION 7.1.16 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM) From b5e7ff308e0d44a191e29f75267cf554b59598c0 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 11 Jul 2022 10:28:34 -0700 Subject: [PATCH 02/56] Add 7.1.14 and 7.1.15 release notes --- .../source/release-notes/release-notes-710.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/documentation/sphinx/source/release-notes/release-notes-710.rst b/documentation/sphinx/source/release-notes/release-notes-710.rst index 54729502907..091bc410543 100644 --- a/documentation/sphinx/source/release-notes/release-notes-710.rst +++ b/documentation/sphinx/source/release-notes/release-notes-710.rst @@ -4,6 +4,18 @@ Release Notes ############# +7.1.15 +====== +* Same as 7.1.14 release with AVX enabled. + +7.1.14 +====== +* Released with AVX disabled. +* Fixed a high commit latency bug when there are data movement. `(PR #7548) `_ +* Fixed the primary locality on the sequencer by obtaining it from cluster controller. `(PR #7535) `_ +* Added StorageEngine type to StorageMetrics trace events. `(PR #7546) `_ +* Improved hasIncompleteVersionstamp performance in Java binding to use iteration rather than stream processing. `(PR #7559) `_ + 7.1.13 ====== * Same as 7.1.12 release with AVX enabled. From e96343d106f3da8693bff68e0ba62773c019bd84 Mon Sep 17 00:00:00 2001 From: neethuhaneesha Date: Thu, 7 Jul 2022 14:44:11 -0700 Subject: [PATCH 03/56] Adding rocksdb compression and data size stats. --- fdbserver/KeyValueStoreRocksDB.actor.cpp | 48 +++++++++++++++++++----- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index 73bd49d2cf6..b3cfa7d7f8f 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -740,7 +740,8 @@ ACTOR Future rocksDBMetricLogger(UID id, std::shared_ptr perfContextMetrics, rocksdb::DB* db, std::shared_ptr readIterPool, - Counters* counters) { + Counters* counters, + CF cf) { state std::vector> tickerStats = { { "StallMicros", rocksdb::STALL_MICROS, 0 }, { "BytesRead", rocksdb::BYTES_READ, 0 }, @@ -778,7 +779,7 @@ ACTOR Future rocksDBMetricLogger(UID id, { "CountIterSkippedKeys", rocksdb::NUMBER_ITER_SKIP, 0 }, }; - state std::vector> propertyStats = { + state std::vector> intPropertyStats = { { "NumImmutableMemtables", rocksdb::DB::Properties::kNumImmutableMemTable }, { "NumImmutableMemtablesFlushed", rocksdb::DB::Properties::kNumImmutableMemTableFlushed }, { "IsMemtableFlushPending", rocksdb::DB::Properties::kMemTableFlushPending }, @@ -806,6 +807,14 @@ ACTOR Future rocksDBMetricLogger(UID id, { "LiveSstFilesSize", rocksdb::DB::Properties::kLiveSstFilesSize }, }; + state std::vector> strPropertyStats = { + { "LevelStats", rocksdb::DB::Properties::kLevelStats }, + }; + + state std::vector> levelStrPropertyStats = { + { "CompressionRatioAtLevel", rocksdb::DB::Properties::kCompressionRatioAtLevelPrefix }, + }; + state std::unordered_map readIteratorPoolStats = { { "NumReadIteratorsCreated", 0 }, { "NumTimesReadIteratorsReused", 0 }, @@ -815,21 +824,40 @@ ACTOR Future rocksDBMetricLogger(UID id, wait(delay(SERVER_KNOBS->ROCKSDB_METRICS_DELAY)); TraceEvent e("RocksDBMetrics", id); uint64_t stat; - for (auto& t : tickerStats) { - auto& [name, ticker, cum] = t; + for (auto& [name, ticker, cum] : tickerStats) { stat = statistics->getTickerCount(ticker); e.detail(name, stat - cum); cum = stat; } - for (auto& p : propertyStats) { - auto& [name, property] = p; + for (const auto& [name, property] : intPropertyStats) { stat = 0; // GetAggregatedIntProperty gets the aggregated int property from all column families. ASSERT(db->GetAggregatedIntProperty(property, &stat)); e.detail(name, stat); } + std::string propValue; + for (const auto& [name, property] : strPropertyStats) { + propValue = ""; + ASSERT(db->GetProperty(cf, property, &propValue)); + e.detail(name, propValue); + } + + rocksdb::ColumnFamilyMetaData cf_meta_data; + db->GetColumnFamilyMetaData(cf, &cf_meta_data); + int numLevels = static_cast(cf_meta_data.levels.size()); + std::string levelProp; + for (const auto& [name, property] : levelStrPropertyStats) { + levelProp = ""; + for (int level = 0; level < numLevels; level++) { + propValue = ""; + ASSERT(db->GetProperty(cf, property + std::to_string(level), &propValue)); + levelProp += std::to_string(level) + ":" + propValue + (level == numLevels - 1 ? "" : ","); + } + e.detail(name, levelProp); + } + stat = readIterPool->numReadIteratorsCreated(); e.detail("NumReadIteratorsCreated", stat - readIteratorPoolStats["NumReadIteratorsCreated"]); readIteratorPoolStats["NumReadIteratorsCreated"] = stat; @@ -983,13 +1011,13 @@ struct RocksDBKeyValueStore : IKeyValueStore { // The current thread and main thread are same when the code runs in simulation. // blockUntilReady() is getting the thread into deadlock state, so directly calling // the metricsLogger. - a.metrics = - rocksDBMetricLogger(id, options.statistics, perfContextMetrics, db, readIterPool, &a.counters) && - flowLockLogger(id, a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool); + a.metrics = rocksDBMetricLogger( + id, options.statistics, perfContextMetrics, db, readIterPool, &a.counters, cf) && + flowLockLogger(id, a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool); } else { onMainThread([&] { a.metrics = rocksDBMetricLogger( - id, options.statistics, perfContextMetrics, db, readIterPool, &a.counters) && + id, options.statistics, perfContextMetrics, db, readIterPool, &a.counters, cf) && flowLockLogger(id, a.readLock, a.fetchLock) && refreshReadIteratorPool(readIterPool); return Future(true); }).blockUntilReady(); From e98b32fa778fbbbfb6587b7687d41550c2d149bc Mon Sep 17 00:00:00 2001 From: Renxuan Wang Date: Mon, 18 Jul 2022 13:47:08 -0700 Subject: [PATCH 04/56] Fix processDiskReadSeconds and processDiskWriteSeconds. (#7609) --- flow/Platform.actor.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/flow/Platform.actor.cpp b/flow/Platform.actor.cpp index c07ff01bca6..486025e5877 100644 --- a/flow/Platform.actor.cpp +++ b/flow/Platform.actor.cpp @@ -1734,15 +1734,10 @@ SystemStatistics getSystemStatistics(std::string const& dataFolder, 0, returnStats.elapsed - std::min(returnStats.elapsed, (nowIOMilliSecs - (*statState)->lastIOMilliSecs) / 1000.0)); - returnStats.processDiskReadSeconds = std::max( - 0, - returnStats.elapsed - std::min(returnStats.elapsed, - (nowReadMilliSecs - (*statState)->lastReadMilliSecs) / 1000.0)); + returnStats.processDiskReadSeconds = + std::min(returnStats.elapsed, (nowReadMilliSecs - (*statState)->lastReadMilliSecs) / 1000.0); returnStats.processDiskWriteSeconds = - std::max(0, - returnStats.elapsed - - std::min(returnStats.elapsed, - (nowWriteMilliSecs - (*statState)->lastWriteMilliSecs) / 1000.0)); + std::min(returnStats.elapsed, (nowWriteMilliSecs - (*statState)->lastWriteMilliSecs) / 1000.0); returnStats.processDiskRead = (nowReads - (*statState)->lastReads); returnStats.processDiskWrite = (nowWrites - (*statState)->lastWrites); returnStats.processDiskWriteSectors = (nowWriteSectors - (*statState)->lastWriteSectors); From 7f9ee8cbf2b22415878e0d8ca941f5ee11b03199 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Tue, 19 Jul 2022 12:54:39 -0400 Subject: [PATCH 05/56] cherry pick 7613 - update tag client cache on getKeyLocation (#7616) --- fdbclient/NativeAPI.actor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index e11249f8c67..844dabae3d8 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -2834,6 +2834,7 @@ ACTOR Future getKeyLocation_internal(Database cx, auto locationInfo = cx->setCachedLocation(tenant, rep.tenantEntry, rep.results[0].first, rep.results[0].second); updateTssMappings(cx, rep); + updateTagMappings(cx, rep); return KeyRangeLocationInfo( rep.tenantEntry, From 9819054ea75f00d4bbb8214da86d1b80a4beb892 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 19 Jul 2022 22:27:29 -0700 Subject: [PATCH 06/56] Fix ScopeEventFieldTypeMismatch error for TLogMetrics Backport changes made to TLogServer in #4616 back to old TLog implementations. Old implementations were using SpecialCounter, which is string. The new type is int64_t. Without the change, we can have many events like: --- fdbserver/OldTLogServer_4_6.actor.cpp | 36 +++++++++++++-------------- fdbserver/OldTLogServer_6_0.actor.cpp | 36 +++++++++++++-------------- fdbserver/OldTLogServer_6_2.actor.cpp | 36 +++++++++++++-------------- 3 files changed, 51 insertions(+), 57 deletions(-) diff --git a/fdbserver/OldTLogServer_4_6.actor.cpp b/fdbserver/OldTLogServer_4_6.actor.cpp index 7fa6573a533..b523212f49c 100644 --- a/fdbserver/OldTLogServer_4_6.actor.cpp +++ b/fdbserver/OldTLogServer_4_6.actor.cpp @@ -458,24 +458,6 @@ struct LogData : NonCopyable, public ReferenceCounted { specialCounter(cc, "Version", [this]() { return this->version.get(); }); specialCounter(cc, "SharedBytesInput", [tLogData]() { return tLogData->bytesInput; }); specialCounter(cc, "SharedBytesDurable", [tLogData]() { return tLogData->bytesDurable; }); - specialCounter( - cc, "KvstoreBytesUsed", [tLogData]() { return tLogData->persistentData->getStorageBytes().used; }); - specialCounter( - cc, "KvstoreBytesFree", [tLogData]() { return tLogData->persistentData->getStorageBytes().free; }); - specialCounter(cc, "KvstoreBytesAvailable", [tLogData]() { - return tLogData->persistentData->getStorageBytes().available; - }); - specialCounter( - cc, "KvstoreBytesTotal", [tLogData]() { return tLogData->persistentData->getStorageBytes().total; }); - specialCounter( - cc, "QueueDiskBytesUsed", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().used; }); - specialCounter( - cc, "QueueDiskBytesFree", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().free; }); - specialCounter(cc, "QueueDiskBytesAvailable", [tLogData]() { - return tLogData->rawPersistentQueue->getStorageBytes().available; - }); - specialCounter( - cc, "QueueDiskBytesTotal", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().total; }); specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; }); } @@ -1424,7 +1406,23 @@ ACTOR Future tLogCore(TLogData* self, Reference logData) { logData->logId, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &logData->cc, - logData->logId.toString() + "/TLogMetrics")); + logData->logId.toString() + "/TLogMetrics", + [self = self](TraceEvent& te) { + StorageBytes sbTlog = self->persistentData->getStorageBytes(); + te.detail("KvstoreBytesUsed", sbTlog.used); + te.detail("KvstoreBytesFree", sbTlog.free); + te.detail("KvstoreBytesAvailable", sbTlog.available); + te.detail("KvstoreBytesTotal", sbTlog.total); + te.detail("KvstoreBytesTemp", sbTlog.temp); + + StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes(); + te.detail("QueueDiskBytesUsed", sbQueue.used); + te.detail("QueueDiskBytesFree", sbQueue.free); + te.detail("QueueDiskBytesAvailable", sbQueue.available); + te.detail("QueueDiskBytesTotal", sbQueue.total); + te.detail("QueueDiskBytesTemp", sbQueue.temp); + })); + logData->addActor.send(serveTLogInterface(self, logData->tli, logData, warningCollectorInput)); try { diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 2d016d4a6a5..61ed31fc057 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -554,24 +554,6 @@ struct LogData : NonCopyable, public ReferenceCounted { specialCounter(cc, "SharedBytesDurable", [tLogData]() { return tLogData->bytesDurable; }); specialCounter(cc, "SharedOverheadBytesInput", [tLogData]() { return tLogData->overheadBytesInput; }); specialCounter(cc, "SharedOverheadBytesDurable", [tLogData]() { return tLogData->overheadBytesDurable; }); - specialCounter( - cc, "KvstoreBytesUsed", [tLogData]() { return tLogData->persistentData->getStorageBytes().used; }); - specialCounter( - cc, "KvstoreBytesFree", [tLogData]() { return tLogData->persistentData->getStorageBytes().free; }); - specialCounter(cc, "KvstoreBytesAvailable", [tLogData]() { - return tLogData->persistentData->getStorageBytes().available; - }); - specialCounter( - cc, "KvstoreBytesTotal", [tLogData]() { return tLogData->persistentData->getStorageBytes().total; }); - specialCounter( - cc, "QueueDiskBytesUsed", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().used; }); - specialCounter( - cc, "QueueDiskBytesFree", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().free; }); - specialCounter(cc, "QueueDiskBytesAvailable", [tLogData]() { - return tLogData->rawPersistentQueue->getStorageBytes().available; - }); - specialCounter( - cc, "QueueDiskBytesTotal", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().total; }); specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; }); } @@ -2241,7 +2223,23 @@ ACTOR Future tLogCore(TLogData* self, logData->logId, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &logData->cc, - logData->logId.toString() + "/TLogMetrics")); + logData->logId.toString() + "/TLogMetrics", + [self = self](TraceEvent& te) { + StorageBytes sbTlog = self->persistentData->getStorageBytes(); + te.detail("KvstoreBytesUsed", sbTlog.used); + te.detail("KvstoreBytesFree", sbTlog.free); + te.detail("KvstoreBytesAvailable", sbTlog.available); + te.detail("KvstoreBytesTotal", sbTlog.total); + te.detail("KvstoreBytesTemp", sbTlog.temp); + + StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes(); + te.detail("QueueDiskBytesUsed", sbQueue.used); + te.detail("QueueDiskBytesFree", sbQueue.free); + te.detail("QueueDiskBytesAvailable", sbQueue.available); + te.detail("QueueDiskBytesTotal", sbQueue.total); + te.detail("QueueDiskBytesTemp", sbQueue.temp); + })); + logData->addActor.send(serveTLogInterface(self, tli, logData, warningCollectorInput)); logData->addActor.send(cleanupPeekTrackers(logData.getPtr())); logData->addActor.send(logPeekTrackers(logData.getPtr())); diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index ffb449aff58..55498be35b5 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -641,24 +641,6 @@ struct LogData : NonCopyable, public ReferenceCounted { specialCounter(cc, "SharedBytesDurable", [tLogData]() { return tLogData->bytesDurable; }); specialCounter(cc, "SharedOverheadBytesInput", [tLogData]() { return tLogData->overheadBytesInput; }); specialCounter(cc, "SharedOverheadBytesDurable", [tLogData]() { return tLogData->overheadBytesDurable; }); - specialCounter( - cc, "KvstoreBytesUsed", [tLogData]() { return tLogData->persistentData->getStorageBytes().used; }); - specialCounter( - cc, "KvstoreBytesFree", [tLogData]() { return tLogData->persistentData->getStorageBytes().free; }); - specialCounter(cc, "KvstoreBytesAvailable", [tLogData]() { - return tLogData->persistentData->getStorageBytes().available; - }); - specialCounter( - cc, "KvstoreBytesTotal", [tLogData]() { return tLogData->persistentData->getStorageBytes().total; }); - specialCounter( - cc, "QueueDiskBytesUsed", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().used; }); - specialCounter( - cc, "QueueDiskBytesFree", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().free; }); - specialCounter(cc, "QueueDiskBytesAvailable", [tLogData]() { - return tLogData->rawPersistentQueue->getStorageBytes().available; - }); - specialCounter( - cc, "QueueDiskBytesTotal", [tLogData]() { return tLogData->rawPersistentQueue->getStorageBytes().total; }); specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); }); specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); }); specialCounter(cc, "ActivePeekStreams", [tLogData]() { return tLogData->activePeekStreams; }); @@ -2701,7 +2683,23 @@ ACTOR Future tLogCore(TLogData* self, logData->logId, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &logData->cc, - logData->logId.toString() + "/TLogMetrics")); + logData->logId.toString() + "/TLogMetrics", + [self = self](TraceEvent& te) { + StorageBytes sbTlog = self->persistentData->getStorageBytes(); + te.detail("KvstoreBytesUsed", sbTlog.used); + te.detail("KvstoreBytesFree", sbTlog.free); + te.detail("KvstoreBytesAvailable", sbTlog.available); + te.detail("KvstoreBytesTotal", sbTlog.total); + te.detail("KvstoreBytesTemp", sbTlog.temp); + + StorageBytes sbQueue = self->rawPersistentQueue->getStorageBytes(); + te.detail("QueueDiskBytesUsed", sbQueue.used); + te.detail("QueueDiskBytesFree", sbQueue.free); + te.detail("QueueDiskBytesAvailable", sbQueue.available); + te.detail("QueueDiskBytesTotal", sbQueue.total); + te.detail("QueueDiskBytesTemp", sbQueue.temp); + })); + logData->addActor.send(serveTLogInterface(self, tli, logData, warningCollectorInput)); logData->addActor.send(cleanupPeekTrackers(logData.getPtr())); logData->addActor.send(logPeekTrackers(logData.getPtr())); From ef06bc475dca7f87e8821881dd95b5da7940f0ff Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 19 Jul 2022 16:30:44 -0700 Subject: [PATCH 07/56] Add getMappedRange latency metrics --- fdbserver/storageserver.actor.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 991410f6ad7..3cd60ef61db 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -940,6 +940,9 @@ struct StorageServer { LatencySample readLatencySample; LatencyBands readLatencyBands; + LatencySample mappedRangeSample; // Samples getMappedRange latency + LatencySample mappedRangeRemoteSample; // Samples getMappedRange remote subquery latency + LatencySample mappedRangeLocalSample; // Samples getMappedRange local subquery latency Counters(StorageServer* self) : cc("StorageServer", self->thisServerID.toString()), allQueries("QueryQueue", cc), @@ -971,7 +974,19 @@ struct StorageServer { self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SAMPLE_SIZE), - readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY) { + readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY), + mappedRangeSample("GetMappedRangeMetrics", + self->thisServerID, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + mappedRangeRemoteSample("GetMappedRangeRemoteMetrics", + self->thisServerID, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + mappedRangeLocalSample("GetMappedRangeLocalMetrics", + self->thisServerID, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SAMPLE_SIZE) { specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; }); specialCounter(cc, "Version", [self]() { return self->version.get(); }); specialCounter(cc, "StorageVersion", [self]() { return self->storageVersion(); }); @@ -2808,6 +2823,7 @@ ACTOR Future quickGetValue(StorageServer* data, // To provide span context, tags, debug ID to underlying lookups. GetMappedKeyValuesRequest* pOriginalReq) { state GetValueReqAndResultRef getValue; + state double getValueStart = g_network->timer(); getValue.key = key; if (data->shards[key]->isReadable()) { @@ -2828,6 +2844,8 @@ ACTOR Future quickGetValue(StorageServer* data, if (!reply.error.present()) { ++data->counters.quickGetValueHit; copyOptionalValue(a, getValue, reply.value); + const double duration = g_network->timer() - getValueStart; + data->counters.mappedRangeLocalSample.addMeasurement(duration); return getValue; } // Otherwise fallback. @@ -2847,6 +2865,8 @@ ACTOR Future quickGetValue(StorageServer* data, // TODO: async in case it needs to read from other servers. Optional valueOption = wait(valueFuture); copyOptionalValue(a, getValue, valueOption); + const double duration = g_network->timer() - getValueStart; + data->counters.mappedRangeRemoteSample.addMeasurement(duration); return getValue; } else { throw quick_get_value_miss(); @@ -3412,6 +3432,7 @@ ACTOR Future quickGetKeyValues( // To provide span context, tags, debug ID to underlying lookups. GetMappedKeyValuesRequest* pOriginalReq) { state GetRangeReqAndResultRef getRange; + state double getValuesStart = g_network->timer(); getRange.begin = firstGreaterOrEqual(KeyRef(*a, prefix)); getRange.end = firstGreaterOrEqual(strinc(prefix, *a)); try { @@ -3442,6 +3463,8 @@ ACTOR Future quickGetKeyValues( // Convert GetKeyValuesReply to RangeResult. a->dependsOn(reply.arena); getRange.result = RangeResultRef(reply.data, reply.more); + const double duration = g_network->timer() - getValuesStart; + data->counters.mappedRangeLocalSample.addMeasurement(duration); return getRange; } // Otherwise fallback. @@ -3461,6 +3484,8 @@ ACTOR Future quickGetKeyValues( RangeResult rangeResult = wait(rangeResultFuture); a->dependsOn(rangeResult.arena()); getRange.result = rangeResult; + const double duration = g_network->timer() - getValuesStart; + data->counters.mappedRangeRemoteSample.addMeasurement(duration); return getRange; } else { throw quick_get_key_values_miss(); @@ -3995,6 +4020,7 @@ ACTOR Future getMappedKeyValuesQ(StorageServer* data, GetMappedKeyValuesRe double duration = g_network->timer() - req.requestTime(); data->counters.readLatencySample.addMeasurement(duration); + data->counters.mappedRangeSample.addMeasurement(duration); if (data->latencyBandConfig.present()) { int maxReadBytes = data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits::max()); From c4fbc0b508611297034e3c86f5861f99a401c0df Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Wed, 20 Jul 2022 11:54:32 -0700 Subject: [PATCH 08/56] Increase AllTags field length in TLogReady --- fdbserver/OldTLogServer_6_0.actor.cpp | 6 ++++-- fdbserver/OldTLogServer_6_2.actor.cpp | 6 ++++-- fdbserver/TLogServer.actor.cpp | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index 61ed31fc057..1502dbaa2b0 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -2768,8 +2768,10 @@ ACTOR Future tLogStart(TLogData* self, InitializeTLogRequest req, Locality req.reply.send(recruited); TraceEvent("TLogReady", logData->logId) - .detail("AllTags", describe(req.allTags)) - .detail("Locality", logData->locality); + .detail("Locality", logData->locality) + .setMaxEventLength(11000) + .setMaxFieldLength(10000) + .detail("AllTags", describe(req.allTags)); updater = Void(); wait(tLogCore(self, logData, recruited, pulledRecoveryVersions)); diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index 55498be35b5..ce9b4e5c407 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -3259,8 +3259,10 @@ ACTOR Future tLogStart(TLogData* self, InitializeTLogRequest req, Locality req.reply.send(recruited); TraceEvent("TLogReady", logData->logId) - .detail("AllTags", describe(req.allTags)) - .detail("Locality", logData->locality); + .detail("Locality", logData->locality) + .setMaxEventLength(11000) + .setMaxFieldLength(10000) + .detail("AllTags", describe(req.allTags)); updater = Void(); wait(tLogCore(self, logData, recruited, pulledRecoveryVersions)); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 755bacf9aeb..546c72be222 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -3532,8 +3532,10 @@ ACTOR Future tLogStart(TLogData* self, InitializeTLogRequest req, Locality req.reply.send(recruited); TraceEvent("TLogReady", logData->logId) - .detail("AllTags", describe(req.allTags)) - .detail("Locality", logData->locality); + .detail("Locality", logData->locality) + .setMaxEventLength(11000) + .setMaxFieldLength(10000) + .detail("AllTags", describe(req.allTags)); updater = Void(); wait(tLogCore(self, logData, recruited, pulledRecoveryVersions)); From 5891f8be05dec560523d09aa4711b06181041d5e Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Wed, 20 Jul 2022 15:05:10 -0700 Subject: [PATCH 09/56] Log tlog initialization --- fdbserver/TagPartitionedLogSystem.actor.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fdbserver/TagPartitionedLogSystem.actor.cpp b/fdbserver/TagPartitionedLogSystem.actor.cpp index 30b6ddf4cbf..755ff7a1147 100644 --- a/fdbserver/TagPartitionedLogSystem.actor.cpp +++ b/fdbserver/TagPartitionedLogSystem.actor.cpp @@ -2614,6 +2614,8 @@ ACTOR Future TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst req.tLogLocalities = localities; req.tLogPolicy = logSet->tLogPolicy; req.locality = remoteLocality; + TraceEvent("RemoteTLogRouterReplies", self->dbgid) + .detail("WorkerID", remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].id()); logRouterInitializationReplies.push_back(transformErrors( throwErrorOr( remoteWorkers.logRouters[i % remoteWorkers.logRouters.size()].logRouter.getReplyUnlessFailedFor( @@ -2693,11 +2695,13 @@ ACTOR Future TagPartitionedLogSystem::newRemoteEpoch(TagPartitionedLogSyst } remoteTLogInitializationReplies.reserve(remoteWorkers.remoteTLogs.size()); - for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) + for (int i = 0; i < remoteWorkers.remoteTLogs.size(); i++) { + TraceEvent("RemoteTLogReplies", self->dbgid).detail("WorkerID", remoteWorkers.remoteTLogs[i].id()); remoteTLogInitializationReplies.push_back(transformErrors( throwErrorOr(remoteWorkers.remoteTLogs[i].tLog.getReplyUnlessFailedFor( remoteTLogReqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), cluster_recovery_failed())); + } TraceEvent("RemoteLogRecruitment_InitializingRemoteLogs") .detail("StartVersion", logSet->startVersion) @@ -2966,11 +2970,13 @@ ACTOR Future> TagPartitionedLogSystem::newEpoch( } initializationReplies.reserve(recr.tLogs.size()); - for (int i = 0; i < recr.tLogs.size(); i++) + for (int i = 0; i < recr.tLogs.size(); i++) { + TraceEvent("PrimaryTLogReplies", logSystem->getDebugID()).detail("WorkerID", recr.tLogs[i].id()); initializationReplies.push_back(transformErrors( throwErrorOr(recr.tLogs[i].tLog.getReplyUnlessFailedFor( reqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), cluster_recovery_failed())); + } state std::vector> recoveryComplete; @@ -3034,11 +3040,14 @@ ACTOR Future> TagPartitionedLogSystem::newEpoch( } satelliteInitializationReplies.reserve(recr.satelliteTLogs.size()); - for (int i = 0; i < recr.satelliteTLogs.size(); i++) + for (int i = 0; i < recr.satelliteTLogs.size(); i++) { + TraceEvent("PrimarySatelliteTLogReplies", logSystem->getDebugID()) + .detail("WorkerID", recr.satelliteTLogs[i].id()); satelliteInitializationReplies.push_back(transformErrors( throwErrorOr(recr.satelliteTLogs[i].tLog.getReplyUnlessFailedFor( sreqs[i], SERVER_KNOBS->TLOG_TIMEOUT, SERVER_KNOBS->MASTER_FAILURE_SLOPE_DURING_RECOVERY)), cluster_recovery_failed())); + } wait(waitForAll(satelliteInitializationReplies) || oldRouterRecruitment); From ea95420823fad9f41474fb7600be082a0d5c21f2 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Fri, 22 Jul 2022 11:00:36 -0400 Subject: [PATCH 10/56] cherry pick 7653 - Track when version not found in version vector. --- fdbclient/DatabaseContext.h | 1 + fdbclient/NativeAPI.actor.cpp | 43 +++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 6eed163162f..470e3488386 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -528,6 +528,7 @@ class DatabaseContext : public ReferenceCounted, public FastAll Counter transactionsExpensiveClearCostEstCount; Counter transactionGrvFullBatches; Counter transactionGrvTimedOutBatches; + Counter transactionCommitVersionNotFoundForSS; ContinuousSample latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit, bgLatencies, bgGranulesPerRequest; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 844dabae3d8..3fe9f4456f5 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -240,7 +240,7 @@ void DatabaseContext::getLatestCommitVersions(const Reference& loc return; } - if (ssVersionVectorCache.getMaxVersion() != invalidVersion && readVersion > ssVersionVectorCache.getMaxVersion()) { + if (readVersion > ssVersionVectorCache.getMaxVersion()) { if (!CLIENT_KNOBS->FORCE_GRV_CACHE_OFF && !info->options.skipGrvCache && info->options.useGrvCache) { return; } else { @@ -253,16 +253,30 @@ void DatabaseContext::getLatestCommitVersions(const Reference& loc std::map> versionMap; // order the versions to be returned for (int i = 0; i < locationInfo->locations()->size(); i++) { - UID uid = locationInfo->locations()->getId(i); - if (ssidTagMapping.find(uid) != ssidTagMapping.end()) { - Tag tag = ssidTagMapping[uid]; + bool updatedVersionMap = false; + Version commitVersion = invalidVersion; + Tag tag = invalidTag; + auto iter = ssidTagMapping.find(locationInfo->locations()->getId(i)); + if (iter != ssidTagMapping.end()) { + tag = iter->second; if (ssVersionVectorCache.hasVersion(tag)) { - Version commitVersion = ssVersionVectorCache.getVersion(tag); // latest commit version + commitVersion = ssVersionVectorCache.getVersion(tag); // latest commit version if (commitVersion < readVersion) { + updatedVersionMap = true; versionMap[commitVersion].insert(tag); } } } + if (!updatedVersionMap) { + // This can have a performance impact if vv is enabled, so provide some diags + TraceEvent(SevDebug, "CommitVersionNotFoundForSS") + .detail("InSSIDMap", iter != ssidTagMapping.end() ? 1 : 0) + .detail("Tag", tag) + .detail("CommitVersion", commitVersion) + .detail("ReadVersion", readVersion) + .detail("VersionVector", ssVersionVectorCache.toString()); + ++transactionCommitVersionNotFoundForSS; + } } // insert the commit versions in the version vector. @@ -1445,13 +1459,13 @@ DatabaseContext::DatabaseContext(ReferenceSHARD_STAT_SMOOTH_AMOUNT), + transactionCommitVersionNotFoundForSS("CommitVersionNotFoundForSS", cc), latencies(1000), readLatencies(1000), + commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), bgLatencies(1000), + bgGranulesPerRequest(1000), outstandingWatches(0), sharedStatePtr(nullptr), lastGrvTime(0.0), cachedReadVersion(0), + lastRkBatchThrottleTime(0.0), lastRkDefaultThrottleTime(0.0), lastProxyRequestTime(0.0), + transactionTracingSample(false), taskID(taskID), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), + coordinator(coordinator), apiVersion(apiVersion), mvCacheInsertLocation(0), healthMetricsLastUpdated(0), + detailedHealthMetricsLastUpdated(0), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT), specialKeySpace(std::make_unique(specialKeys.begin, specialKeys.end, /* test */ false)), connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) { dbId = deterministicRandom()->randomUniqueID(); @@ -1719,8 +1733,9 @@ DatabaseContext::DatabaseContext(const Error& err) transactionsProcessBehind("ProcessBehind", cc), transactionsThrottled("Throttled", cc), transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), transactionGrvFullBatches("NumGrvFullBatches", cc), transactionGrvTimedOutBatches("NumGrvTimedOutBatches", cc), - latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), - bytesPerCommit(1000), bgLatencies(1000), bgGranulesPerRequest(1000), transactionTracingSample(false), + transactionCommitVersionNotFoundForSS("CommitVersionNotFoundForSS", cc), latencies(1000), readLatencies(1000), + commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), bgLatencies(1000), + bgGranulesPerRequest(1000), transactionTracingSample(false), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT), connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {} From f7a6c1d64041a684681d9df8ea46bc0eeb1bd0aa Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Thu, 21 Jul 2022 19:38:59 -0700 Subject: [PATCH 11/56] Add TraceEvent to the construction/destruction of DatabaseContext This is to figure out why huge databaseLogger related TraceEvents generated after 7.1 released. --- fdbclient/NativeAPI.actor.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 844dabae3d8..3a2cab5db65 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1454,6 +1454,9 @@ DatabaseContext::DatabaseContext(ReferenceSHARD_STAT_SMOOTH_AMOUNT), specialKeySpace(std::make_unique(specialKeys.begin, specialKeys.end, /* test */ false)), connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) { + + TraceEvent("DatabaseContextCreated").backtrace(); + dbId = deterministicRandom()->randomUniqueID(); connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) ? Void() @@ -1762,6 +1765,8 @@ DatabaseContext::~DatabaseContext() { it->second->notifyContextDestroyed(); ASSERT_ABORT(server_interf.empty()); locationCache.insert(allKeys, Reference()); + + TraceEvent("DatabaseContextDestructed").backtrace(); } Optional DatabaseContext::getCachedLocation(const Optional& tenantName, From dd1a145a1ca34e373900ac1ac54e164a3d7bf83a Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Thu, 21 Jul 2022 23:11:33 -0700 Subject: [PATCH 12/56] fixup! Update code per comments --- fdbclient/NativeAPI.actor.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 3a2cab5db65..794e56d151f 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1455,9 +1455,10 @@ DatabaseContext::DatabaseContext(Reference(specialKeys.begin, specialKeys.end, /* test */ false)), connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) { - TraceEvent("DatabaseContextCreated").backtrace(); - dbId = deterministicRandom()->randomUniqueID(); + + TraceEvent("DatabaseContextCreated", dbId).backtrace(); + connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) ? Void() : clientInfo->onChange(); @@ -1766,7 +1767,7 @@ DatabaseContext::~DatabaseContext() { ASSERT_ABORT(server_interf.empty()); locationCache.insert(allKeys, Reference()); - TraceEvent("DatabaseContextDestructed").backtrace(); + TraceEvent("DatabaseContextDestructed", dbId).backtrace(); } Optional DatabaseContext::getCachedLocation(const Optional& tenantName, From d04976d1aa0332ebe1c4b7e32c9e73b1e7645367 Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Thu, 21 Jul 2022 15:00:00 -0700 Subject: [PATCH 13/56] Add CommitDebug event after Commit Proxy reporting committed version to master --- fdbserver/CommitProxyServer.actor.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fdbserver/CommitProxyServer.actor.cpp b/fdbserver/CommitProxyServer.actor.cpp index 86ed60d3223..1918a575a06 100644 --- a/fdbserver/CommitProxyServer.actor.cpp +++ b/fdbserver/CommitProxyServer.actor.cpp @@ -1465,7 +1465,7 @@ ACTOR Future reply(CommitBatchContext* self) { state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData; state Span span("MP:reply"_loc, self->span.context); - const Optional& debugID = self->debugID; + state const Optional& debugID = self->debugID; if (self->prevVersion && self->commitVersion - self->prevVersion < SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT / 2) { //TraceEvent("CPAdvanceMinVersion", self->pProxyCommitData->dbgid).detail("PrvVersion", self->prevVersion).detail("CommitVersion", self->commitVersion).detail("Master", self->pProxyCommitData->master.id().toString()).detail("TxSize", self->trs.size()); @@ -1506,6 +1506,12 @@ ACTOR Future reply(CommitBatchContext* self) { writtenTags), TaskPriority::ProxyMasterVersionReply)); } + + if (debugID.present()) { + g_traceBatch.addEvent( + "CommitDebug", debugID.get().first(), "CommitProxyServer.commitBatch.AfterReportRawCommittedVersion"); + } + if (self->commitVersion > pProxyCommitData->committedVersion.get()) { pProxyCommitData->locked = self->lockedAfter; pProxyCommitData->metadataVersion = self->metadataVersionAfter; From eee02bfd66d3bdcd07201f66b17624cfe02b1aa3 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Mon, 25 Jul 2022 11:35:18 -0400 Subject: [PATCH 14/56] Respond to review comments. --- fdbclient/NativeAPI.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 3fe9f4456f5..7ef8f9a313b 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -267,7 +267,7 @@ void DatabaseContext::getLatestCommitVersions(const Reference& loc } } } - if (!updatedVersionMap) { + if (!updatedVersionMap && commitVersion != readVersion) { // This can have a performance impact if vv is enabled, so provide some diags TraceEvent(SevDebug, "CommitVersionNotFoundForSS") .detail("InSSIDMap", iter != ssidTagMapping.end() ? 1 : 0) From bd94146331ea823c470c51a7a37f29df5d071011 Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Sun, 24 Jul 2022 16:51:18 -0700 Subject: [PATCH 15/56] Error in FetchKey GRV shouldn't fail storageserver --- fdbserver/storageserver.actor.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 3cd60ef61db..7cfb72262cc 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -5728,11 +5728,18 @@ ACTOR Future fetchKeys(StorageServer* data, AddingShard* shard) { // At iteration 1, dest SS selects GRV as fetchVersion and (suppose) can read the data from src SS. // Then dest SS waits its version catch up with this GRV version and write the data to disk. // Note that dest SS waits outside the fetchKeysParallelismLock. + fetchVersion = std::max(shard->fetchVersion, data->version.get()); if (lastError.code() == error_code_transaction_too_old) { - Version grvVersion = wait(tr.getRawReadVersion()); - fetchVersion = std::max(grvVersion, data->version.get()); - } else { - fetchVersion = std::max(shard->fetchVersion, data->version.get()); + try { + Version grvVersion = wait(tr.getRawReadVersion()); + fetchVersion = std::max(grvVersion, fetchVersion); + } catch (Error& e) { + // Note that error in getting GRV doesn't affect any storage server state. Therefore, we catch all + // errors here without failing the storage server. When error happens, fetchVersion fall back to + // the above computed fetchVersion. + TraceEvent(SevWarn, "FetchKeyGRVError", data->thisServerID).error(e); + lastError = e; + } } ASSERT(fetchVersion >= shard->fetchVersion); // at this point, shard->fetchVersion is the last fetchVersion shard->fetchVersion = fetchVersion; From 3da627b4ce6009448dbd2d999ca226d753bd343e Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Sun, 24 Jul 2022 17:20:49 -0700 Subject: [PATCH 16/56] Add testing mechanism --- fdbserver/storageserver.actor.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 7cfb72262cc..7f87fd0b4d4 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -5729,11 +5729,23 @@ ACTOR Future fetchKeys(StorageServer* data, AddingShard* shard) { // Then dest SS waits its version catch up with this GRV version and write the data to disk. // Note that dest SS waits outside the fetchKeysParallelismLock. fetchVersion = std::max(shard->fetchVersion, data->version.get()); + if (g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01)) { + // Test using GRV version for fetchKey. + lastError = transaction_too_old(); + } if (lastError.code() == error_code_transaction_too_old) { try { Version grvVersion = wait(tr.getRawReadVersion()); + if (g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01)) { + // Test failed GRV request. + throw proxy_memory_limit_exceeded(); + } fetchVersion = std::max(grvVersion, fetchVersion); } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) { + throw e; + } + // Note that error in getting GRV doesn't affect any storage server state. Therefore, we catch all // errors here without failing the storage server. When error happens, fetchVersion fall back to // the above computed fetchVersion. From 7fb2146c0c69d51759d07db5d480116eefde885b Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 24 Jul 2022 13:38:06 -0700 Subject: [PATCH 17/56] Add timeout to places during Status generation Otherwise, the status generation becomes a blocking call, which is undesirable, because many operational tools require a timely response, even if it is in- complete. --- fdbserver/Status.actor.cpp | 43 +++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 123ea1e48a8..d4b52bb47eb 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -832,7 +832,8 @@ ACTOR static Future processStatusFetcher( } } - std::vector addressVec = wait(coordinators.ccr->getConnectionString().tryResolveHostnames()); + std::vector addressVec = + wait(timeoutError(coordinators.ccr->getConnectionString().tryResolveHostnames(), 5.0)); for (const auto& coordinator : addressVec) { roles.addCoordinatorRole(coordinator); } @@ -1928,7 +1929,7 @@ ACTOR static Future>> ge std::vector{ "StorageMetrics", "ReadLatencyMetrics", "ReadLatencyBands", "BusiestReadTag" })) && store(busiestWriteTags, getServerBusiestWriteTags(servers, address_workers, rkWorker)) && - store(metadata, getServerMetadata(servers, cx, true))); + store(metadata, timeoutError(getServerMetadata(servers, cx, true), 5.0))); ASSERT(busiestWriteTags.size() == results.size() && metadata.size() == results.size()); for (int i = 0; i < results.size(); ++i) { @@ -2757,18 +2758,29 @@ ACTOR Future storageWigglerStatsFetcher(DatabaseConfiguration state Reference tr(new ReadYourWritesTransaction(cx)); state Optional primaryV; state Optional remoteV; - loop { - try { - if (use_system_priority) { - tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + state Future readTimeout = delay(5); // avoid looping forever + try { + loop { + try { + if (readTimeout.isReady()) { + throw timed_out(); + } + + if (use_system_priority) { + tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); + } + int64_t timeout_ms = 5000; + tr->setOption(FDBTransactionOptions::TIMEOUT, StringRef((uint8_t*)&timeout_ms, sizeof(int64_t))); + wait(store(primaryV, StorageWiggleMetrics::runGetTransaction(tr, true)) && + store(remoteV, StorageWiggleMetrics::runGetTransaction(tr, false))); + wait(tr->commit()); + break; + } catch (Error& e) { + wait(tr->onError(e)); } - wait(store(primaryV, StorageWiggleMetrics::runGetTransaction(tr, true)) && - store(remoteV, StorageWiggleMetrics::runGetTransaction(tr, false))); - wait(tr->commit()); - break; - } catch (Error& e) { - wait(tr->onError(e)); } + } catch (Error& e) { + TraceEvent(SevWarn, "StorageWigglerStatsError").error(e); } JsonBuilderObject res; @@ -3009,9 +3021,10 @@ ACTOR Future clusterGetStatus( if (configuration.get().perpetualStorageWiggleSpeed > 0) { state Future>> primaryWiggleValues; state Future>> remoteWiggleValues; + double timeout = g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01) ? 0.0 : 2.0; - primaryWiggleValues = readStorageWiggleValues(cx, true, true); - remoteWiggleValues = readStorageWiggleValues(cx, false, true); + primaryWiggleValues = timeoutError(readStorageWiggleValues(cx, true, true), timeout); + remoteWiggleValues = timeoutError(readStorageWiggleValues(cx, false, true), timeout); wait(store(storageWiggler, storageWigglerStatsFetcher(configuration.get(), cx, true)) && success(primaryWiggleValues) && success(remoteWiggleValues)); @@ -3025,7 +3038,7 @@ ACTOR Future clusterGetStatus( wait(success(primaryDCFO)); std::vector coordinatorAddresses = - wait(coordinators.ccr->getConnectionString().tryResolveHostnames()); + wait(timeoutError(coordinators.ccr->getConnectionString().tryResolveHostnames(), 5.0)); int logFaultTolerance = 100; if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) { From 1a83855a70dc4614001ddb440a3da1fdf635e23b Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 24 Jul 2022 14:30:42 -0700 Subject: [PATCH 18/56] Fix test failure when getting wiggle value timed out --- .../source/mr-status-json-schemas.rst.inc | 3 +- fdbclient/Schemas.cpp | 3 +- fdbserver/Status.actor.cpp | 29 +++++++++++++------ 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index 6047bbc37f6..ac9ed77da39 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -523,7 +523,8 @@ "duplicate_mutation_streams", "duplicate_mutation_fetch_timeout", "primary_dc_missing", - "fetch_primary_dc_timeout" + "fetch_primary_dc_timeout", + "fetch_storage_wiggler_stats_timeout" ] }, "issues":[ diff --git a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 519a10c4597..9b8ff098dc7 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -554,7 +554,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "duplicate_mutation_streams", "duplicate_mutation_fetch_timeout", "primary_dc_missing", - "fetch_primary_dc_timeout" + "fetch_primary_dc_timeout", + "fetch_storage_wiggler_stats_timeout" ] }, "issues":[ diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index d4b52bb47eb..de8cdade44c 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -3023,15 +3023,26 @@ ACTOR Future clusterGetStatus( state Future>> remoteWiggleValues; double timeout = g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01) ? 0.0 : 2.0; - primaryWiggleValues = timeoutError(readStorageWiggleValues(cx, true, true), timeout); - remoteWiggleValues = timeoutError(readStorageWiggleValues(cx, false, true), timeout); - wait(store(storageWiggler, storageWigglerStatsFetcher(configuration.get(), cx, true)) && - success(primaryWiggleValues) && success(remoteWiggleValues)); - - for (auto& p : primaryWiggleValues.get()) - wiggleServers.insert(p.first); - for (auto& p : remoteWiggleValues.get()) - wiggleServers.insert(p.first); + try { + primaryWiggleValues = timeoutError(readStorageWiggleValues(cx, true, true), timeout); + remoteWiggleValues = timeoutError(readStorageWiggleValues(cx, false, true), timeout); + wait(store(storageWiggler, storageWigglerStatsFetcher(configuration.get(), cx, true)) && + success(primaryWiggleValues) && success(remoteWiggleValues)); + + for (auto& p : primaryWiggleValues.get()) + wiggleServers.insert(p.first); + for (auto& p : remoteWiggleValues.get()) + wiggleServers.insert(p.first); + } catch (Error& e) { + if (!primaryWiggleValues.canGet()) + messages.push_back( + JsonString::makeMessage("fetch_storage_wiggler_stats_timeout", + "Fetching wiggling servers in the primary region timed out.")); + if (!remoteWiggleValues.canGet()) + messages.push_back( + JsonString::makeMessage("fetch_storage_wiggler_stats_timeout", + "Fetching wiggling servers in the remote region timed out.")); + } } state std::vector workerStatuses = wait(getAll(futures2)); From cbd11a24afd54c1e93a6afb48b3d59ec250f122c Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Sun, 24 Jul 2022 14:51:49 -0700 Subject: [PATCH 19/56] Reduce a DNS lookup in Status generation --- fdbserver/Status.actor.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index de8cdade44c..8a4f29a5314 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -735,6 +735,7 @@ ACTOR static Future processStatusFetcher( std::vector> grvProxies, std::vector blobWorkers, ServerCoordinators coordinators, + std::vector coordinatorAddresses, Database cx, Optional configuration, Optional healthyZone, @@ -832,9 +833,7 @@ ACTOR static Future processStatusFetcher( } } - std::vector addressVec = - wait(timeoutError(coordinators.ccr->getConnectionString().tryResolveHostnames(), 5.0)); - for (const auto& coordinator : addressVec) { + for (const auto& coordinator : coordinatorAddresses) { roles.addCoordinatorRole(coordinator); } @@ -2964,6 +2963,7 @@ ACTOR Future clusterGetStatus( statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons); + state std::vector coordinatorAddresses; if (configuration.present()) { // Do the latency probe by itself to avoid interference from other status activities state bool isAvailable = true; @@ -3048,8 +3048,9 @@ ACTOR Future clusterGetStatus( state std::vector workerStatuses = wait(getAll(futures2)); wait(success(primaryDCFO)); - std::vector coordinatorAddresses = + std::vector addresses = wait(timeoutError(coordinators.ccr->getConnectionString().tryResolveHostnames(), 5.0)); + coordinatorAddresses = std::move(addresses); int logFaultTolerance = 100; if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) { @@ -3190,6 +3191,7 @@ ACTOR Future clusterGetStatus( grvProxies, blobWorkers, coordinators, + coordinatorAddresses, cx, configuration, loadResult.present() ? loadResult.get().healthyZone : Optional(), From ed6e29b8d4ee1b2cd40fd18598b0019492247a18 Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Sun, 24 Jul 2022 21:51:10 -0700 Subject: [PATCH 20/56] TLog track unpopped recovery tag --- fdbserver/TLogServer.actor.cpp | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 546c72be222..d2fc0a9def9 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -518,7 +518,8 @@ struct LogData : NonCopyable, public ReferenceCounted { Deque>>> messageBlocks; std::vector>> tag_data; // tag.locality | tag.id - int unpoppedRecoveredTags; + int unpoppedRecoveredTagCount; + std::set unpoppedRecoveredTags; std::map> waitingTags; Reference getTagData(Tag tag) { @@ -642,7 +643,7 @@ struct LogData : NonCopyable, public ReferenceCounted { std::string context) : stopped(false), initialized(false), queueCommittingVersion(0), knownCommittedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), minPoppedTagVersion(0), - minPoppedTag(invalidTag), unpoppedRecoveredTags(0), cc("TLog", interf.id().toString()), + minPoppedTag(invalidTag), unpoppedRecoveredTagCount(0), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), blockingPeeks("BlockingPeeks", cc), blockingPeekTimeouts("BlockingPeekTimeouts", cc), emptyPeeks("EmptyPeeks", cc), nonEmptyPeeks("NonEmptyPeeks", cc), logId(interf.id()), protocolVersion(protocolVersion), @@ -1196,14 +1197,20 @@ ACTOR Future tLogPopCore(TLogData* self, Tag inputTag, Version to, Referen if (tagData->unpoppedRecovered && upTo > logData->recoveredAt) { tagData->unpoppedRecovered = false; - logData->unpoppedRecoveredTags--; + logData->unpoppedRecoveredTagCount--; + logData->unpoppedRecoveredTags.erase(tag); TraceEvent("TLogPoppedTag", logData->logId) - .detail("Tags", logData->unpoppedRecoveredTags) + .detail("Tags", logData->unpoppedRecoveredTagCount) .detail("Tag", tag.toString()) .detail("DurableKCVer", logData->durableKnownCommittedVersion) - .detail("RecoveredAt", logData->recoveredAt); - if (logData->unpoppedRecoveredTags == 0 && logData->durableKnownCommittedVersion >= logData->recoveredAt && - logData->recoveryComplete.canBeSet()) { + .detail("RecoveredAt", logData->recoveredAt) + .detail("UnpoppedTags", describe(logData->unpoppedRecoveredTags)); + if (logData->unpoppedRecoveredTagCount == 0 && + logData->durableKnownCommittedVersion >= logData->recoveredAt && logData->recoveryComplete.canBeSet()) { + TraceEvent("TLogRecoveryComplete", logData->logId) + .detail("Tags", logData->unpoppedRecoveredTagCount) + .detail("DurableKCVer", logData->durableKnownCommittedVersion) + .detail("RecoveredAt", logData->recoveredAt); logData->recoveryComplete.send(Void()); } } @@ -2148,10 +2155,10 @@ ACTOR Future doQueueCommit(TLogData* self, ASSERT(ver > logData->queueCommittedVersion.get()); logData->durableKnownCommittedVersion = knownCommittedVersion; - if (logData->unpoppedRecoveredTags == 0 && knownCommittedVersion >= logData->recoveredAt && + if (logData->unpoppedRecoveredTagCount == 0 && knownCommittedVersion >= logData->recoveredAt && logData->recoveryComplete.canBeSet()) { TraceEvent("TLogRecoveryComplete", logData->logId) - .detail("Tags", logData->unpoppedRecoveredTags) + .detail("Tags", logData->unpoppedRecoveredTagCount) .detail("DurableKCVer", logData->durableKnownCommittedVersion) .detail("RecoveredAt", logData->recoveredAt); logData->recoveryComplete.send(Void()); @@ -3438,7 +3445,8 @@ ACTOR Future tLogStart(TLogData* self, InitializeTLogRequest req, Locality logData->queueCommittedVersion.set(logData->unrecoveredBefore - 1); logData->version.set(logData->unrecoveredBefore - 1); - logData->unpoppedRecoveredTags = req.allTags.size(); + logData->unpoppedRecoveredTagCount = req.allTags.size(); + logData->unpoppedRecoveredTags = std::set(req.allTags.begin(), req.allTags.end()); wait(initPersistentState(self, logData) || logData->removed); TraceEvent("TLogRecover", self->dbgid) From be054596a076872249660a275fedf5efbdcab674 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 26 Jul 2022 11:00:11 -0700 Subject: [PATCH 21/56] Fix a crash bug during CC shutdown process actor_cancel should be thrown, otherwise the rest code can access invalid memory. --- fdbserver/Status.actor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 8a4f29a5314..8b0d938ecb9 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -2708,6 +2708,9 @@ ACTOR Future lockedStatusFetcher(Referenceinsert(format("Unable to determine if database is locked (%s).", e.what())); break; } From 65cc17087405dbf8dbe0621154dea450c02fd9a3 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Tue, 26 Jul 2022 21:52:29 +0000 Subject: [PATCH 22/56] disable AVX for 7.1.16 release --- cmake/ConfigureCompiler.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 398dfe5fb61..cbd7935e464 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -253,7 +253,7 @@ else() set(USE_AVX512F OFF) endif() endif() - set(USE_AVX ON CACHE BOOL "Enable AVX instructions") + set(USE_AVX OFF CACHE BOOL "Enable AVX instructions") if (USE_AVX) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") add_compile_options(-mavx) From adc3f0b0341cd3ea26e858dfee432701690fb83d Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Tue, 26 Jul 2022 22:05:31 +0000 Subject: [PATCH 23/56] enable AVX and update version for 7.1.17 release --- CMakeLists.txt | 2 +- cmake/ConfigureCompiler.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 508bdb478a0..91e4f314ae1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() project(foundationdb - VERSION 7.1.16 + VERSION 7.1.17 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index cbd7935e464..398dfe5fb61 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -253,7 +253,7 @@ else() set(USE_AVX512F OFF) endif() endif() - set(USE_AVX OFF CACHE BOOL "Enable AVX instructions") + set(USE_AVX ON CACHE BOOL "Enable AVX instructions") if (USE_AVX) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") add_compile_options(-mavx) From 9914e7ced16a0ea3c747e91ee50b8aa2300cbd54 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 27 Jul 2022 09:51:34 -0700 Subject: [PATCH 24/56] Add 7.1.16, 7.1.17 release notes --- .../source/release-notes/release-notes-710.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/documentation/sphinx/source/release-notes/release-notes-710.rst b/documentation/sphinx/source/release-notes/release-notes-710.rst index 091bc410543..7847ea457f7 100644 --- a/documentation/sphinx/source/release-notes/release-notes-710.rst +++ b/documentation/sphinx/source/release-notes/release-notes-710.rst @@ -4,6 +4,22 @@ Release Notes ############# +7.1.17 +====== +* Same as 7.1.16 release with AVX enabled. + +7.1.16 +====== +* Released with AVX disabled. +* Fixed a crash bug when cluster controller shuts down. `(PR #7706) `_ +* Fixed a storage server failure when getReadVersion returns an error. `(PR #7688) `_ +* Fixed unbounded status json generation. `(PR #7680) `_ +* Fixed ScopeEventFieldTypeMismatch error for TLogMetrics. `(PR #7640) `_ +* Added getMappedRange latency metrics. `(PR #7632) `_ +* Fixed a version vector performance bug due to not updating client side tag cache. `(PR #7616) `_ +* Fixed DiskReadSeconds and DiskWriteSeconds calculaion in ProcessMetrics. `(PR #7609) `_ +* Added Rocksdb compression and data size stats. `(PR #7596) `_ + 7.1.15 ====== * Same as 7.1.14 release with AVX enabled. From 1df82c5cb18bc21f42e64f0626cec372737ea2f6 Mon Sep 17 00:00:00 2001 From: Chaoguang Lin Date: Thu, 28 Jul 2022 10:46:28 -0700 Subject: [PATCH 25/56] Validate subrange reads in simulation (#7597) (#7720) * Add extra validation to special key space reads in simulation * Fix bugs turned up by validating subrange reads * Change to validateSpecialSubrangeRead It is in general not safe to expect that a read from the special key space returns the same results if performed again, since the transaction may be being modified concurrently. * Add comment * Add comment Co-authored-by: Andrew Noyes --- fdbclient/SpecialKeySpace.actor.cpp | 89 ++++++++++++++++++- fdbclient/SpecialKeySpace.actor.h | 10 +++ .../workloads/ReportConflictingKeys.actor.cpp | 9 +- 3 files changed, 104 insertions(+), 4 deletions(-) diff --git a/fdbclient/SpecialKeySpace.actor.cpp b/fdbclient/SpecialKeySpace.actor.cpp index a5b559a91e0..c0212ea4988 100644 --- a/fdbclient/SpecialKeySpace.actor.cpp +++ b/fdbclient/SpecialKeySpace.actor.cpp @@ -676,13 +676,14 @@ Future ConflictingKeysImpl::getRange(ReadYourWritesTransaction* ryw if (ryw->getTransactionState()->conflictingKeys) { auto krMapPtr = ryw->getTransactionState()->conflictingKeys.get(); auto beginIter = krMapPtr->rangeContaining(kr.begin); - if (beginIter->begin() != kr.begin) - ++beginIter; auto endIter = krMapPtr->rangeContaining(kr.end); + + if (!kr.contains(beginIter->begin()) && beginIter != endIter) + ++beginIter; for (auto it = beginIter; it != endIter; ++it) { result.push_back_deep(result.arena(), KeyValueRef(it->begin(), it->value())); } - if (endIter->begin() != kr.end) + if (kr.contains(endIter->begin())) result.push_back_deep(result.arena(), KeyValueRef(endIter->begin(), endIter->value())); } return result; @@ -2817,3 +2818,85 @@ Future> TenantMapRangeImpl::commit(ReadYourWritesTransacti return tag(waitForAll(tenantManagementFutures), Optional()); } + +ACTOR Future validateSpecialSubrangeRead(ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + Reverse reverse, + RangeResult result) { + if (!result.size()) { + RangeResult testResult = wait(ryw->getRange(begin, end, limits, Snapshot::True, reverse)); + ASSERT(testResult == result); + return Void(); + } + + if (reverse) { + ASSERT(std::is_sorted(result.begin(), result.end(), KeyValueRef::OrderByKeyBack{})); + } else { + ASSERT(std::is_sorted(result.begin(), result.end(), KeyValueRef::OrderByKey{})); + } + + // Generate a keyrange where we can determine the expected result based solely on the previous readrange, and whose + // boundaries may or may not be keys in result. + std::vector candidateKeys; + if (reverse) { + for (int i = result.size() - 1; i >= 0; --i) { + candidateKeys.emplace_back(result[i].key); + if (i - 1 >= 0) { + candidateKeys.emplace_back(keyBetween(KeyRangeRef(result[i].key, result[i - 1].key))); + } + } + } else { + for (int i = 0; i < result.size(); ++i) { + candidateKeys.emplace_back(result[i].key); + if (i + 1 < result.size()) { + candidateKeys.emplace_back(keyBetween(KeyRangeRef(result[i].key, result[i + 1].key))); + } + } + } + std::sort(candidateKeys.begin(), candidateKeys.end()); + int originalSize = candidateKeys.size(); + // Add more candidate keys so that we might read a range between two adjacent result keys. + for (int i = 0; i < originalSize - 1; ++i) { + candidateKeys.emplace_back(keyBetween(KeyRangeRef(candidateKeys[i], candidateKeys[i + 1]))); + } + std::vector keys; + keys = { deterministicRandom()->randomChoice(candidateKeys), deterministicRandom()->randomChoice(candidateKeys) }; + std::sort(keys.begin(), keys.end()); + state KeySelector testBegin = firstGreaterOrEqual(keys[0]); + state KeySelector testEnd = firstGreaterOrEqual(keys[1]); + + // Generate expected result. Linear time is ok here since we're in simulation, and there's a benefit to keeping this + // simple (as we're using it as an test oracle) + state RangeResult expectedResult; + // The reverse parameter should be the same as for the original read, so if + // reverse is true then the results are _already_ in reverse order. + for (const auto& kr : result) { + if (kr.key >= keys[0] && kr.key < keys[1]) { + expectedResult.push_back(expectedResult.arena(), kr); + } + } + + // Test + RangeResult testResult = wait(ryw->getRange(testBegin, testEnd, limits, Snapshot::True, reverse)); + if (testResult != expectedResult) { + fmt::print("Reverse: {}\n", reverse); + fmt::print("Original range: [{}, {})\n", begin.toString(), end.toString()); + fmt::print("Original result:\n"); + for (const auto& kr : result) { + fmt::print(" {} -> {}\n", kr.key.printable(), kr.value.printable()); + } + fmt::print("Test range: [{}, {})\n", testBegin.getKey().printable(), testEnd.getKey().printable()); + fmt::print("Expected:\n"); + for (const auto& kr : expectedResult) { + fmt::print(" {} -> {}\n", kr.key.printable(), kr.value.printable()); + } + fmt::print("Got:\n"); + for (const auto& kr : testResult) { + fmt::print(" {} -> {}\n", kr.key.printable(), kr.value.printable()); + } + ASSERT(testResult == expectedResult); + } + return Void(); +} diff --git a/fdbclient/SpecialKeySpace.actor.h b/fdbclient/SpecialKeySpace.actor.h index 836567ecf4a..d2b8563d9e7 100644 --- a/fdbclient/SpecialKeySpace.actor.h +++ b/fdbclient/SpecialKeySpace.actor.h @@ -550,5 +550,15 @@ class TenantMapRangeImpl : public SpecialKeyRangeRWImpl { Future> commit(ReadYourWritesTransaction* ryw) override; }; +// If the underlying set of key-value pairs of a key space is not changing, then we expect repeating a read to give the +// same result. Additionally, we can generate the expected result of any read if that read is reading a subrange. This +// actor performs a read of an arbitrary subrange of [begin, end) and validates the results. +ACTOR Future validateSpecialSubrangeRead(ReadYourWritesTransaction* ryw, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + Reverse reverse, + RangeResult result); + #include "flow/unactorcompiler.h" #endif diff --git a/fdbserver/workloads/ReportConflictingKeys.actor.cpp b/fdbserver/workloads/ReportConflictingKeys.actor.cpp index 33ac14584af..854612333bb 100644 --- a/fdbserver/workloads/ReportConflictingKeys.actor.cpp +++ b/fdbserver/workloads/ReportConflictingKeys.actor.cpp @@ -192,9 +192,16 @@ struct ReportConflictingKeysWorkload : TestWorkload { LiteralStringRef("\xff\xff").withPrefix(conflictingKeysRange.begin)); // The getRange here using the special key prefix "\xff\xff/transaction/conflicting_keys/" happens // locally Thus, the error handling is not needed here - Future conflictingKeyRangesFuture = tr2->getRange(ckr, CLIENT_KNOBS->TOO_MANY); + state Future conflictingKeyRangesFuture = tr2->getRange(ckr, CLIENT_KNOBS->TOO_MANY); ASSERT(conflictingKeyRangesFuture.isReady()); + wait(validateSpecialSubrangeRead(tr2.getPtr(), + firstGreaterOrEqual(ckr.begin), + firstGreaterOrEqual(ckr.end), + GetRangeLimits(), + Reverse::False, + conflictingKeyRangesFuture.get())); + tr2 = makeReference(cx); const RangeResult conflictingKeyRanges = conflictingKeyRangesFuture.get(); From a62282a1b6d3b445eac401120f590ded66585534 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Thu, 28 Jul 2022 21:09:43 -0400 Subject: [PATCH 26/56] Fix bug where too many logs were generated. --- fdbclient/NativeAPI.actor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 7ef8f9a313b..6718c92dcfa 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -267,14 +267,16 @@ void DatabaseContext::getLatestCommitVersions(const Reference& loc } } } + // do not log if commitVersion == readVersion as it is common. if (!updatedVersionMap && commitVersion != readVersion) { - // This can have a performance impact if vv is enabled, so provide some diags TraceEvent(SevDebug, "CommitVersionNotFoundForSS") .detail("InSSIDMap", iter != ssidTagMapping.end() ? 1 : 0) .detail("Tag", tag) .detail("CommitVersion", commitVersion) .detail("ReadVersion", readVersion) - .detail("VersionVector", ssVersionVectorCache.toString()); + .detail("VersionVector", ssVersionVectorCache.toString()) + .setMaxEventLength(11000) + .setMaxFieldLength(10000); ++transactionCommitVersionNotFoundForSS; } } From 8bfbc2712df0dba7f27e4ed6dff85496210c3aa2 Mon Sep 17 00:00:00 2001 From: Hao Fu <77984096+hfu94@users.noreply.github.com> Date: Fri, 29 Jul 2022 16:35:33 -0700 Subject: [PATCH 27/56] Add g_traceBatch for prefetch query (#7730) (#7732) --- fdbserver/storageserver.actor.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 7f87fd0b4d4..2882afdb0be 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -3435,6 +3435,9 @@ ACTOR Future quickGetKeyValues( state double getValuesStart = g_network->timer(); getRange.begin = firstGreaterOrEqual(KeyRef(*a, prefix)); getRange.end = firstGreaterOrEqual(strinc(prefix, *a)); + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", pOriginalReq->debugID.get().first(), "storageserver.quickGetKeyValues.Before"); try { // TODO: Use a lower level API may be better? Or tweak priorities? GetKeyValuesRequest req; @@ -3465,6 +3468,10 @@ ACTOR Future quickGetKeyValues( getRange.result = RangeResultRef(reply.data, reply.more); const double duration = g_network->timer() - getValuesStart; data->counters.mappedRangeLocalSample.addMeasurement(duration); + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent("TransactionDebug", + pOriginalReq->debugID.get().first(), + "storageserver.quickGetKeyValues.AfterLocalFetch"); return getRange; } // Otherwise fallback. @@ -3486,6 +3493,10 @@ ACTOR Future quickGetKeyValues( getRange.result = rangeResult; const double duration = g_network->timer() - getValuesStart; data->counters.mappedRangeRemoteSample.addMeasurement(duration); + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent("TransactionDebug", + pOriginalReq->debugID.get().first(), + "storageserver.quickGetKeyValues.AfterRemoteFetch"); return getRange; } else { throw quick_get_key_values_miss(); @@ -3772,7 +3783,9 @@ ACTOR Future mapKeyValues(StorageServer* data, result.arena.dependsOn(input.arena); result.data.reserve(result.arena, input.data.size()); - + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", pOriginalReq->debugID.get().first(), "storageserver.mapKeyValues.Start"); state Tuple mappedKeyFormatTuple; state Tuple mappedKeyTuple; @@ -3791,6 +3804,9 @@ ACTOR Future mapKeyValues(StorageServer* data, state std::vector kvms(k); state std::vector> subqueries; state int offset = 0; + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", pOriginalReq->debugID.get().first(), "storageserver.mapKeyValues.BeforeLoop"); for (; offset < sz; offset += SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE) { // Divide into batches of MAX_PARALLEL_QUICK_GET_VALUE subqueries for (int i = 0; i + offset < sz && i < SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE; i++) { @@ -3810,11 +3826,17 @@ ACTOR Future mapKeyValues(StorageServer* data, mapSubquery(data, input.version, pOriginalReq, &result.arena, isRangeQuery, it, kvm, mappedKey)); } wait(waitForAll(subqueries)); + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", pOriginalReq->debugID.get().first(), "storageserver.mapKeyValues.AfterBatch"); subqueries.clear(); for (int i = 0; i + offset < sz && i < SERVER_KNOBS->MAX_PARALLEL_QUICK_GET_VALUE; i++) { result.data.push_back(result.arena, kvms[i]); } } + if (pOriginalReq->debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", pOriginalReq->debugID.get().first(), "storageserver.mapKeyValues.AfterAll"); return result; } From 1ce8b6edadb6ec800006a55ea12cb54959885e34 Mon Sep 17 00:00:00 2001 From: Renxuan Wang Date: Mon, 1 Aug 2022 15:20:21 -0700 Subject: [PATCH 28/56] Prefer IPv6 in hostname resolving. (#7750) --- flow/Hostname.actor.cpp | 4 ++-- flow/network.cpp | 20 +++++++++++++++++++- flow/network.h | 16 ++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/flow/Hostname.actor.cpp b/flow/Hostname.actor.cpp index 110061e5c98..84a3cc78971 100644 --- a/flow/Hostname.actor.cpp +++ b/flow/Hostname.actor.cpp @@ -43,7 +43,7 @@ ACTOR Future> resolveImpl(Hostname* self) { try { std::vector addresses = wait(INetworkConnections::net()->resolveTCPEndpointWithDNSCache(self->host, self->service)); - NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())]; + NetworkAddress address = INetworkConnections::pickOneAddress(addresses); address.flags = 0; // Reset the parsed address to public address.fromHostname = NetworkAddressFromHostname::True; if (self->isTLS) { @@ -84,7 +84,7 @@ Optional Hostname::resolveBlocking() { try { std::vector addresses = INetworkConnections::net()->resolveTCPEndpointBlockingWithDNSCache(host, service); - NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())]; + NetworkAddress address = INetworkConnections::pickOneAddress(addresses); address.flags = 0; // Reset the parsed address to public address.fromHostname = NetworkAddressFromHostname::True; if (isTLS) { diff --git a/flow/network.cpp b/flow/network.cpp index 0e592b7a6af..ed9b3c80943 100644 --- a/flow/network.cpp +++ b/flow/network.cpp @@ -279,7 +279,7 @@ Future> INetworkConnections::connect(const std::string& h // Use map to create an actor that returns an endpoint or throws Future pickEndpoint = map(resolveTCPEndpoint(host, service), [=](std::vector const& addresses) -> NetworkAddress { - NetworkAddress addr = addresses[deterministicRandom()->randomInt(0, addresses.size())]; + NetworkAddress addr = INetworkConnections::pickOneAddress(addresses); addr.fromHostname = true; if (isTLS) { addr.flags = NetworkAddress::FLAG_TLS; @@ -347,4 +347,22 @@ TEST_CASE("/flow/network/ipaddress") { return Void(); } +TEST_CASE("/flow/network/ipV6Preferred") { + std::vector addresses; + for (int i = 0; i < 50; ++i) { + std::string s = format("%d.%d.%d.%d:%d", i, i, i, i, i); + addresses.push_back(NetworkAddress::parse(s)); + } + std::string ipv6 = "[2001:db8:85a3::8a2e:370:7334]:4800"; + addresses.push_back(NetworkAddress::parse(ipv6)); + for (int i = 50; i < 100; ++i) { + std::string s = format("%d.%d.%d.%d:%d", i, i, i, i, i); + addresses.push_back(NetworkAddress::parse(s)); + } + // Confirm IPv6 is always preferred. + ASSERT(INetworkConnections::pickOneAddress(addresses).toString() == ipv6); + + return Void(); +} + NetworkInfo::NetworkInfo() : handshakeLock(new FlowLock(FLOW_KNOBS->TLS_HANDSHAKE_LIMIT)) {} diff --git a/flow/network.h b/flow/network.h index 2fdd7ab32d5..f6900a648b2 100644 --- a/flow/network.h +++ b/flow/network.h @@ -734,6 +734,22 @@ class INetworkConnections { return static_cast((void*)g_network->global(INetwork::enNetworkConnections)); } + // If a DNS name can be resolved to both and IPv4 and IPv6 addresses, we want IPv6 addresses when running the + // clusters on IPv6. + // This function takes a vector of addresses and return a random one, preferring IPv6 over IPv4. + static NetworkAddress pickOneAddress(const std::vector& addresses) { + std::vector ipV6Addresses; + for (const NetworkAddress& addr : addresses) { + if (addr.isV6()) { + ipV6Addresses.push_back(addr); + } + } + if (ipV6Addresses.size() > 0) { + return ipV6Addresses[deterministicRandom()->randomInt(0, ipV6Addresses.size())]; + } + return addresses[deterministicRandom()->randomInt(0, addresses.size())]; + } + void removeCachedDNS(const std::string& host, const std::string& service) { dnsCache.remove(host, service); } DNSCache dnsCache; From 9c1050203a0dc63b594ba580285f089c49e0fb15 Mon Sep 17 00:00:00 2001 From: Hao Fu <77984096+hfu94@users.noreply.github.com> Date: Mon, 1 Aug 2022 21:59:08 -0700 Subject: [PATCH 29/56] Retain debug id in prefetch server-server call (#7755) Co-authored-by: Renxuan Wang --- fdbserver/storageserver.actor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 2882afdb0be..9d742989ffe 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -3483,6 +3483,9 @@ ACTOR Future quickGetKeyValues( if (SERVER_KNOBS->QUICK_GET_KEY_VALUES_FALLBACK) { state Transaction tr(data->cx, pOriginalReq->tenantInfo.name); tr.setVersion(version); + if (pOriginalReq->debugID.present()) { + tr.debugTransaction(pOriginalReq->debugID.get()); + } // TODO: is DefaultPromiseEndpoint the best priority for this? tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint; Future rangeResultFuture = From 4f903724bfcc506a21693f630e8b8b47c1e2f142 Mon Sep 17 00:00:00 2001 From: Chaoguang Lin Date: Thu, 4 Aug 2022 12:45:45 -0700 Subject: [PATCH 30/56] Add test coverage for SpecialKeyRangeAsyncImpl::getRange(release-7.1) (#7778) * Validate subrange reads in simulation (#7597) * Add extra validation to special key space reads in simulation * Fix bugs turned up by validating subrange reads * Change to validateSpecialSubrangeRead It is in general not safe to expect that a read from the special key space returns the same results if performed again, since the transaction may be being modified concurrently. * Add comment * Add comment * Add test coverage for SpecialKeyRangeAsyncImpl::getRange (#7671) * Add getRange test coverage for SpecialKeyRangeAsyncImpl * Fix the bug in SpecialKeyRangeAsyncImpl found by the test * Refactor ConflictingKeysImpl::getRange to use containedRanges to simplify the code * Fix file format * Initialize SpecialKeyRangeAsyncImpl cache with correct end key * Add release notes * Revert "Refactor ConflictingKeysImpl::getRange to use containedRanges to simplify the code" This reverts commit fdd298f469bf73b107e5ca29072c49774820d3d6. Co-authored-by: Andrew Noyes --- fdbclient/SpecialKeySpace.actor.cpp | 41 +++++++++++++------ fdbclient/SpecialKeySpace.actor.h | 23 +++++++---- .../SpecialKeySpaceCorrectness.actor.cpp | 30 +++++++++----- 3 files changed, 64 insertions(+), 30 deletions(-) diff --git a/fdbclient/SpecialKeySpace.actor.cpp b/fdbclient/SpecialKeySpace.actor.cpp index c0212ea4988..e084b329a6d 100644 --- a/fdbclient/SpecialKeySpace.actor.cpp +++ b/fdbclient/SpecialKeySpace.actor.cpp @@ -147,7 +147,7 @@ RangeResult rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, c ACTOR Future moveKeySelectorOverRangeActor(const SpecialKeyRangeReadImpl* skrImpl, ReadYourWritesTransaction* ryw, KeySelector* ks, - Optional* cache) { + KeyRangeMap>* cache) { // should be removed before calling ASSERT(!ks->orEqual); @@ -233,7 +233,7 @@ ACTOR Future normalizeKeySelectorActor(SpecialKeySpace* sks, KeyRangeRef boundary, int* actualOffset, RangeResult* result, - Optional* cache) { + KeyRangeMap>* cache) { // If offset < 1, where we need to move left, iter points to the range containing at least one smaller key // (It's a wasting of time to walk through the range whose begin key is same as ks->key) // (rangeContainingKeyBefore itself handles the case where ks->key == Key()) @@ -336,8 +336,9 @@ ACTOR Future SpecialKeySpace::getRangeAggregationActor(SpecialKeySp state int actualBeginOffset; state int actualEndOffset; state KeyRangeRef moduleBoundary; - // used to cache result from potential first read - state Optional cache; + // used to cache results from potential first async read + // the current implementation will read the whole range result to save in the cache + state KeyRangeMap> cache(Optional(), specialKeys.end); if (ryw->specialKeySpaceRelaxed()) { moduleBoundary = sks->range; @@ -384,7 +385,7 @@ ACTOR Future SpecialKeySpace::getRangeAggregationActor(SpecialKeySp KeyRangeRef kr = iter->range(); KeyRef keyStart = kr.contains(begin.getKey()) ? begin.getKey() : kr.begin; KeyRef keyEnd = kr.contains(end.getKey()) ? end.getKey() : kr.end; - if (iter->value()->isAsync() && cache.present()) { + if (iter->value()->isAsync() && cache.rangeContaining(keyStart).value().present()) { const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast(iter->value()); RangeResult pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), limits, &cache)); pairs = pairs_; @@ -415,7 +416,7 @@ ACTOR Future SpecialKeySpace::getRangeAggregationActor(SpecialKeySp KeyRangeRef kr = iter->range(); KeyRef keyStart = kr.contains(begin.getKey()) ? begin.getKey() : kr.begin; KeyRef keyEnd = kr.contains(end.getKey()) ? end.getKey() : kr.end; - if (iter->value()->isAsync() && cache.present()) { + if (iter->value()->isAsync() && cache.rangeContaining(keyStart).value().present()) { const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast(iter->value()); RangeResult pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), limits, &cache)); pairs = pairs_; @@ -626,12 +627,8 @@ Future SpecialKeySpace::commit(ReadYourWritesTransaction* ryw) { return commitActor(this, ryw); } -SKSCTestImpl::SKSCTestImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} - -Future SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw, - KeyRangeRef kr, - GetRangeLimits limitsHint) const { - ASSERT(range.contains(kr)); +// For SKSCTestRWImpl and SKSCTestAsyncReadImpl +Future SKSCTestGetRangeBase(ReadYourWritesTransaction* ryw, KeyRangeRef kr, GetRangeLimits limitsHint) { auto resultFuture = ryw->getRange(kr, CLIENT_KNOBS->TOO_MANY); // all keys are written to RYW, since GRV is set, the read should happen locally ASSERT(resultFuture.isReady()); @@ -641,11 +638,29 @@ Future SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw, return rywGetRange(ryw, kr, kvs); } -Future> SKSCTestImpl::commit(ReadYourWritesTransaction* ryw) { +SKSCTestRWImpl::SKSCTestRWImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} + +Future SKSCTestRWImpl::getRange(ReadYourWritesTransaction* ryw, + KeyRangeRef kr, + GetRangeLimits limitsHint) const { + ASSERT(range.contains(kr)); + return SKSCTestGetRangeBase(ryw, kr, limitsHint); +} + +Future> SKSCTestRWImpl::commit(ReadYourWritesTransaction* ryw) { ASSERT(false); return Optional(); } +SKSCTestAsyncReadImpl::SKSCTestAsyncReadImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {} + +Future SKSCTestAsyncReadImpl::getRange(ReadYourWritesTransaction* ryw, + KeyRangeRef kr, + GetRangeLimits limitsHint) const { + ASSERT(range.contains(kr)); + return SKSCTestGetRangeBase(ryw, kr, limitsHint); +} + ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {} ACTOR static Future getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) { diff --git a/fdbclient/SpecialKeySpace.actor.h b/fdbclient/SpecialKeySpace.actor.h index d2b8563d9e7..9d233689272 100644 --- a/fdbclient/SpecialKeySpace.actor.h +++ b/fdbclient/SpecialKeySpace.actor.h @@ -126,7 +126,7 @@ class SpecialKeyRangeAsyncImpl : public SpecialKeyRangeReadImpl { Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr, GetRangeLimits limitsHint, - Optional* cache) const { + KeyRangeMap>* cache) const { return getRangeAsyncActor(this, ryw, kr, limitsHint, cache); } @@ -136,17 +136,18 @@ class SpecialKeyRangeAsyncImpl : public SpecialKeyRangeReadImpl { ReadYourWritesTransaction* ryw, KeyRangeRef kr, GetRangeLimits limits, - Optional* cache) { + KeyRangeMap>* cache) { ASSERT(skrAyncImpl->getKeyRange().contains(kr)); ASSERT(cache != nullptr); - if (!cache->present()) { + ASSERT(cache->rangeContaining(kr.begin) == cache->rangeContainingKeyBefore(kr.end)); + if (!(*cache)[kr.begin].present()) { // For simplicity, every time we need to cache, we read the whole range // Although sometimes the range can be narrowed, // there is not a general way to do it in complicated scenarios RangeResult result_ = wait(skrAyncImpl->getRange(ryw, skrAyncImpl->getKeyRange(), limits)); - *cache = result_; + cache->insert(skrAyncImpl->getKeyRange(), result_); } - const auto& allResults = cache->get(); + const auto& allResults = (*cache)[kr.begin].get(); int start = 0, end = allResults.size(); while (start < allResults.size() && allResults[start].key < kr.begin) ++start; @@ -271,15 +272,23 @@ class SpecialKeySpace { }; // Used for SpecialKeySpaceCorrectnessWorkload -class SKSCTestImpl : public SpecialKeyRangeRWImpl { +class SKSCTestRWImpl : public SpecialKeyRangeRWImpl { public: - explicit SKSCTestImpl(KeyRangeRef kr); + explicit SKSCTestRWImpl(KeyRangeRef kr); Future getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr, GetRangeLimits limitsHint) const override; Future> commit(ReadYourWritesTransaction* ryw) override; }; +class SKSCTestAsyncReadImpl : public SpecialKeyRangeAsyncImpl { +public: + explicit SKSCTestAsyncReadImpl(KeyRangeRef kr); + Future getRange(ReadYourWritesTransaction* ryw, + KeyRangeRef kr, + GetRangeLimits limitsHint) const override; +}; + // Use special key prefix "\xff\xff/transaction/conflicting_keys/", // to retrieve keys which caused latest not_committed(conflicting with another transaction) error. // The returned key value pairs are interpreted as : diff --git a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp index b1b326b56ac..5437ca418ac 100644 --- a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp +++ b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp @@ -39,8 +39,10 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { double testDuration, absoluteRandomProb, transactionsPerSecond; PerfIntCounter wrongResults, keysCount; Reference ryw; // used to store all populated data - std::vector> impls; + std::vector> rwImpls; + std::vector> asyncReadImpls; Standalone> keys; + Standalone> rwKeys; SpecialKeySpaceCorrectnessWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), wrongResults("Wrong Results"), keysCount("Number of generated keys") { @@ -81,12 +83,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { Key startKey(baseKey + "/"); Key endKey(baseKey + "/\xff"); self->keys.push_back_deep(self->keys.arena(), KeyRangeRef(startKey, endKey)); - self->impls.push_back(std::make_shared(KeyRangeRef(startKey, endKey))); - // Although there are already ranges registered, the testing range will replace them - cx->specialKeySpace->registerKeyRange(SpecialKeySpace::MODULE::TESTONLY, - SpecialKeySpace::IMPLTYPE::READWRITE, - self->keys.back(), - self->impls.back().get()); + if (deterministicRandom()->random01() < 0.2) { + self->asyncReadImpls.push_back(std::make_shared(KeyRangeRef(startKey, endKey))); + cx->specialKeySpace->registerKeyRange(SpecialKeySpace::MODULE::TESTONLY, + SpecialKeySpace::IMPLTYPE::READONLY, + self->keys.back(), + self->asyncReadImpls.back().get()); + } else { + self->rwImpls.push_back(std::make_shared(KeyRangeRef(startKey, endKey))); + // Although there are already ranges registered, the testing range will replace them + cx->specialKeySpace->registerKeyRange(SpecialKeySpace::MODULE::TESTONLY, + SpecialKeySpace::IMPLTYPE::READWRITE, + self->keys.back(), + self->rwImpls.back().get()); + } // generate keys in each key range int keysInRange = deterministicRandom()->randomInt(self->minKeysPerRange, self->maxKeysPerRange + 1); self->keysCount += keysInRange; @@ -154,7 +164,7 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { } // check ryw result consistency - KeyRange rkr = self->randomKeyRange(); + KeyRange rkr = self->randomRWKeyRange(); KeyRef rkey1 = rkr.begin; KeyRef rkey2 = rkr.end; // randomly set/clear two keys or clear a key range @@ -238,8 +248,8 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { return true; } - KeyRange randomKeyRange() { - Key prefix = keys[deterministicRandom()->randomInt(0, rangeCount)].begin; + KeyRange randomRWKeyRange() { + Key prefix = rwImpls[deterministicRandom()->randomInt(0, rwImpls.size())]->getKeyRange().begin; Key rkey1 = Key(deterministicRandom()->randomAlphaNumeric(deterministicRandom()->randomInt(0, keyBytes))) .withPrefix(prefix); Key rkey2 = Key(deterministicRandom()->randomAlphaNumeric(deterministicRandom()->randomInt(0, keyBytes))) From e202a391bce8f14869e2c63c802f97c3d347a662 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 8 Aug 2022 14:30:16 -0700 Subject: [PATCH 31/56] Add knobs for min/max Ratekeeper limit The default has no effects. --- fdbclient/ServerKnobs.cpp | 4 +++- fdbclient/ServerKnobs.h | 2 ++ fdbserver/Ratekeeper.actor.cpp | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 6024cb2c57f..d7b2af6f193 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -591,9 +591,11 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( SLOW_SMOOTHING_AMOUNT, 10.0 ); if( slowRatekeeper ) SLOW_SMOOTHING_AMOUNT = 50.0; init( METRIC_UPDATE_RATE, .1 ); if( slowRatekeeper ) METRIC_UPDATE_RATE = 0.5; init( DETAILED_METRIC_UPDATE_RATE, 5.0 ); - init (RATEKEEPER_DEFAULT_LIMIT, 1e6 ); if( randomize && BUGGIFY ) RATEKEEPER_DEFAULT_LIMIT = 0; + init( RATEKEEPER_DEFAULT_LIMIT, 1e6 ); if( randomize && BUGGIFY ) RATEKEEPER_DEFAULT_LIMIT = 0; init( RATEKEEPER_LIMIT_REASON_SAMPLE_RATE, 0.1 ); init( RATEKEEPER_PRINT_LIMIT_REASON, false ); if( randomize && BUGGIFY ) RATEKEEPER_PRINT_LIMIT_REASON = true; + init( RATEKEEPER_MIN_RATE, 0.0 ); + init( RATEKEEPER_MAX_RATE, 1e9 ); bool smallStorageTarget = randomize && BUGGIFY; init( TARGET_BYTES_PER_STORAGE_SERVER, 1000e6 ); if( smallStorageTarget ) TARGET_BYTES_PER_STORAGE_SERVER = 3000e3; diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 28aedbc11ea..a3319ae7f90 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -529,6 +529,8 @@ class ServerKnobs : public KnobsImpl { double RATEKEEPER_DEFAULT_LIMIT; double RATEKEEPER_LIMIT_REASON_SAMPLE_RATE; bool RATEKEEPER_PRINT_LIMIT_REASON; + double RATEKEEPER_MIN_RATE; + double RATEKEEPER_MAX_RATE; int64_t TARGET_BYTES_PER_STORAGE_SERVER; int64_t SPRING_BYTES_STORAGE_SERVER; diff --git a/fdbserver/Ratekeeper.actor.cpp b/fdbserver/Ratekeeper.actor.cpp index 9b7c3103948..25475c52882 100644 --- a/fdbserver/Ratekeeper.actor.cpp +++ b/fdbserver/Ratekeeper.actor.cpp @@ -19,6 +19,7 @@ */ #include "fdbserver/DataDistribution.actor.h" +#include "fdbserver/Knobs.h" #include "fdbserver/Ratekeeper.h" #include "fdbserver/TagThrottler.h" #include "fdbserver/WaitFailure.h" @@ -901,6 +902,11 @@ void Ratekeeper::updateRate(RatekeeperLimits* limits) { limits->tpsLimitMetric = std::min(limits->tpsLimit, 1e6); limits->reasonMetric = limitReason; + if (limits->priority == TransactionPriority::DEFAULT) { + limits->tpsLimit = std::max(limits->tpsLimit, SERVER_KNOBS->RATEKEEPER_MIN_RATE); + limits->tpsLimit = std::min(limits->tpsLimit, SERVER_KNOBS->RATEKEEPER_MAX_RATE); + } + if (deterministicRandom()->random01() < 0.1) { const std::string& name = limits->rkUpdateEventCacheHolder.getPtr()->trackingKey; TraceEvent(name.c_str(), id) From c9b32037966668d14c8f870c516db94b03d03bd2 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 9 Aug 2022 11:28:05 -0700 Subject: [PATCH 32/56] Added 7.1.18, 7.1.19 release notes --- .../source/release-notes/release-notes-710.rst | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/release-notes/release-notes-710.rst b/documentation/sphinx/source/release-notes/release-notes-710.rst index 7847ea457f7..4854230c1d0 100644 --- a/documentation/sphinx/source/release-notes/release-notes-710.rst +++ b/documentation/sphinx/source/release-notes/release-notes-710.rst @@ -4,6 +4,19 @@ Release Notes ############# +7.1.19 +====== +* Same as 7.1.18 release with AVX enabled. + +7.1.18 +====== +* Released with AVX disabled. +* Added knobs for the minimum and the maximum of the Ratekeeper's default priority. `(PR #7820) `_ +* Fixed bugs in ``getRange`` of the special key space. `(PR #7778) `_, `(PR #7720) `_ +* Added debug ID for secondary queries in index prefetching. `(PR #7755) `_ +* Changed hostname resolving to prefer IPv6 addresses. `(PR #7750) `_ +* Added more transaction debug events for prefetch queries. `(PR #7732) `_ + 7.1.17 ====== * Same as 7.1.16 release with AVX enabled. @@ -17,7 +30,7 @@ Release Notes * Fixed ScopeEventFieldTypeMismatch error for TLogMetrics. `(PR #7640) `_ * Added getMappedRange latency metrics. `(PR #7632) `_ * Fixed a version vector performance bug due to not updating client side tag cache. `(PR #7616) `_ -* Fixed DiskReadSeconds and DiskWriteSeconds calculaion in ProcessMetrics. `(PR #7609) `_ +* Fixed DiskReadSeconds and DiskWriteSeconds calculation in ProcessMetrics. `(PR #7609) `_ * Added Rocksdb compression and data size stats. `(PR #7596) `_ 7.1.15 @@ -76,7 +89,7 @@ Release Notes * Added support of the reboot command in go bindings. `(PR #7270) `_ * Fixed several issues in profiling special keys using GlobalConfig. `(PR #7120) `_ * Fixed a stuck transaction system bug due to inconsistent recovery transaction version. `(PR #7261) `_ -* Fixed a unknown_error crash due to not resolving hostnames. `(PR #7254) `_ +* Fixed an unknown_error crash due to not resolving hostnames. `(PR #7254) `_ * Fixed a heap-use-after-free bug. `(PR #7250) `_ * Fixed a performance issue that remote TLogs are sending too many pops to log routers. `(PR #7235) `_ * Fixed an issue that SharedTLogs are not displaced and leaking disk space. `(PR #7246) `_ From 7e95cd74674c2267858efe2d8e6744bbeb802907 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Tue, 9 Aug 2022 19:55:33 +0000 Subject: [PATCH 33/56] update version after 7.1.17 release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 91e4f314ae1..5e260c5ec3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() project(foundationdb - VERSION 7.1.17 + VERSION 7.1.18 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM) From 4e8b4db5500a6d98609a8c5653b5b2b628d52ee8 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Tue, 9 Aug 2022 19:58:02 +0000 Subject: [PATCH 34/56] disable AVX for 7.1.18 release --- cmake/ConfigureCompiler.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 398dfe5fb61..cbd7935e464 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -253,7 +253,7 @@ else() set(USE_AVX512F OFF) endif() endif() - set(USE_AVX ON CACHE BOOL "Enable AVX instructions") + set(USE_AVX OFF CACHE BOOL "Enable AVX instructions") if (USE_AVX) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") add_compile_options(-mavx) From 473861420dfa020f623cfb1300936a45130abeb9 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Tue, 9 Aug 2022 20:03:32 +0000 Subject: [PATCH 35/56] enable AVX and update version for 7.1.19 release --- CMakeLists.txt | 2 +- cmake/ConfigureCompiler.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e260c5ec3a..98f19d8f226 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() project(foundationdb - VERSION 7.1.18 + VERSION 7.1.19 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index cbd7935e464..398dfe5fb61 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -253,7 +253,7 @@ else() set(USE_AVX512F OFF) endif() endif() - set(USE_AVX OFF CACHE BOOL "Enable AVX instructions") + set(USE_AVX ON CACHE BOOL "Enable AVX instructions") if (USE_AVX) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") add_compile_options(-mavx) From b4e91059a11670d3cd4f8ae4bede7ebb3d624bb6 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Wed, 10 Aug 2022 15:58:21 +0000 Subject: [PATCH 36/56] update version after 7.1.19 release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 98f19d8f226..740fdded801 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() project(foundationdb - VERSION 7.1.19 + VERSION 7.1.20 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM) From fd24a072d686c8d58be8b42ab81af7e1473afd20 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 9 Aug 2022 20:31:45 -0700 Subject: [PATCH 37/56] Add fault injection to GRV proxy to return error response This turns out to fail a lot of tests that we need to fix. --- fdbserver/GrvProxyServer.actor.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fdbserver/GrvProxyServer.actor.cpp b/fdbserver/GrvProxyServer.actor.cpp index 61b100779ec..fc3d9b5d233 100644 --- a/fdbserver/GrvProxyServer.actor.cpp +++ b/fdbserver/GrvProxyServer.actor.cpp @@ -29,6 +29,7 @@ #include "fdbserver/WaitFailure.h" #include "fdbserver/WorkerInterface.actor.h" #include "fdbrpc/sim_validation.h" +#include "flow/IRandom.h" #include "flow/flow.h" #include "flow/actorcompiler.h" // This must be the last #include. @@ -425,7 +426,9 @@ ACTOR Future queueGetReadVersionRequests(Reference // WARNING: this code is run at a high priority, so it needs to do as little work as possible bool canBeQueued = true; if (stats->txnRequestIn.getValue() - stats->txnRequestOut.getValue() > - SERVER_KNOBS->START_TRANSACTION_MAX_QUEUE_SIZE) { + SERVER_KNOBS->START_TRANSACTION_MAX_QUEUE_SIZE || + (g_network->isSimulated() && !g_simulator.speedUpSimulation && + deterministicRandom()->random01() < 0.01)) { // When the limit is hit, try to drop requests from the lower priority queues. if (req.priority == TransactionPriority::BATCH) { canBeQueued = false; From 0fd57b801093c665568ae4ff26ef8f2d10c8286f Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 9 Aug 2022 22:45:44 -0700 Subject: [PATCH 38/56] Client needs to handle proxy_memory_limit_exceeded error --- fdbclient/NativeAPI.actor.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 794e56d151f..75e7531a20d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -6632,9 +6632,12 @@ ACTOR Future getConsistentReadVersion(SpanID parentSpan, } } } catch (Error& e) { - if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled) + if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled && + e.code() != error_code_proxy_memory_limit_exceeded) TraceEvent(SevError, "GetConsistentReadVersionError").error(e); - if (e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) { + if ((e.code() == error_code_batch_transaction_throttled || + e.code() == error_code_proxy_memory_limit_exceeded) && + !cx->apiVersionAtLeast(630)) { wait(delayJittered(5.0)); } else { throw; From 3d8257c9e068ad0157b69cc7cb00e5a0e4ca503c Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 10 Aug 2022 11:21:37 -0700 Subject: [PATCH 39/56] Add retry in waitForCommittedVersion for GRV error response --- fdbclient/NativeAPI.actor.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 75e7531a20d..cd86940ada9 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3460,8 +3460,8 @@ ACTOR Future getKey(Reference trState, ACTOR Future waitForCommittedVersion(Database cx, Version version, SpanID spanContext) { state Span span("NAPI:waitForCommittedVersion"_loc, { spanContext }); - try { - loop { + loop { + try { choose { when(wait(cx->onProxiesChanged())) {} when(GetReadVersionReply v = wait(basicLoadBalance( @@ -3487,10 +3487,16 @@ ACTOR Future waitForCommittedVersion(Database cx, Version version, Span wait(delay(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, cx->taskID)); } } + } catch (Error& e) { + if (e.code() == error_code_batch_transaction_throttled || + e.code() == error_code_proxy_memory_limit_exceeded) { + // GRV Proxy returns an error + wait(delayJittered(5.0)); + } else { + TraceEvent(SevError, "WaitForCommittedVersionError").error(e); + throw; + } } - } catch (Error& e) { - TraceEvent(SevError, "WaitForCommittedVersionError").error(e); - throw; } } From c5c8e73f6e435863cc15b96e70959c5b4aa4e3d4 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 10 Aug 2022 14:13:03 -0700 Subject: [PATCH 40/56] Fix backup worker to handle GRV errors --- fdbserver/BackupWorker.actor.cpp | 38 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/fdbserver/BackupWorker.actor.cpp b/fdbserver/BackupWorker.actor.cpp index 0ac5b56a7d6..4163bca42ca 100644 --- a/fdbserver/BackupWorker.actor.cpp +++ b/fdbserver/BackupWorker.actor.cpp @@ -428,20 +428,30 @@ struct BackupData { ACTOR static Future _getMinKnownCommittedVersion(BackupData* self) { state Span span("BA:GetMinCommittedVersion"_loc); loop { - GetReadVersionRequest request(span.context, - 0, - TransactionPriority::DEFAULT, - invalidVersion, - GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION); - choose { - when(wait(self->cx->onProxiesChanged())) {} - when(GetReadVersionReply reply = - wait(basicLoadBalance(self->cx->getGrvProxies(UseProvisionalProxies::False), - &GrvProxyInterface::getConsistentReadVersion, - request, - self->cx->taskID))) { - self->cx->ssVersionVectorCache.applyDelta(reply.ssVersionVectorDelta); - return reply.version; + try { + GetReadVersionRequest request(span.context, + 0, + TransactionPriority::DEFAULT, + invalidVersion, + GetReadVersionRequest::FLAG_USE_MIN_KNOWN_COMMITTED_VERSION); + choose { + when(wait(self->cx->onProxiesChanged())) {} + when(GetReadVersionReply reply = + wait(basicLoadBalance(self->cx->getGrvProxies(UseProvisionalProxies::False), + &GrvProxyInterface::getConsistentReadVersion, + request, + self->cx->taskID))) { + self->cx->ssVersionVectorCache.applyDelta(reply.ssVersionVectorDelta); + return reply.version; + } + } + } catch (Error& e) { + if (e.code() == error_code_batch_transaction_throttled || + e.code() == error_code_proxy_memory_limit_exceeded) { + // GRV Proxy returns an error + wait(delayJittered(5.0)); + } else { + throw; } } } From b99ecfd9733ebc126f5b1ff7975a69c030583383 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 10 Aug 2022 14:44:17 -0700 Subject: [PATCH 41/56] Fix FuzzApiCorrectness workload --- fdbserver/workloads/FuzzApiCorrectness.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/workloads/FuzzApiCorrectness.actor.cpp b/fdbserver/workloads/FuzzApiCorrectness.actor.cpp index bec0240257a..e04ea8a48cb 100644 --- a/fdbserver/workloads/FuzzApiCorrectness.actor.cpp +++ b/fdbserver/workloads/FuzzApiCorrectness.actor.cpp @@ -61,7 +61,7 @@ struct ExceptionContract { e.code() == error_code_future_version || e.code() == error_code_transaction_cancelled || e.code() == error_code_key_too_large || e.code() == error_code_value_too_large || e.code() == error_code_process_behind || e.code() == error_code_batch_transaction_throttled || - e.code() == error_code_tag_throttled) { + e.code() == error_code_tag_throttled || e.code() == error_code_proxy_memory_limit_exceeded) { return; } From def709c0f4fefbee743735d2687f6f1f5ecb5382 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 10 Aug 2022 14:59:48 -0700 Subject: [PATCH 42/56] Fix SpecialKeySpaceCorrectness workload --- .../SpecialKeySpaceCorrectness.actor.cpp | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp index 5437ca418ac..495dd050dfe 100644 --- a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp +++ b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp @@ -874,41 +874,58 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { } TraceEvent(SevDebug, "DatabaseLocked").log(); // if database locked, fdb read should get database_locked error - try { - tx->reset(); - tx->setOption(FDBTransactionOptions::RAW_ACCESS); - RangeResult res = wait(tx->getRange(normalKeys, 1)); - } catch (Error& e) { - if (e.code() == error_code_actor_cancelled) - throw; - ASSERT(e.code() == error_code_database_locked); + tx->reset(); + loop { + try { + tx->setOption(FDBTransactionOptions::RAW_ACCESS); + RangeResult res = wait(tx->getRange(normalKeys, 1)); + } catch (Error& e) { + if (e.code() == error_code_actor_cancelled) + throw; + if (e.code() == error_code_proxy_memory_limit_exceeded || + e.code() == error_code_batch_transaction_throttled) { + wait(tx->onError(e)); + } else { + ASSERT(e.code() == error_code_database_locked); + break; + } + } } // make sure we unlock the database // unlock is idempotent, thus we can commit many times until successful + tx->reset(); loop { try { - tx->reset(); tx->setOption(FDBTransactionOptions::RAW_ACCESS); tx->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); // unlock the database tx->clear(SpecialKeySpace::getManagementApiCommandPrefix("lock")); wait(tx->commit()); TraceEvent(SevDebug, "DatabaseUnlocked").log(); - tx->reset(); + break; + } catch (Error& e) { + TraceEvent(SevDebug, "DatabaseUnlockFailure").error(e); + ASSERT(e.code() != error_code_database_locked); + wait(tx->onError(e)); + } + } + + tx->reset(); + loop { + try { // read should be successful tx->setOption(FDBTransactionOptions::RAW_ACCESS); RangeResult res = wait(tx->getRange(normalKeys, 1)); - tx->reset(); break; } catch (Error& e) { - TraceEvent(SevDebug, "DatabaseUnlockFailure").error(e); - ASSERT(e.code() != error_code_database_locked); wait(tx->onError(e)); } } + // test consistencycheck which only used by ConsistencyCheck Workload // Note: we have exclusive ownership of fdbShouldConsistencyCheckBeSuspended, // no existing workloads can modify the key + tx->reset(); { try { tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); From 93eb64fa9fb558907c3c3c8f47de67df454af922 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 10 Aug 2022 16:21:04 -0700 Subject: [PATCH 43/56] Fix LocalRatekeeper workload --- fdbserver/workloads/LocalRatekeeper.actor.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fdbserver/workloads/LocalRatekeeper.actor.cpp b/fdbserver/workloads/LocalRatekeeper.actor.cpp index 97f8af04bc2..67d84d309f5 100644 --- a/fdbserver/workloads/LocalRatekeeper.actor.cpp +++ b/fdbserver/workloads/LocalRatekeeper.actor.cpp @@ -18,6 +18,7 @@ * limitations under the License. */ +#include "fdbclient/FDBTypes.h" #include "fdbserver/workloads/workloads.actor.h" #include #include @@ -82,7 +83,16 @@ struct LocalRatekeeperWorkload : TestWorkload { .detail("Actual", metrics.localRateLimit); } tr.reset(); - Version readVersion = wait(tr.getReadVersion()); + state Version readVersion = invalidVersion; + loop { + try { + Version v = wait(tr.getReadVersion()); + readVersion = v; + break; + } catch (Error& e) { + wait(tr.onError(e)); + } + } requests.clear(); // we send 100 requests to this storage node and count how many of those get rejected for (int i = 0; i < 100; ++i) { From 3fe1f978f33feba894bfd022a8c44c48cffeb596 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 11 Aug 2022 11:23:44 -0700 Subject: [PATCH 44/56] Add a client knob GRV_ERROR_RETRY_DELAY --- fdbclient/ClientKnobs.cpp | 1 + fdbclient/ClientKnobs.h | 1 + fdbclient/NativeAPI.actor.cpp | 2 +- fdbserver/BackupWorker.actor.cpp | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index 7427078d5b5..d9d7331f146 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -63,6 +63,7 @@ void ClientKnobs::initialize(Randomize randomize) { init( WRONG_SHARD_SERVER_DELAY, .01 ); if( randomize && BUGGIFY ) WRONG_SHARD_SERVER_DELAY = deterministicRandom()->random01(); // FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test) init( FUTURE_VERSION_RETRY_DELAY, .01 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = deterministicRandom()->random01();// FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; + init( GRV_ERROR_RETRY_DELAY, 5.0 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = 0.01 + 5 * deterministicRandom()->random01(); init( UNKNOWN_TENANT_RETRY_DELAY, 0.0 ); if( randomize && BUGGIFY ) UNKNOWN_TENANT_RETRY_DELAY = deterministicRandom()->random01(); init( REPLY_BYTE_LIMIT, 80000 ); init( DEFAULT_BACKOFF, .01 ); if( randomize && BUGGIFY ) DEFAULT_BACKOFF = deterministicRandom()->random01(); diff --git a/fdbclient/ClientKnobs.h b/fdbclient/ClientKnobs.h index ca3dd1494ee..5978c43b1fe 100644 --- a/fdbclient/ClientKnobs.h +++ b/fdbclient/ClientKnobs.h @@ -61,6 +61,7 @@ class ClientKnobs : public KnobsImpl { double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is // mostly wrong (e.g. dumping the database after a test) double FUTURE_VERSION_RETRY_DELAY; + double GRV_ERROR_RETRY_DELAY; double UNKNOWN_TENANT_RETRY_DELAY; int REPLY_BYTE_LIMIT; double DEFAULT_BACKOFF; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index cd86940ada9..91f1f62c2d3 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -3491,7 +3491,7 @@ ACTOR Future waitForCommittedVersion(Database cx, Version version, Span if (e.code() == error_code_batch_transaction_throttled || e.code() == error_code_proxy_memory_limit_exceeded) { // GRV Proxy returns an error - wait(delayJittered(5.0)); + wait(delayJittered(CLIENT_KNOBS->GRV_ERROR_RETRY_DELAY)); } else { TraceEvent(SevError, "WaitForCommittedVersionError").error(e); throw; diff --git a/fdbserver/BackupWorker.actor.cpp b/fdbserver/BackupWorker.actor.cpp index 4163bca42ca..789935bd2cd 100644 --- a/fdbserver/BackupWorker.actor.cpp +++ b/fdbserver/BackupWorker.actor.cpp @@ -449,7 +449,7 @@ struct BackupData { if (e.code() == error_code_batch_transaction_throttled || e.code() == error_code_proxy_memory_limit_exceeded) { // GRV Proxy returns an error - wait(delayJittered(5.0)); + wait(delayJittered(CLIENT_KNOBS->GRV_ERROR_RETRY_DELAY)); } else { throw; } From 581819e438bece9d0d230f7f19872ce8fdf185b8 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 11 Aug 2022 14:06:16 -0700 Subject: [PATCH 45/56] Fix attribute_not_found error in bulkSetup() --- fdbserver/workloads/BulkSetup.actor.h | 43 ++++++++++++++++++++------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/fdbserver/workloads/BulkSetup.actor.h b/fdbserver/workloads/BulkSetup.actor.h index 4d1acdbb449..8e8c8b48753 100644 --- a/fdbserver/workloads/BulkSetup.actor.h +++ b/fdbserver/workloads/BulkSetup.actor.h @@ -158,6 +158,37 @@ ACTOR Future>> trackInsertionCount(Datab std::vector countsOfInterest, double checkInterval); +ACTOR template +Future waitForLowInFlight(Database cx, T* workload) { + state Future timeout = delay(300.0); + loop { + try { + if (timeout.isReady()) { + throw timed_out(); + } + + int64_t inFlight = wait(getDataInFlight(cx, workload->dbInfo)); + TraceEvent("DynamicWarming").detail("InFlight", inFlight); + if (inFlight > 1e6) { // Wait for just 1 MB to be in flight + wait(delay(1.0)); + } else { + wait(delay(1.0)); + TraceEvent("DynamicWarmingDone").log(); + break; + } + } catch (Error& e) { + if (e.code() == error_code_attribute_not_found) { + // DD may not be initialized yet and attribute "DataInFlight" can be missing + wait(delay(1.0)); + } else { + TraceEvent(SevWarn, "WaitForLowInFlightError").error(e); + throw; + } + } + } + return Void(); +} + ACTOR template Future bulkSetup(Database cx, T* workload, @@ -279,17 +310,7 @@ Future bulkSetup(Database cx, if (postSetupWarming != 0) { try { wait(delay(5.0)); // Wait for the data distribution in a small test to start - loop { - int64_t inFlight = wait(getDataInFlight(cx, workload->dbInfo)); - TraceEvent("DynamicWarming").detail("InFlight", inFlight); - if (inFlight > 1e6) { // Wait for just 1 MB to be in flight - wait(delay(1.0)); - } else { - wait(delay(1.0)); - TraceEvent("DynamicWarmingDone").log(); - break; - } - } + wait(waitForLowInFlight(cx, workload)); // Wait for the data distribution in a small test to start } catch (Error& e) { if (e.code() == error_code_actor_cancelled) throw; From 6a65f700bed3e8a4f1d5ba72626fbb7ae327a061 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Mon, 15 Aug 2022 14:29:33 -0700 Subject: [PATCH 46/56] Increase waitForLowInFlight timeout Seeing a CheckRelocation test failed with 300s timeout. --- fdbclient/ClientKnobs.cpp | 2 +- fdbserver/workloads/BulkSetup.actor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbclient/ClientKnobs.cpp b/fdbclient/ClientKnobs.cpp index d9d7331f146..8e1c1a277cd 100644 --- a/fdbclient/ClientKnobs.cpp +++ b/fdbclient/ClientKnobs.cpp @@ -63,7 +63,7 @@ void ClientKnobs::initialize(Randomize randomize) { init( WRONG_SHARD_SERVER_DELAY, .01 ); if( randomize && BUGGIFY ) WRONG_SHARD_SERVER_DELAY = deterministicRandom()->random01(); // FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test) init( FUTURE_VERSION_RETRY_DELAY, .01 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = deterministicRandom()->random01();// FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; - init( GRV_ERROR_RETRY_DELAY, 5.0 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = 0.01 + 5 * deterministicRandom()->random01(); + init( GRV_ERROR_RETRY_DELAY, 5.0 ); if( randomize && BUGGIFY ) GRV_ERROR_RETRY_DELAY = 0.01 + 5 * deterministicRandom()->random01(); init( UNKNOWN_TENANT_RETRY_DELAY, 0.0 ); if( randomize && BUGGIFY ) UNKNOWN_TENANT_RETRY_DELAY = deterministicRandom()->random01(); init( REPLY_BYTE_LIMIT, 80000 ); init( DEFAULT_BACKOFF, .01 ); if( randomize && BUGGIFY ) DEFAULT_BACKOFF = deterministicRandom()->random01(); diff --git a/fdbserver/workloads/BulkSetup.actor.h b/fdbserver/workloads/BulkSetup.actor.h index 8e8c8b48753..07f30052af1 100644 --- a/fdbserver/workloads/BulkSetup.actor.h +++ b/fdbserver/workloads/BulkSetup.actor.h @@ -160,7 +160,7 @@ ACTOR Future>> trackInsertionCount(Datab ACTOR template Future waitForLowInFlight(Database cx, T* workload) { - state Future timeout = delay(300.0); + state Future timeout = delay(600.0); loop { try { if (timeout.isReady()) { From e9c25e546f41c5bcdf5cc2d0485905be0e0e1ded Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Sat, 13 Aug 2022 13:04:59 -0700 Subject: [PATCH 47/56] Remove extremely spammy trace event by only logging the re-enabling of load balance in FetchKeys if it was disabled. --- fdbserver/storageserver.actor.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 9d742989ffe..5bcf54bb0df 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -5913,8 +5913,10 @@ ACTOR Future fetchKeys(StorageServer* data, AddingShard* shard) { } // FIXME: remove when we no longer support upgrades from 5.X - data->cx->enableLocalityLoadBalance = EnableLocalityLoadBalance::True; - TraceEvent(SevWarnAlways, "FKReenableLB").detail("FKID", fetchKeysID); + if (!data->cx->enableLocalityLoadBalance) { + data->cx->enableLocalityLoadBalance = EnableLocalityLoadBalance::True; + TraceEvent(SevWarnAlways, "FKReenableLB").detail("FKID", fetchKeysID); + } // We have completed the fetch and write of the data, now we wait for MVCC window to pass. // As we have finished this work, we will allow more work to start... From 24e5f6ee44d38044697308256321d467ceb34738 Mon Sep 17 00:00:00 2001 From: Bala Namasivayam Date: Mon, 15 Aug 2022 13:22:17 -0700 Subject: [PATCH 48/56] Fix ErrorCommitInfo event parser --- .../transaction_profiling_analyzer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/transaction_profiling_analyzer/transaction_profiling_analyzer.py b/contrib/transaction_profiling_analyzer/transaction_profiling_analyzer.py index 2ffc94065a6..25b7834698b 100644 --- a/contrib/transaction_profiling_analyzer/transaction_profiling_analyzer.py +++ b/contrib/transaction_profiling_analyzer/transaction_profiling_analyzer.py @@ -277,6 +277,12 @@ def __init__(self, bb, protocol_version, full_output=True): if protocol_version >= PROTOCOL_VERSION_6_3: self.report_conflicting_keys = bb.get_bool() + if protocol_version >= PROTOCOL_VERSION_7_1: + lock_aware = bb.get_bool() + if bb.get_bool(): + spanId = bb.get_bytes(16) + + class UnsupportedProtocolVersionError(Exception): def __init__(self, protocol_version): super().__init__("Unsupported protocol version 0x%0.2X" % protocol_version) From ead4049427ecd463f4309fc6bbb963341f12a127 Mon Sep 17 00:00:00 2001 From: Aaron Molitor Date: Tue, 23 Aug 2022 11:40:33 -0500 Subject: [PATCH 49/56] move global variables into function args/local variables, build both regular and debug images by default --- packaging/docker/build-images.sh | 54 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/packaging/docker/build-images.sh b/packaging/docker/build-images.sh index 02574515d94..4b306a1535b 100755 --- a/packaging/docker/build-images.sh +++ b/packaging/docker/build-images.sh @@ -2,12 +2,17 @@ set -Eeuo pipefail script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd -P) reset=$(tput sgr0) +red=$(tput setaf 1) blue=$(tput setaf 4) -function logg() { +function logg () { printf "${blue}##### $(date +"%H:%M:%S") # %-56.55s #####${reset}\n" "${1}" } + function loge () { + printf "${red}##### $(date +"%H:%M:%S") # %-56.55s #####${reset}\n" "${1}" + } + function pushd () { command pushd "$@" > /dev/null } @@ -16,7 +21,19 @@ function popd () { command popd > /dev/null } -function create_fake_website_directory() { +function error_exit () { + echo "${red}################################################################################${reset}" + loge "${0} FAILED" + echo "${red}################################################################################${reset}" + } + + trap error_exit ERR + + function create_fake_website_directory () { + if [ ${#} -ne 1 ]; then + loge "INCORRECT NUMBER OF ARGS FOR ${FUNCNAME[0]}" + fi + local stripped_binaries_and_from_where="${1}" fdb_binaries=( 'fdbbackup' 'fdbcli' 'fdbserver' 'fdbmonitor' ) logg "PREPARING WEBSITE" website_directory="${script_dir}/website" @@ -112,7 +129,7 @@ function create_fake_website_directory() { fdb_website="file:///tmp/website" } -function compile_ycsb() { +function compile_ycsb () { logg "COMPILING YCSB" if [ "${use_development_java_bindings}" == "true" ]; then logg "INSTALL JAVA BINDINGS" @@ -150,7 +167,13 @@ function compile_ycsb() { popd || exit 128 } -function build_and_push_images(){ +function build_and_push_images () { + if [ ${#} -ne 3 ]; then + loge "INCORRECT NUMBER OF ARGS FOR ${FUNCNAME[0]}" + fi + local dockerfile_name="${1}" + local use_development_java_bindings="${2}" + local push_docker_images="${3}" declare -a tags_to_push=() for image in "${image_list[@]}"; do logg "BUILDING ${image}" @@ -237,11 +260,6 @@ image_list=( ) registry="" tag_base="foundationdb/" -# THESE CONTROL THE PATH OF FUNCTIONS THAT ARE CALLED BELOW -stripped_binaries_and_from_where="stripped_local" # MUST BE ONE OF ( "unstripped_artifactory" "stripped_artifactory" "unstripped_local" "stripped_local" ) -dockerfile_name="Dockerfile" -use_development_java_bindings="false" -push_docker_images="false" if [ -n "${OKTETO_NAMESPACE+x}" ]; then logg "RUNNING IN OKTETO/AWS" @@ -258,20 +276,24 @@ if [ -n "${OKTETO_NAMESPACE+x}" ]; then else tag_postfix="${OKTETO_NAME:-dev}" fi - stripped_binaries_and_from_where="unstripped_local" # MUST BE ONE OF ( "unstripped_artifactory" "stripped_artifactory" "unstripped_local" "stripped_local" ) - dockerfile_name="Dockerfile.eks" - use_development_java_bindings="true" - push_docker_images="true" + + # build regular images + create_fake_website_directory stripped_local + build_and_push_images Dockerfile true true + + # build debug images + create_fake_website_directory unstripped_local + build_and_push_images Dockerfile.eks true true else echo "Dear ${USER}, you probably need to edit this file before running it. " echo "${0} has a very narrow set of situations where it will be successful," echo "or even useful, when executed unedited" exit 1 + # this set of options will creat standard images from a local build + # create_fake_website_directory stripped_local + # build_and_push_images Dockerfile false false fi -create_fake_website_directory -build_and_push_images - echo "${blue}################################################################################${reset}" logg "COMPLETED ${0}" echo "${blue}################################################################################${reset}" From c3e94e2478797b4c4236131d4202fb68e67e143c Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 25 Aug 2022 13:47:51 -0700 Subject: [PATCH 50/56] Fix missing localities for fdbserver The localities are stored in ServerDBInfo for calculating distances to other processes. The localities are not set when creating ServerDBInfo, thus any distances calculated before UpdateServerDBInfoRequest will be wrong. This PR fixes this issue, thus preventing unnecessary cross DC calls, especially for index prefetching on the storage servers. --- fdbserver/worker.actor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 4b720a54c01..94ae374f07a 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2917,7 +2917,9 @@ ACTOR Future fdbd(Reference connRecord, auto ci = makeReference>>(); auto asyncPriorityInfo = makeReference>(getCCPriorityInfo(fitnessFilePath, processClass)); - auto dbInfo = makeReference>(); + auto serverDBInfo = ServerDBInfo(); + serverDBInfo.myLocality = localities; + auto dbInfo = makeReference>(serverDBInfo); actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); From 432bc89ce4f7d6d80b65bfe4abe7ccfd4d46731f Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 25 Aug 2022 14:11:37 -0700 Subject: [PATCH 51/56] Add more traces for load balancing --- fdbrpc/LoadBalance.actor.h | 32 ++++++++++++++++++++++++++++++++ fdbrpc/MultiInterface.h | 1 + fdbserver/worker.actor.cpp | 1 + 3 files changed, 34 insertions(+) diff --git a/fdbrpc/LoadBalance.actor.h b/fdbrpc/LoadBalance.actor.h index 76326c48a4e..6388f81bed3 100644 --- a/fdbrpc/LoadBalance.actor.h +++ b/fdbrpc/LoadBalance.actor.h @@ -486,6 +486,11 @@ Future loadBalance( // server count is within "LOAD_BALANCE_MAX_BAD_OPTIONS". We // do not need to consider any remote servers. break; + } else if (badServers == alternatives->countBest() && i == badServers) { + TraceEvent("AllLocalAlternativesFailed") + .detail("Alternatives", alternatives->description()) + .detail("Total", alternatives->size()) + .detail("Best", alternatives->countBest()); } RequestStream const* thisStream = &alternatives->get(i, channel); @@ -587,6 +592,7 @@ Future loadBalance( // nextAlt. This logic matters only if model == nullptr. Otherwise, the // bestAlt and nextAlt have been decided. state RequestStream const* stream = nullptr; + state LBDistance::Type distance; for (int alternativeNum = 0; alternativeNum < alternatives->size(); alternativeNum++) { int useAlt = nextAlt; if (nextAlt == startAlt) @@ -595,6 +601,7 @@ Future loadBalance( useAlt = (nextAlt + alternatives->size() - 1) % alternatives->size(); stream = &alternatives->get(useAlt, channel); + distance = alternatives->getDistance(useAlt); if (!IFailureMonitor::failureMonitor().getState(stream->getEndpoint()).failed && (!firstRequestEndpoint.present() || stream->getEndpoint().token.first() != firstRequestEndpoint.get())) break; @@ -602,6 +609,7 @@ Future loadBalance( if (nextAlt == startAlt) triedAllOptions = TriedAllOptions::True; stream = nullptr; + distance = LBDistance::DISTANT; } if (!stream && !firstRequestData.isValid()) { @@ -637,6 +645,18 @@ Future loadBalance( firstRequestEndpoint = Optional(); } else if (firstRequestData.isValid()) { // Issue a second request, the first one is taking a long time. + if (distance == LBDistance::DISTANT) { + TraceEvent("LBDistant2nd") + .suppressFor(0.1) + .detail("Distance", (int)distance) + .detail("BackOff", backoff) + .detail("TriedAllOptions", triedAllOptions) + .detail("Alternatives", alternatives->description()) + .detail("Token", stream->getEndpoint().token) + .detail("Total", alternatives->size()) + .detail("Best", alternatives->countBest()) + .detail("Attempts", numAttempts); + } secondRequestData.startRequest(backoff, triedAllOptions, stream, request, model, alternatives, channel); state bool firstFinished = false; @@ -666,6 +686,18 @@ Future loadBalance( } } else { // Issue a request, if it takes too long to get a reply, go around the loop + if (distance == LBDistance::DISTANT) { + TraceEvent("LBDistant") + .suppressFor(0.1) + .detail("Distance", (int)distance) + .detail("BackOff", backoff) + .detail("TriedAllOptions", triedAllOptions) + .detail("Alternatives", alternatives->description()) + .detail("Token", stream->getEndpoint().token) + .detail("Total", alternatives->size()) + .detail("Best", alternatives->countBest()) + .detail("Attempts", numAttempts); + } firstRequestData.startRequest(backoff, triedAllOptions, stream, request, model, alternatives, channel); firstRequestEndpoint = stream->getEndpoint().token.first(); diff --git a/fdbrpc/MultiInterface.h b/fdbrpc/MultiInterface.h index 4f15dbf0873..85fa195206b 100644 --- a/fdbrpc/MultiInterface.h +++ b/fdbrpc/MultiInterface.h @@ -226,6 +226,7 @@ class MultiInterface> : public ReferenceCountedinterf; } + LBDistance::Type getDistance(int index) const { return (LBDistance::Type)alternatives[index]->distance; } UID getId(int index) const { return alternatives[index]->interf.id(); } bool hasInterface(UID id) const { for (const auto& ref : alternatives) { diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 94ae374f07a..301a9ed622b 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2920,6 +2920,7 @@ ACTOR Future fdbd(Reference connRecord, auto serverDBInfo = ServerDBInfo(); serverDBInfo.myLocality = localities; auto dbInfo = makeReference>(serverDBInfo); + TraceEvent("MyLocality").detail("Locality", dbInfo->get().myLocality.toString()); actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo")); From f99e20b6c38330f4a7bb56b645362138a588904a Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 25 Aug 2022 17:17:16 -0700 Subject: [PATCH 52/56] Suppress AllLocalAlternativesFailed events --- fdbrpc/LoadBalance.actor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fdbrpc/LoadBalance.actor.h b/fdbrpc/LoadBalance.actor.h index 6388f81bed3..b25a1ea73e4 100644 --- a/fdbrpc/LoadBalance.actor.h +++ b/fdbrpc/LoadBalance.actor.h @@ -488,6 +488,7 @@ Future loadBalance( break; } else if (badServers == alternatives->countBest() && i == badServers) { TraceEvent("AllLocalAlternativesFailed") + .suppressFor(1.0) .detail("Alternatives", alternatives->description()) .detail("Total", alternatives->size()) .detail("Best", alternatives->countBest()); From 53d37cb384d259d17b0d95e8d94271ef68698771 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 30 Aug 2022 13:40:51 -0700 Subject: [PATCH 53/56] Fix a race that check happened before last configure command By adding a delay before start checking. --- fdbserver/workloads/ConfigureDatabase.actor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/fdbserver/workloads/ConfigureDatabase.actor.cpp b/fdbserver/workloads/ConfigureDatabase.actor.cpp index d648b1d097b..e04ce4c8dc6 100644 --- a/fdbserver/workloads/ConfigureDatabase.actor.cpp +++ b/fdbserver/workloads/ConfigureDatabase.actor.cpp @@ -284,6 +284,7 @@ struct ConfigureDatabaseWorkload : TestWorkload { } ACTOR Future _check(ConfigureDatabaseWorkload* self, Database cx) { + wait(delay(30.0)); // only storage_migration_type=gradual && perpetual_storage_wiggle=1 need this check because in QuietDatabase // perpetual wiggle will be forced to close For other cases, later ConsistencyCheck will check KV store type // there From 3636907fb3d0fb4cb7b336b9c7357dfd94142199 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Wed, 31 Aug 2022 02:04:22 +0000 Subject: [PATCH 54/56] Add 7.1.20, 7.1.21 release notes --- .../source/release-notes/release-notes-710.rst | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/release-notes/release-notes-710.rst b/documentation/sphinx/source/release-notes/release-notes-710.rst index 4854230c1d0..05a33625e8e 100644 --- a/documentation/sphinx/source/release-notes/release-notes-710.rst +++ b/documentation/sphinx/source/release-notes/release-notes-710.rst @@ -1,9 +1,18 @@ -.. _release-notes: - ############# Release Notes ############# +7.1.21 +====== +* Same as 7.1.20 release with AVX enabled. + +7.1.20 +====== +* Released with AVX disabled. +* Fixed missing localities for fdbserver that can cause cross DC calls among storage servers. `(PR #7995) `_ +* Removed extremely spammy trace event in FetchKeys and fixed transaction_profiling_analyzer.py. `(PR #7934) `_ +* Fixed bugs when GRV proxy returns an error. `(PR #7860) `_ + 7.1.19 ====== * Same as 7.1.18 release with AVX enabled. From d0f15fe64b8e01ecb091deb590a90d93d361f44c Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Wed, 31 Aug 2022 02:06:21 +0000 Subject: [PATCH 55/56] disable AVX for 7.1.20 release --- cmake/ConfigureCompiler.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 398dfe5fb61..cbd7935e464 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -253,7 +253,7 @@ else() set(USE_AVX512F OFF) endif() endif() - set(USE_AVX ON CACHE BOOL "Enable AVX instructions") + set(USE_AVX OFF CACHE BOOL "Enable AVX instructions") if (USE_AVX) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") add_compile_options(-mavx) From 57ae31f72b3c789f597a7870a6001d39e8331d09 Mon Sep 17 00:00:00 2001 From: FoundationDB CI Date: Wed, 31 Aug 2022 02:07:04 +0000 Subject: [PATCH 56/56] enable AVX and update version for 7.1.21 release --- CMakeLists.txt | 2 +- cmake/ConfigureCompiler.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 740fdded801..df70998a624 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ else() endif() project(foundationdb - VERSION 7.1.20 + VERSION 7.1.21 DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions." HOMEPAGE_URL "http://www.foundationdb.org/" LANGUAGES C CXX ASM) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index cbd7935e464..398dfe5fb61 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -253,7 +253,7 @@ else() set(USE_AVX512F OFF) endif() endif() - set(USE_AVX OFF CACHE BOOL "Enable AVX instructions") + set(USE_AVX ON CACHE BOOL "Enable AVX instructions") if (USE_AVX) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") add_compile_options(-mavx)