Skip to content
This repository has been archived by the owner on Sep 26, 2019. It is now read-only.

RocksDB Statistics in Metrics #1169

Merged
merged 19 commits into from
Apr 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@ public enum MetricCategory {
NETWORK("network"),
PEERS("peers"),
PROCESS("process", false),
ROCKSDB("rocksdb"),
KVSTORE_ROCKSDB("rocksdb"),
KVSTORE_ROCKSDB_STATS("rocksdb", false),
RPC("rpc"),
SYNCHRONIZER("synchronizer"),
TRANSACTION_POOL("transaction_pool");

// Why not BIG_QUEUE, KVSTORE_ROCKSDB and KVSTORE_ROCKSDB_STATS? They hurt performance under load.
public static final Set<MetricCategory> DEFAULT_METRIC_CATEGORIES =
EnumSet.complementOf(EnumSet.of(BIG_QUEUE, ROCKSDB));
EnumSet.complementOf(EnumSet.of(BIG_QUEUE, KVSTORE_ROCKSDB, KVSTORE_ROCKSDB_STATS));

private final String name;
private final boolean pantheonSpecific;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public LabelledMetric<tech.pegasys.pantheon.metrics.Counter> createLabelledCount
(k) -> {
if (enabledCategories.contains(category)) {
final Counter counter = Counter.build(metricName, help).labelNames(labelNames).create();
addCollector(category, counter);
addCollectorUnchecked(category, counter);
return new PrometheusCounter(counter);
} else {
return NoOpMetricsSystem.getCounterLabelledMetric(labelNames.length);
Expand Down Expand Up @@ -125,7 +125,7 @@ public LabelledMetric<OperationTimer> createLabelledTimer(
.quantile(1.0, 0)
.labelNames(labelNames)
.create();
addCollector(category, summary);
addCollectorUnchecked(category, summary);
return new PrometheusTimer(summary);
} else {
return NoOpMetricsSystem.getOperationTimerLabelledMetric(labelNames.length);
Expand All @@ -142,11 +142,17 @@ public void createGauge(
final String metricName = convertToPrometheusName(category, name);
if (enabledCategories.contains(category)) {
final Collector collector = new CurrentValueCollector(metricName, help, valueSupplier);
addCollector(category, collector);
addCollectorUnchecked(category, collector);
}
}

private void addCollector(final MetricCategory category, final Collector metric) {
public void addCollector(final MetricCategory category, final Collector metric) {
if (enabledCategories.contains(category)) {
addCollectorUnchecked(category, metric);
}
}

private void addCollectorUnchecked(final MetricCategory category, final Collector metric) {
metric.register(registry);
collectors
.computeIfAbsent(category, key -> Collections.newSetFromMap(new ConcurrentHashMap<>()))
Expand Down Expand Up @@ -213,7 +219,7 @@ private Observation convertSummarySampleNamesToLabels(
labelValues);
}

private String convertToPrometheusName(final MetricCategory category, final String name) {
public static String convertToPrometheusName(final MetricCategory category, final String name) {
return prometheusPrefix(category) + name;
}

Expand All @@ -222,7 +228,7 @@ private String convertFromPrometheusName(final MetricCategory category, final St
return metricName.startsWith(prefix) ? metricName.substring(prefix.length()) : metricName;
}

private String prometheusPrefix(final MetricCategory category) {
private static String prometheusPrefix(final MetricCategory category) {
return category.isPantheonSpecific()
? PANTHEON_PREFIX + category.getName() + "_"
: category.getName() + "_";
Expand Down
6 changes: 4 additions & 2 deletions services/kvstore/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ jar {

dependencies {
api project(':util')

implementation project(':metrics')
implementation project(':services:util')

implementation 'org.apache.logging.log4j:log4j-api'
implementation 'com.google.guava:guava'
implementation 'org.rocksdb:rocksdbjni'
implementation 'info.picocli:picocli'
implementation 'io.prometheus:simpleclient'
implementation 'org.apache.logging.log4j:log4j-api'
implementation 'org.rocksdb:rocksdbjni'

runtime 'org.apache.logging.log4j:log4j-core'

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
* Copyright 2019 ConsenSys AG.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package tech.pegasys.pantheon.services.kvstore;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about moving this to tech.pegasys.pantheon.metrics.rocksdb? We have a similar paradigm for vertx utilities in tech.pegasys.pantheon.metrics.vertx.


import static tech.pegasys.pantheon.metrics.MetricCategory.KVSTORE_ROCKSDB_STATS;

import tech.pegasys.pantheon.metrics.prometheus.PrometheusMetricsSystem;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import io.prometheus.client.Collector;
import org.rocksdb.HistogramData;
import org.rocksdb.HistogramType;
import org.rocksdb.Statistics;
import org.rocksdb.TickerType;

class RocksDBStats {

static final List<String> LABELS = Collections.singletonList("quantile");
static final List<String> LABEL_50 = Collections.singletonList("0.5");
static final List<String> LABEL_95 = Collections.singletonList("0.95");
static final List<String> LABEL_99 = Collections.singletonList("0.99");

// Tickers - RocksDB equivalent of counters
static final TickerType[] TICKERS = {
TickerType.BLOCK_CACHE_ADD,
TickerType.BLOCK_CACHE_HIT,
TickerType.BLOCK_CACHE_ADD_FAILURES,
TickerType.BLOCK_CACHE_INDEX_MISS,
TickerType.BLOCK_CACHE_INDEX_HIT,
TickerType.BLOCK_CACHE_INDEX_ADD,
TickerType.BLOCK_CACHE_INDEX_BYTES_INSERT,
TickerType.BLOCK_CACHE_INDEX_BYTES_EVICT,
TickerType.BLOCK_CACHE_FILTER_MISS,
TickerType.BLOCK_CACHE_FILTER_HIT,
TickerType.BLOCK_CACHE_FILTER_ADD,
TickerType.BLOCK_CACHE_FILTER_BYTES_INSERT,
TickerType.BLOCK_CACHE_FILTER_BYTES_EVICT,
TickerType.BLOCK_CACHE_DATA_MISS,
TickerType.BLOCK_CACHE_DATA_HIT,
TickerType.BLOCK_CACHE_DATA_ADD,
TickerType.BLOCK_CACHE_DATA_BYTES_INSERT,
TickerType.BLOCK_CACHE_BYTES_READ,
TickerType.BLOCK_CACHE_BYTES_WRITE,
TickerType.BLOOM_FILTER_USEFUL,
TickerType.PERSISTENT_CACHE_HIT,
TickerType.PERSISTENT_CACHE_MISS,
TickerType.SIM_BLOCK_CACHE_HIT,
TickerType.SIM_BLOCK_CACHE_MISS,
TickerType.MEMTABLE_HIT,
TickerType.MEMTABLE_MISS,
TickerType.GET_HIT_L0,
TickerType.GET_HIT_L1,
TickerType.GET_HIT_L2_AND_UP,
TickerType.COMPACTION_KEY_DROP_NEWER_ENTRY,
TickerType.COMPACTION_KEY_DROP_OBSOLETE,
TickerType.COMPACTION_KEY_DROP_RANGE_DEL,
TickerType.COMPACTION_KEY_DROP_USER,
TickerType.COMPACTION_RANGE_DEL_DROP_OBSOLETE,
TickerType.NUMBER_KEYS_WRITTEN,
TickerType.NUMBER_KEYS_READ,
TickerType.NUMBER_KEYS_UPDATED,
TickerType.BYTES_WRITTEN,
TickerType.BYTES_READ,
TickerType.NUMBER_DB_SEEK,
TickerType.NUMBER_DB_NEXT,
TickerType.NUMBER_DB_PREV,
TickerType.NUMBER_DB_SEEK_FOUND,
TickerType.NUMBER_DB_NEXT_FOUND,
TickerType.NUMBER_DB_PREV_FOUND,
TickerType.ITER_BYTES_READ,
TickerType.NO_FILE_CLOSES,
TickerType.NO_FILE_OPENS,
TickerType.NO_FILE_ERRORS,
// TickerType.STALL_L0_SLOWDOWN_MICROS,
// TickerType.STALL_MEMTABLE_COMPACTION_MICROS,
// TickerType.STALL_L0_NUM_FILES_MICROS,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume we want to keep these here for visibility of all of the options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, to make it clear their absence is intentional.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(optional) Might be worth an explanatory comment :)

TickerType.STALL_MICROS,
TickerType.DB_MUTEX_WAIT_MICROS,
TickerType.RATE_LIMIT_DELAY_MILLIS,
TickerType.NO_ITERATORS,
TickerType.NUMBER_MULTIGET_BYTES_READ,
TickerType.NUMBER_MULTIGET_KEYS_READ,
TickerType.NUMBER_MULTIGET_CALLS,
TickerType.NUMBER_FILTERED_DELETES,
TickerType.NUMBER_MERGE_FAILURES,
TickerType.BLOOM_FILTER_PREFIX_CHECKED,
TickerType.BLOOM_FILTER_PREFIX_USEFUL,
TickerType.NUMBER_OF_RESEEKS_IN_ITERATION,
TickerType.GET_UPDATES_SINCE_CALLS,
TickerType.BLOCK_CACHE_COMPRESSED_MISS,
TickerType.BLOCK_CACHE_COMPRESSED_HIT,
TickerType.BLOCK_CACHE_COMPRESSED_ADD,
TickerType.BLOCK_CACHE_COMPRESSED_ADD_FAILURES,
TickerType.WAL_FILE_SYNCED,
TickerType.WAL_FILE_BYTES,
TickerType.WRITE_DONE_BY_SELF,
TickerType.WRITE_DONE_BY_OTHER,
TickerType.WRITE_TIMEDOUT,
TickerType.WRITE_WITH_WAL,
TickerType.COMPACT_READ_BYTES,
TickerType.COMPACT_WRITE_BYTES,
TickerType.FLUSH_WRITE_BYTES,
TickerType.NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
TickerType.NUMBER_SUPERVERSION_ACQUIRES,
TickerType.NUMBER_SUPERVERSION_RELEASES,
TickerType.NUMBER_SUPERVERSION_CLEANUPS,
TickerType.NUMBER_BLOCK_COMPRESSED,
TickerType.NUMBER_BLOCK_DECOMPRESSED,
TickerType.NUMBER_BLOCK_NOT_COMPRESSED,
TickerType.MERGE_OPERATION_TOTAL_TIME,
TickerType.FILTER_OPERATION_TOTAL_TIME,
TickerType.ROW_CACHE_HIT,
TickerType.ROW_CACHE_MISS,
TickerType.READ_AMP_ESTIMATE_USEFUL_BYTES,
TickerType.READ_AMP_TOTAL_READ_BYTES,
TickerType.NUMBER_RATE_LIMITER_DRAINS,
TickerType.NUMBER_ITER_SKIP,
TickerType.NUMBER_MULTIGET_KEYS_FOUND,
};

// Histograms - treated as prometheus summaries.
// Each entry is wrapped by histogramToCollector and registered under KVSTORE_ROCKSDB_STATS,
// reporting the 0.5, 0.95 and 0.99 quantiles read from the RocksDB Statistics object.
static final HistogramType[] HISTOGRAMS = {
HistogramType.DB_GET,
HistogramType.DB_WRITE,
HistogramType.COMPACTION_TIME,
HistogramType.SUBCOMPACTION_SETUP_TIME,
HistogramType.TABLE_SYNC_MICROS,
HistogramType.COMPACTION_OUTFILE_SYNC_MICROS,
HistogramType.WAL_FILE_SYNC_MICROS,
HistogramType.MANIFEST_FILE_SYNC_MICROS,
HistogramType.TABLE_OPEN_IO_MICROS,
HistogramType.DB_MULTIGET,
HistogramType.READ_BLOCK_COMPACTION_MICROS,
HistogramType.READ_BLOCK_GET_MICROS,
HistogramType.WRITE_RAW_BLOCK_MICROS,
HistogramType.STALL_L0_SLOWDOWN_COUNT,
HistogramType.STALL_MEMTABLE_COMPACTION_COUNT,
HistogramType.STALL_L0_NUM_FILES_COUNT,
HistogramType.HARD_RATE_LIMIT_DELAY_COUNT,
HistogramType.SOFT_RATE_LIMIT_DELAY_COUNT,
HistogramType.NUM_FILES_IN_SINGLE_COMPACTION,
HistogramType.DB_SEEK,
HistogramType.WRITE_STALL,
HistogramType.SST_READ_MICROS,
HistogramType.NUM_SUBCOMPACTIONS_SCHEDULED,
HistogramType.BYTES_PER_READ,
HistogramType.BYTES_PER_WRITE,
HistogramType.BYTES_PER_MULTIGET,
HistogramType.BYTES_COMPRESSED,
HistogramType.BYTES_DECOMPRESSED,
HistogramType.COMPRESSION_TIMES_NANOS,
HistogramType.DECOMPRESSION_TIMES_NANOS,
HistogramType.READ_NUM_MERGE_OPERANDS,
};

static void registerRocksDBMetrics(
final Statistics stats, final PrometheusMetricsSystem metricsSystem) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you move this to the metrics package and inject MetricsCategory, we can reuse this elsewhere. For example, in the RocksDbTaskQueue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would give the metrics package a dependency on RocksDB, and then every package that uses metrics would drag that dependency along as deadweight.

Copy link
Contributor

@mbaxter mbaxter Apr 11, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a good point. And that's the case now with vertx. I guess the "right" thing to do would be to have independent vertxmetrics and rocksdbmetrics modules.


for (final TickerType ticker : TICKERS) {
final String promCounterName = ticker.name().toLowerCase();
metricsSystem.createLongGauge(
KVSTORE_ROCKSDB_STATS,
promCounterName,
"RocksDB reported statistics for " + ticker.name(),
() -> stats.getTickerCount(ticker));
}

for (final HistogramType histogram : HISTOGRAMS) {
metricsSystem.addCollector(KVSTORE_ROCKSDB_STATS, histogramToCollector(stats, histogram));
}
}

/**
 * Wraps a single RocksDB histogram in a Prometheus {@link Collector} that reports it as a summary.
 *
 * <p>Every call to {@code collect()} reads the histogram data from {@code stats} at that moment
 * and emits three samples, labelled with the quantiles 0.5, 0.95 and 0.99.
 *
 * @param stats the RocksDB statistics object the histogram data is read from
 * @param histogram the histogram to expose
 * @return a collector producing one summary metric family for {@code histogram}
 */
private static Collector histogramToCollector(
    final Statistics stats, final HistogramType histogram) {
  return new Collector() {
    // Lower-case with Locale.ROOT so the metric name does not depend on the JVM's default
    // locale (e.g. a Turkish locale would turn 'I' into a dotless 'ı').
    final String metricName =
        PrometheusMetricsSystem.convertToPrometheusName(
            KVSTORE_ROCKSDB_STATS, histogram.name().toLowerCase(Locale.ROOT));

    @Override
    public List<MetricFamilySamples> collect() {
      // Fetched on every scrape so the samples reflect the latest RocksDB figures.
      final HistogramData data = stats.getHistogramData(histogram);
      return Collections.singletonList(
          new MetricFamilySamples(
              metricName,
              Type.SUMMARY,
              "RocksDB histogram for " + metricName,
              Arrays.asList(
                  new MetricFamilySamples.Sample(metricName, LABELS, LABEL_50, data.getMedian()),
                  new MetricFamilySamples.Sample(
                      metricName, LABELS, LABEL_95, data.getPercentile95()),
                  new MetricFamilySamples.Sample(
                      metricName, LABELS, LABEL_99, data.getPercentile99()))));
    }
  };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import tech.pegasys.pantheon.metrics.MetricCategory;
import tech.pegasys.pantheon.metrics.MetricsSystem;
import tech.pegasys.pantheon.metrics.OperationTimer;
import tech.pegasys.pantheon.metrics.prometheus.PrometheusMetricsSystem;
import tech.pegasys.pantheon.services.util.RocksDbUtil;
import tech.pegasys.pantheon.util.bytes.BytesValue;

Expand All @@ -27,6 +28,7 @@
import org.apache.logging.log4j.Logger;
import org.rocksdb.Options;
import org.rocksdb.RocksDBException;
import org.rocksdb.Statistics;
import org.rocksdb.TransactionDB;
import org.rocksdb.TransactionDBOptions;
import org.rocksdb.WriteOptions;
Expand All @@ -45,6 +47,7 @@ public class RocksDbKeyValueStorage implements KeyValueStorage, Closeable {
private final OperationTimer writeLatency;
private final OperationTimer commitLatency;
private final Counter rollbackCount;
private final Statistics stats;

public static KeyValueStorage create(
final RocksDbConfiguration rocksDbConfiguration, final MetricsSystem metricsSystem)
Expand All @@ -56,33 +59,44 @@ private RocksDbKeyValueStorage(
final RocksDbConfiguration rocksDbConfiguration, final MetricsSystem metricsSystem) {
RocksDbUtil.loadNativeLibrary();
try {

stats = new Statistics();
options =
new Options()
.setCreateIfMissing(true)
.setMaxOpenFiles(rocksDbConfiguration.getMaxOpenFiles())
.setTableFormatConfig(rocksDbConfiguration.getBlockBasedTableConfig());
.setTableFormatConfig(rocksDbConfiguration.getBlockBasedTableConfig())
.setStatistics(stats);

txOptions = new TransactionDBOptions();
db = TransactionDB.open(options, txOptions, rocksDbConfiguration.getDatabaseDir().toString());

readLatency =
metricsSystem.createTimer(
MetricCategory.ROCKSDB, "read_latency_seconds", "Latency for read from RocksDB.");
MetricCategory.KVSTORE_ROCKSDB,
"read_latency_seconds",
"Latency for read from RocksDB.");
removeLatency =
metricsSystem.createTimer(
MetricCategory.ROCKSDB,
MetricCategory.KVSTORE_ROCKSDB,
"remove_latency_seconds",
"Latency of remove requests from RocksDB.");
writeLatency =
metricsSystem.createTimer(
MetricCategory.ROCKSDB, "write_latency_seconds", "Latency for write to RocksDB.");
MetricCategory.KVSTORE_ROCKSDB,
"write_latency_seconds",
"Latency for write to RocksDB.");
commitLatency =
metricsSystem.createTimer(
MetricCategory.ROCKSDB, "commit_latency_seconds", "Latency for commits to RocksDB.");
MetricCategory.KVSTORE_ROCKSDB,
"commit_latency_seconds",
"Latency for commits to RocksDB.");

if (metricsSystem instanceof PrometheusMetricsSystem) {
RocksDBStats.registerRocksDBMetrics(stats, (PrometheusMetricsSystem) metricsSystem);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you wrap all of this new logic up in a utility class in the metrics package? That way we can reuse this for other RocksDB instances as needed. It looks like you could create a RocksDBStats class with methods getStatistics() and registerMetrics(MetricsSystem metrics, MetricsCategory category).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we keep the statistics object in rocksdb and treat it like a part of the options then we can have RocksDBStats have just static methods.


metricsSystem.createLongGauge(
MetricCategory.ROCKSDB,
MetricCategory.KVSTORE_ROCKSDB,
"rocks_db_table_readers_memory_bytes",
"Estimated memory used for RocksDB index and filter blocks in bytes",
() -> {
Expand All @@ -96,7 +110,7 @@ private RocksDbKeyValueStorage(

rollbackCount =
metricsSystem.createCounter(
MetricCategory.ROCKSDB,
MetricCategory.KVSTORE_ROCKSDB,
"rollback_count",
"Number of RocksDB transactions rolled back.");
} catch (final RocksDBException e) {
Expand Down