diff --git a/api/src/main/java/com/google/common/flogger/LogContext.java b/api/src/main/java/com/google/common/flogger/LogContext.java index 22495b76..93604e1e 100644 --- a/api/src/main/java/com/google/common/flogger/LogContext.java +++ b/api/src/main/java/com/google/common/flogger/LogContext.java @@ -75,6 +75,13 @@ private Key() {} public static final MetadataKey LOG_EVERY_N = MetadataKey.single("ratelimit_count", Integer.class); + /** + * The key associated with a rate limiting counter for "1-in-N" randomly sampled rate limiting. + * The value is set by {@link LoggingApi#onAverageEvery(int)}. + */ + public static final MetadataKey LOG_SAMPLE_EVERY_N = + MetadataKey.single("sampling_count", Integer.class); + /** * The key associated with a rate limiting period for "at most once every N" rate limiting. The * value is set by {@link LoggingApi#atMostEvery(int, TimeUnit)}. @@ -496,11 +503,12 @@ protected final void removeMetadata(MetadataKey key) { *

<h2>Basic Responsibilities</h2>
 *
 * <p>This method is responsible for:
 *
 * <ol>
 *   <li>Performing any rate limiting operations specific to the extended API.
 *   <li>Updating per log-site information (e.g. for debug metrics).
 *   <li>Adding any additional metadata to this context.
 *   <li>Returning whether logging should be attempted.
 * </ol>
 *
 * <p>

Implementations of this method must always call {@code super.postProcess()} first with the @@ -555,10 +563,10 @@ protected final void removeMetadata(MetadataKey key) { * post-processing, and no rate limiter state will be updated. This is sometimes desirable, but * the API documentation should make it clear to the user as to which behaviour occurs. * - * For example, level selector methods (such as {@code atInfo()}) return the {@code NoOp} API for - * "disabled" log statements, and these have no effect on rate limiter state, and will not update - * the "skipped" count. This is fine because controlling logging via log level selection is not - * conceptually a form of "rate limiting". + *

<p>For example, level selector methods (such as {@code atInfo()}) return the {@code NoOp} API + * for "disabled" log statements, and these have no effect on rate limiter state, and will not + * update the "skipped" count. This is fine because controlling logging via log level selection is + * not conceptually a form of "rate limiting". * *

The default implementation of this method enforces the rate limits as set by {@link * #every(int)} and {@link #atMostEvery(int, TimeUnit)}. @@ -573,14 +581,15 @@ protected boolean postProcess(@NullableDecl LogSiteKey logSiteKey) { if (logSiteKey != null) { // Since the base class postProcess() should be invoked before subclass logic, we can set // the initial status here. Subclasses can combine this with other rate limiter statuses by - // calling updateRateLimiterStatus() before we get back into shouldLog() . - rateLimitStatus = - RateLimitStatus.combine( - CountingRateLimiter.check(metadata, logSiteKey), - DurationRateLimiter.check(metadata, logSiteKey, timestampNanos)); + // calling updateRateLimiterStatus() before we get back into shouldLog(). + RateLimitStatus status = DurationRateLimiter.check(metadata, logSiteKey, timestampNanos); + status = RateLimitStatus.combine(status, CountingRateLimiter.check(metadata, logSiteKey)); + status = RateLimitStatus.combine(status, SamplingRateLimiter.check(metadata, logSiteKey)); + this.rateLimitStatus = status; + // Early exit as soon as we know the log statement is disallowed. A subclass may still do // post processing but should never re-enable the log. - if (rateLimitStatus == RateLimitStatus.DISALLOW) { + if (status == RateLimitStatus.DISALLOW) { return false; } } @@ -834,16 +843,25 @@ public API withStackTrace(StackSize size) { @Override public final API every(int n) { + return everyImpl(Key.LOG_EVERY_N, n, "rate limit"); + } + + @Override + public final API onAverageEvery(int n) { + return everyImpl(Key.LOG_SAMPLE_EVERY_N, n, "sampling"); + } + + private API everyImpl(MetadataKey key, int n, String label) { // See wasForced() for discussion as to why this occurs before argument checking. if (wasForced()) { return api(); } if (n <= 0) { - throw new IllegalArgumentException("rate limit count must be positive"); + throw new IllegalArgumentException(label + " count must be positive"); } // 1-in-1 rate limiting is a no-op. if (n > 1) { - addMetadata(Key.LOG_EVERY_N, n); + addMetadata(key, n); } return api(); } diff --git a/api/src/main/java/com/google/common/flogger/LoggingApi.java b/api/src/main/java/com/google/common/flogger/LoggingApi.java index 591006a6..c334b4d1 100644 --- a/api/src/main/java/com/google/common/flogger/LoggingApi.java +++ b/api/src/main/java/com/google/common/flogger/LoggingApi.java @@ -55,8 +55,9 @@ public interface LoggingApi> { /** * Modifies the current log statement to be emitted at most one-in-N times. The specified count - * must be greater than zero and it is expected, but not required, that it is constant. The first - * invocation of any rate-limited log statement will always be emitted. + * must be greater than zero and it is expected, but not required, that it is constant. In the + * absence of any other rate limiting, this method always allows the first invocation of any log + * statement to be emitted. * *

<h2>Notes</h2>

* @@ -77,10 +78,36 @@ public interface LoggingApi<API extends LoggingApi<API>> { */ API every(int n); + /** + * Modifies the current log statement to be emitted with likelihood 1 in {@code n}. For example, + * inserting {@code onAverageEvery(20)} into a call chain results in approximately 5% as many + * messages being emitted as before. Unlike the other rate-limiting options, there is no + * guarantee about when the first such message will be emitted, though it becomes highly likely as + * the number of calls reaches several times {@code n}. + * + *

<h2>Notes</h2>

+ * + * <p>If multiple rate limiters are used for a single log statement, that log statement will + * only be emitted once all rate limiters have reached their threshold, and when a log statement + * is emitted all the rate limiters are reset. In particular for {@code onAverageEvery(N)} this + * means that logs may occur less frequently than one-in-N if other rate limiters are active. + *

+ * <p>When rate limiting is active, a {@code "skipped"} count is added to log statements to indicate + * how many logs were disallowed since the last log statement was emitted. + *

+ * If this method is called multiple times for a single log statement, the last invocation will + * take precedence. + * + * @param n the factor by which to reduce logging frequency; a value of {@code 1} has no effect. + * @throws IllegalArgumentException if {@code n} is not positive. + */ + API onAverageEvery(int n); + /** * Modifies the current log statement to be emitted at most once per specified time period. The * specified duration must not be negative, and it is expected, but not required, that it is - * constant. The first invocation of any rate-limited log statement will always be emitted. + * constant. In the absence of any other rate limiting, this method always allows the first + * invocation of any log statement to be emitted. *

* <p>Note that for performance reasons {@code atMostEvery()} is explicitly not intended to * perform "proper" rate limiting to produce a limited average rate over many samples. @@ -109,7 +136,7 @@ public interface LoggingApi<API extends LoggingApi<API>> { *

<h2>Granularity</h2>

* * Because the implementation of this feature relies on a nanosecond timestamp provided by the - * backend, the actual granularity of the underlying clock used may vary. Thus it is possible to + * backend, the actual granularity of the underlying clock used may vary, and it is possible to * specify a time period smaller than the smallest visible time increment. If this occurs, then * the effective rate limit applied to the log statement will be the smallest available time * increment. For example, if the system clock granularity is 1 millisecond, and a @@ -841,6 +868,11 @@ public final API every(int n) { return noOp(); } + @Override + public final API onAverageEvery(int n) { + return noOp(); + } + @Override public final API atMostEvery(int n, TimeUnit unit) { checkNotNull(unit, "time unit"); diff --git a/api/src/main/java/com/google/common/flogger/SamplingRateLimiter.java b/api/src/main/java/com/google/common/flogger/SamplingRateLimiter.java new file mode 100644 index 00000000..8c976bbe --- /dev/null +++ b/api/src/main/java/com/google/common/flogger/SamplingRateLimiter.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2023 The Flogger Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.common.flogger; + +import static com.google.common.flogger.LogContext.Key.LOG_SAMPLE_EVERY_N; +import static com.google.common.flogger.RateLimitStatus.DISALLOW; + +import com.google.common.flogger.backend.Metadata; +import java.util.Random; +import java.util.concurrent.atomic.AtomicInteger; +import org.checkerframework.checker.nullness.compatqual.NullableDecl; + +/** + * Rate limiter to support {@code onAverageEvery(N)} functionality. + * + *

<p>Instances of this class are created for each unique {@link LogSiteKey} for which rate limiting + * via the {@code LOG_SAMPLE_EVERY_N} metadata key is required. This class implements {@code + * RateLimitStatus} as a mechanism for resetting its own state. + * + *

<p>This class is thread safe. + */ +final class SamplingRateLimiter extends RateLimitStatus { + private static final LogSiteMap<SamplingRateLimiter> map = + new LogSiteMap<SamplingRateLimiter>() { + @Override + protected SamplingRateLimiter initialValue() { + return new SamplingRateLimiter(); + } + }; + + @NullableDecl + static RateLimitStatus check(Metadata metadata, LogSiteKey logSiteKey) { + Integer rateLimitCount = metadata.findValue(LOG_SAMPLE_EVERY_N); + if (rateLimitCount == null || rateLimitCount <= 0) { + // Without valid rate limiter specific metadata, this limiter has no effect. + return null; + } + return map.get(logSiteKey, metadata).sampleOneIn(rateLimitCount); + } + + // Even though Random is synchronized, we have to put it in a ThreadLocal to avoid thread + // contention. We cannot use ThreadLocalRandom (yet) due to JDK level. + private static final ThreadLocal<Random> random = new ThreadLocal<Random>() { + @Override + protected Random initialValue() { + return new Random(); + } + }; + + // Visible for testing. + final AtomicInteger pendingCount = new AtomicInteger(); + + // Visible for testing. + SamplingRateLimiter() {} + + RateLimitStatus sampleOneIn(int rateLimitCount) { + // Always "roll the dice" and adjust the count if necessary (even if we were already + // pending). This means that in the long run we will account for every time we roll a + // zero and the number of logs will end up statistically close to 1-in-N (even if at + // times they can be "bursty" due to the action of other rate limiting mechanisms). + int pending; + if (random.get().nextInt(rateLimitCount) == 0) { + pending = pendingCount.incrementAndGet(); + } else { + pending = pendingCount.get(); + } + return pending > 0 ? this : DISALLOW; + } + + @Override + public void reset() { + pendingCount.decrementAndGet(); + } +} diff --git a/api/src/test/java/com/google/common/flogger/LogContextTest.java b/api/src/test/java/com/google/common/flogger/LogContextTest.java index 0dbdae64..c223a6c9 100644 --- a/api/src/test/java/com/google/common/flogger/LogContextTest.java +++ b/api/src/test/java/com/google/common/flogger/LogContextTest.java @@ -31,6 +31,7 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; +import com.google.common.collect.Range; import com.google.common.flogger.DurationRateLimiter.RateLimitPeriod; import com.google.common.flogger.LogContext.Key; import com.google.common.flogger.context.Tags; @@ -216,6 +217,40 @@ public void testEveryN() { backend.assertLogged(2).metadata().containsUniqueEntry(Key.SKIPPED_LOG_COUNT, 4); } + @Test + public void testOnAverageEveryN() { + FakeLoggerBackend backend = new FakeLoggerBackend(); + TestLogger logger = TestLogger.create(backend); + + long startNanos = MILLISECONDS.toNanos(System.currentTimeMillis()); + // Logging occurs randomly 1-in-5 times over 1000 log statements. + for (int millis = 0, count = 0; millis <= 1000; millis += 1) { + long timestampNanos = startNanos + MILLISECONDS.toNanos(millis); + logger.at(INFO, timestampNanos).onAverageEvery(5).log("Count=%d", count++); + } + + // Statistically impossible that we deviate by more than +/- 100 from the expected 200 logs. + assertThat(backend.getLoggedCount()).isIn(Range.closed(100, 300)); + backend.assertLogged(0).metadata().containsUniqueEntry(Key.LOG_SAMPLE_EVERY_N, 5); + + // Check the expected count and skipped-count for each log based on the timestamp.
+ int lastLogIndex = -1; + for (int n = 0; n < backend.getLoggedCount(); n++) { + // The timestamp increases by 1 millisecond each time so we can get the log index from it. + long deltaNanos = backend.getLogged(n).getTimestampNanos() - startNanos; + int logIndex = (int) (deltaNanos / MILLISECONDS.toNanos(1)); + backend.assertLogged(n).hasArguments(logIndex); + // This works even if lastLogIndex == -1. + int skipped = (logIndex - lastLogIndex) - 1; + if (skipped == 0) { + backend.assertLogged(n).metadata().keys().doesNotContain(Key.SKIPPED_LOG_COUNT); + } else { + backend.assertLogged(n).metadata().containsUniqueEntry(Key.SKIPPED_LOG_COUNT, skipped); + } + lastLogIndex = logIndex; + } + } + @Test public void testAtMostEvery() { FakeLoggerBackend backend = new FakeLoggerBackend(); diff --git a/api/src/test/java/com/google/common/flogger/SamplingRateLimiterTest.java b/api/src/test/java/com/google/common/flogger/SamplingRateLimiterTest.java new file mode 100644 index 00000000..37b743ff --- /dev/null +++ b/api/src/test/java/com/google/common/flogger/SamplingRateLimiterTest.java @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2023 The Flogger Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.common.flogger; + +import static com.google.common.flogger.LogContext.Key.LOG_SAMPLE_EVERY_N; +import static com.google.common.truth.Truth.assertThat; + +import com.google.common.collect.Range; +import com.google.common.flogger.backend.Metadata; +import com.google.common.flogger.testing.FakeLogSite; +import com.google.common.flogger.testing.FakeMetadata; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class SamplingRateLimiterTest { + @Test + public void testInvalidCount() { + Metadata metadata = new FakeMetadata().add(LOG_SAMPLE_EVERY_N, 0); + assertThat(SamplingRateLimiter.check(metadata, FakeLogSite.unique())).isNull(); + } + + @Test + public void testPendingCount() { + SamplingRateLimiter limiter = new SamplingRateLimiter(); + // Initially we are not "pending", so disallow logging for an "impossible" sample rate. + assertThat(limiter.pendingCount.get()).isEqualTo(0); + assertThat(limiter.sampleOneIn(Integer.MAX_VALUE)).isEqualTo(RateLimitStatus.DISALLOW); + for (int i = 0; i < 100; i++) { + RateLimitStatus unused = limiter.sampleOneIn(5); + } + // Statistically we should be pending at least once. + int pendingCount = limiter.pendingCount.get(); + assertThat(pendingCount).isGreaterThan(0); + // Now we are pending, we allow logging even for an "impossible" sample rate. + assertThat(limiter.sampleOneIn(Integer.MAX_VALUE)).isNotEqualTo(RateLimitStatus.DISALLOW); + limiter.reset(); + assertThat(limiter.pendingCount.get()).isEqualTo(pendingCount - 1); + } + + @Test + public void testSamplingRate() { + // Chance is less than one-millionth of 1% that this will fail spuriously. 
+ Metadata metadata = new FakeMetadata().add(LOG_SAMPLE_EVERY_N, 2); + assertThat(countNSamples(1000, metadata)).isIn(Range.closed(400, 600)); + + // Expected average is 20 logs out of 1000. Seeing 0 or > 100 is enormously unlikely. + metadata = new FakeMetadata().add(LOG_SAMPLE_EVERY_N, 50); + assertThat(countNSamples(1000, metadata)).isIn(Range.closed(1, 100)); + } + + private static int countNSamples(int n, Metadata metadata) { + LogSite logSite = FakeLogSite.unique(); + int sampled = 0; + while (n-- > 0) { + if (RateLimitStatus.checkStatus( + SamplingRateLimiter.check(metadata, logSite), logSite, metadata) >= 0) { + sampled++; + } + } + return sampled; + } +}
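Usage sketch: a minimal, illustrative example of how the onAverageEvery(int) method added in this diff is expected to be used, assuming the standard FluentLogger entry point; the RequestHandler class, its method, and the log messages are hypothetical and not part of this change.

import com.google.common.flogger.FluentLogger;
import java.util.concurrent.TimeUnit;

public final class RequestHandler {
  private static final FluentLogger logger = FluentLogger.forEnclosingClass();

  void handle(String requestId) {
    // Emits roughly 1-in-100 of these log statements, chosen at random. Unlike every(100), the
    // first invocation is not guaranteed to be logged, but the choice is not tied to any fixed
    // period in the call pattern.
    logger.atInfo().onAverageEvery(100).log("Handling request %s", requestId);

    // Rate limiters can be combined; the statement is emitted only when all active limiters
    // allow it, and a "skipped" count is attached to the emitted log entry.
    logger.atWarning().onAverageEvery(10).atMostEvery(30, TimeUnit.SECONDS).log(
        "Slow path taken for request %s", requestId);
  }
}

As with every(int) and atMostEvery(int, TimeUnit), the sampling state is tracked per log site: each statement gets its own SamplingRateLimiter instance via the LOG_SAMPLE_EVERY_N metadata key and the LogSiteMap shown above.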