From 2de058f5ff460c5f33e627242fbe4a2308e11cd3 Mon Sep 17 00:00:00 2001 From: James Munns Date: Mon, 26 Aug 2024 20:46:32 +0200 Subject: [PATCH 01/23] Initial version based on the `leaky-bucket` and `concread` crates --- Cargo.lock | 118 +++++++++++++++---- Cargo.toml | 2 +- experiments/rate-limiting/Cargo.toml | 12 ++ experiments/rate-limiting/src/main.rs | 5 + experiments/rate-limiting/src/rater.rs | 154 +++++++++++++++++++++++++ 5 files changed, 268 insertions(+), 23 deletions(-) create mode 100644 experiments/rate-limiting/Cargo.toml create mode 100644 experiments/rate-limiting/src/main.rs create mode 100644 experiments/rate-limiting/src/rater.rs diff --git a/Cargo.lock b/Cargo.lock index 9339e1d..bd5d57a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -394,6 +394,24 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "concread" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba00cef522c2597dfbb0a8d1b0ac8ac2b99714f50cc354cda71da63164da0be" +dependencies = [ + "ahash", + "arc-swap", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", + "lru", + "smallvec", + "sptr", + "tokio", + "tracing", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -428,6 +446,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.11" @@ -962,6 +989,17 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "leaky-bucket" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a396bb213c2d09ed6c5495fd082c991b6ab39c9daf4fff59e6727f85c73e4c5" +dependencies = [ + "parking_lot", + "pin-project-lite", + "tokio", +] + [[package]] name = "libc" version = "0.2.153" @@ -1015,6 +1053,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "maud" version = "0.26.0" @@ -1117,13 +1164,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ + "hermit-abi 0.3.9", "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -1173,16 +1221,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "object" version = "0.32.2" @@ -1711,6 +1749,17 @@ 
dependencies = [ "getrandom", ] +[[package]] +name = "rate-limiting" +version = "0.1.0" +dependencies = [ + "concread", + "leaky-bucket", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -1728,8 +1777,17 @@ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.6", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -1740,9 +1798,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.3", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.3" @@ -2193,6 +2257,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + [[package]] name = "static-files-module" version = "0.2.0" @@ -2448,27 +2518,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", @@ -2624,10 +2694,14 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ + "matchers", "nu-ansi-term", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] diff --git a/Cargo.toml b/Cargo.toml index 2b75343..9a2e53b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = [ +members = [ "experiments/rate-limiting", "source/river", ] diff --git a/experiments/rate-limiting/Cargo.toml b/experiments/rate-limiting/Cargo.toml new file mode 100644 index 0000000..f15b3ea --- /dev/null +++ b/experiments/rate-limiting/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "rate-limiting" +version = "0.1.0" +edition = "2021" +publish = false + +[dependencies] +concread = "0.5.3" +leaky-bucket = "1.1.2" +tokio = { version = "1.39.3", features = ["full"] } 
+tracing = "0.1.40" +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/experiments/rate-limiting/src/main.rs b/experiments/rate-limiting/src/main.rs new file mode 100644 index 0000000..ff4e39f --- /dev/null +++ b/experiments/rate-limiting/src/main.rs @@ -0,0 +1,5 @@ +pub mod rater; + +fn main() { + println!("Hello, world!"); +} diff --git a/experiments/rate-limiting/src/rater.rs b/experiments/rate-limiting/src/rater.rs new file mode 100644 index 0000000..5bc6272 --- /dev/null +++ b/experiments/rate-limiting/src/rater.rs @@ -0,0 +1,154 @@ +use std::{sync::Arc, time::Duration}; + +use concread::arcache::{ARCache, ARCacheBuilder}; +use leaky_bucket::RateLimiter; + +pub struct Rater { + cache: ARCache>, + max_tokens_per_bucket: usize, + refill_interval_millis: usize, + refill_qty: usize, +} + +impl Rater { + pub fn new( + threads: usize, + max_buckets: usize, + max_tokens_per_bucket: usize, + refill_interval_millis: usize, + refill_qty: usize, + ) -> Self { + let cache = ARCacheBuilder::new() + .set_expected_workload(max_buckets, threads.max(1), 1, 1, false) + .build() + .expect("Creation of rate limiter should not fail"); + + Self { + cache, + max_tokens_per_bucket, + refill_interval_millis, + refill_qty: refill_qty.min(max_tokens_per_bucket), + } + } + + pub fn get_limiter(&self, key: String) -> Arc { + let mut reader = self.cache.read(); + + if let Some(find) = reader.get(&key) { + // Rate limiter DID exist in the cache + tracing::trace!(key, "rate limiting cache hit",); + find.clone() + } else { + let new_limiter = Arc::new( + RateLimiter::builder() + .initial(self.max_tokens_per_bucket) + .max(self.max_tokens_per_bucket) + .interval(Duration::from_millis(self.refill_interval_millis as u64)) + .refill(self.refill_qty) + .fair(true) + .build(), + ); + + tracing::debug!(key, "rate limiting cache miss",); + reader.insert(key, new_limiter.clone()); + reader.finish(); + new_limiter + } + } +} + +#[cfg(test)] +mod test { + use tokio::sync::mpsc::channel; + + use super::*; + use std::{ops::Add, time::Instant}; + + #[tokio::test] + async fn smoke() { + let _ = tracing_subscriber::fmt::try_init(); + + let rater = Arc::new(Rater::new(2, 5, 3, 10, 1)); + + for i in 0..100 { + let start = Instant::now(); + let bucket = rater.get_limiter("bob".to_string()); + bucket.acquire_owned(1).await; + tracing::info!("Success {i} took {:?}!", start.elapsed()); + } + } + + #[tokio::test] + async fn concurrent_fewer() { + let _ = tracing_subscriber::fmt::try_init(); + + let rater = Arc::new(Rater::new(8, 16, 3, 10, 1)); + let mut handles = vec![]; + + for thread in 0..8 { + let rater = rater.clone(); + let hdl = tokio::task::spawn(async move { + for i in 1..32 { + let name = match i { + i if i % 15 == 0 => "fizzbuzz".to_string(), + i if i % 3 == 0 => "fizz".to_string(), + i if i % 5 == 0 => "buzz".to_string(), + i => format!("{i}"), + }; + let start = Instant::now(); + let bucket = rater.get_limiter(name.clone()); + bucket.acquire_owned(1).await; + tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()); + } + }); + handles.push(hdl); + } + + for h in handles.into_iter() { + h.await.unwrap(); + } + } + + #[tokio::test] + async fn concurrent_more() { + let _ = tracing_subscriber::fmt::try_init(); + + let rater = Arc::new(Rater::new(8, 128, 3, 10, 1)); + let (htx, mut hrx) = channel(1024); + let deadline = Instant::now().add(Duration::from_millis(10)); + + for thread in 0..8 { + let rater = rater.clone(); + let htxin = htx.clone(); + let hdl = tokio::task::spawn(async move 
{ + for i in 1..32 { + let name = match i { + i if i % 15 == 0 => "fizzbuzz".to_string(), + i if i % 3 == 0 => "fizz".to_string(), + i if i % 5 == 0 => "buzz".to_string(), + i => format!("{i}"), + }; + let rater = rater.clone(); + let hdl = tokio::task::spawn(async move { + tokio::time::sleep_until(deadline.into()).await; + let start = Instant::now(); + let bucket = rater.get_limiter(name.clone()); + let res = tokio::time::timeout(Duration::from_millis(100), bucket.acquire_owned(1)).await; + match res { + Ok(_) => tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()), + Err(_) => tracing::error!("{thread}:{i}:{name} gave up after 100ms!"), + } + }); + htxin.send(hdl).await.unwrap(); + } + drop(htxin); + }); + htx.send(hdl).await.unwrap(); + } + drop(htx); + + while let Some(hdl) = hrx.recv().await { + hdl.await.unwrap(); + } + } +} From f7b1f4005ed391b55784b1a1273e9ba273a76706 Mon Sep 17 00:00:00 2001 From: James Munns Date: Tue, 27 Aug 2024 11:24:56 +0200 Subject: [PATCH 02/23] Make `Rater` generic over the key kind --- experiments/rate-limiting/src/rater.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/experiments/rate-limiting/src/rater.rs b/experiments/rate-limiting/src/rater.rs index 5bc6272..d379a57 100644 --- a/experiments/rate-limiting/src/rater.rs +++ b/experiments/rate-limiting/src/rater.rs @@ -1,16 +1,24 @@ use std::{sync::Arc, time::Duration}; +use std::hash::Hash; +use std::fmt::Debug; use concread::arcache::{ARCache, ARCacheBuilder}; use leaky_bucket::RateLimiter; -pub struct Rater { - cache: ARCache>, +pub struct Rater +where + Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, +{ + cache: ARCache>, max_tokens_per_bucket: usize, refill_interval_millis: usize, refill_qty: usize, } -impl Rater { +impl Rater +where + Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, +{ pub fn new( threads: usize, max_buckets: usize, @@ -31,12 +39,12 @@ impl Rater { } } - pub fn get_limiter(&self, key: String) -> Arc { + pub fn get_limiter(&self, key: Key) -> Arc { let mut reader = self.cache.read(); if let Some(find) = reader.get(&key) { // Rate limiter DID exist in the cache - tracing::trace!(key, "rate limiting cache hit",); + tracing::trace!(?key, "rate limiting cache hit",); find.clone() } else { let new_limiter = Arc::new( @@ -49,7 +57,7 @@ impl Rater { .build(), ); - tracing::debug!(key, "rate limiting cache miss",); + tracing::debug!(?key, "rate limiting cache miss",); reader.insert(key, new_limiter.clone()); reader.finish(); new_limiter From 65107ba0023a25d0dbe6c54da53c1eebf1cc325a Mon Sep 17 00:00:00 2001 From: James Munns Date: Tue, 27 Aug 2024 12:40:30 +0200 Subject: [PATCH 03/23] Documentation and a bit of refactoring --- experiments/rate-limiting/src/rater.rs | 258 ++++++++++++++++++++----- 1 file changed, 209 insertions(+), 49 deletions(-) diff --git a/experiments/rate-limiting/src/rater.rs b/experiments/rate-limiting/src/rater.rs index d379a57..079eb6a 100644 --- a/experiments/rate-limiting/src/rater.rs +++ b/experiments/rate-limiting/src/rater.rs @@ -1,10 +1,75 @@ -use std::{sync::Arc, time::Duration}; -use std::hash::Hash; -use std::fmt::Debug; +use std::{fmt::Debug, hash::Hash, sync::Arc, time::Duration}; use concread::arcache::{ARCache, ARCacheBuilder}; use leaky_bucket::RateLimiter; +/// Configuration for the [`Rater`] +#[derive(Debug, PartialEq, Clone)] +pub struct RaterConfig { + /// The number of expected concurrent threads - should match the number of + /// tokio threadpool workers + pub 
threads: usize,
+    /// The peak number of leaky buckets we aim to have live at once
+    ///
+    /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`]
+    /// for docs on calculating actual memory usage based on these parameters
+    pub max_buckets: usize,
+    /// The max and initial number of tokens in the leaky bucket - this is the number of
+    /// requests that can go through without any waiting if the bucket is full
+    pub max_tokens_per_bucket: usize,
+    /// The interval between "refills" of the bucket, e.g. the bucket refills `refill_qty`
+    /// every `refill_interval_millis`
+    pub refill_interval_millis: usize,
+    /// The number of tokens added to the bucket every `refill_interval_millis`
+    pub refill_qty: usize,
+}
+
+/// A concurrent rate limiting structure
+///
+/// ## Implementation details and notes
+///
+/// For performance and resource reasons, this provides an *approximation* of exact rate
+/// limiting. Currently, there are a few "false positive" cases that can permit more than
+/// the expected number of actions to occur.
+///
+/// Rater is currently modeled as a Least Recently Used (LRU) cache of leaky buckets mapped
+/// by a key. This is done to provide a bounded quantity of leaky buckets, without requiring
+/// a worker task to "cull" the oldest buckets. Instead, unused buckets will naturally
+/// "fall out" of the cache if they are not used.
+///
+/// ### Too many unique keys at too high a rate
+///
+/// If there is a very high diversity of Keys provided, it is possible that keys could
+/// be evicted from the cache before they would naturally expire or be refilled. In this
+/// case, Rater will appear to not apply rate limiting, as the evicted bucket will be
+/// replaced with a new, initially full bucket. This can be mitigated by choosing a
+/// bucket storage capacity that is large enough to hold enough buckets to handle the
+/// expected requests per second. For example, if there is room for 1M buckets, and a
+/// bucket would refill one token every 100ms, then we would expect to be able to handle
+/// at least 100K requests with unique keys per second without evicting the buckets
+/// before the bucket would refill anyway.
+///
+/// ### A burst of previously-unseen keys
+///
+/// If a number of requests appear around the same time for a Key that is not resident
+/// in the cache, it is possible that all worker threads will create a new bucket and
+/// attempt to add their buckets to the cache, though only one will be persisted, and
+/// the others will be lost.
+///
+/// For example, if there are N worker threads, and N requests with the same key arrive
+/// at roughly the same time, it is possible that we will create N new leaky buckets,
+/// each of which will give one immediately-ready token for the request. However, in the
+/// worst case (N - 1) of these tokens won't "count", as (N - 1) of these buckets
+/// will be thrown away, and not counted in the one bucket that was persisted.
+///
+/// This worst case is extremely unlikely, as it would require N requests with the same Key
+/// to arrive in the time window necessary to write to the cache, and for all N requests
+/// to be distributed to N different worker threads that all attempt to find the Key
+/// at the same time.
+///
+/// There is no mitigation for this currently, other than treating the "max tokens per
+/// bucket" as an approximate value, with up to "number of worker threads" of false
+/// positives as an acceptable bound.
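+///
+/// As a rough worked example (the numbers here are illustrative, not defaults):
+/// with N = 8 worker threads and `max_tokens_per_bucket = 10`, a simultaneous
+/// burst on a brand-new Key could in the worst case admit 10 + (8 - 1) = 17
+/// immediate requests, since up to 7 discarded buckets may each have handed out
+/// one token before the single persisted bucket wins.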
pub struct Rater where Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, @@ -19,15 +84,40 @@ impl Rater where Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, { - pub fn new( - threads: usize, - max_buckets: usize, - max_tokens_per_bucket: usize, - refill_interval_millis: usize, - refill_qty: usize, - ) -> Self { + /// Create a new rate limiter with the given configuration. + /// + /// See [`RaterConfig`] for configuration options. + pub fn new(config: RaterConfig) -> Self { + let RaterConfig { + threads, + max_buckets, + max_tokens_per_bucket, + refill_interval_millis, + refill_qty, + } = config; let cache = ARCacheBuilder::new() - .set_expected_workload(max_buckets, threads.max(1), 1, 1, false) + .set_expected_workload( + // total + // + // total number of items you want to have in memory + max_buckets, + // threads + // + // the number of read threads you expect concurrently (AT LEAST 1) + threads.max(1), + // ex_ro_miss + // + // the expected average number of cache misses per read operation + 1, + // ex_rw_miss + // + // the expected average number of writes or write cache misses per operation + 1, + // read_cache + // + // ? + false, + ) .build() .expect("Creation of rate limiter should not fail"); @@ -39,30 +129,78 @@ where } } - pub fn get_limiter(&self, key: Key) -> Arc { + /// Obtain a ticket for the given Key. + /// + /// If the Key does not exist already, it will be created. + pub fn get_ticket(&self, key: Key) -> Ticket { let mut reader = self.cache.read(); if let Some(find) = reader.get(&key) { // Rate limiter DID exist in the cache tracing::trace!(?key, "rate limiting cache hit",); - find.clone() + Ticket { + limiter: find.clone(), + } } else { - let new_limiter = Arc::new( - RateLimiter::builder() - .initial(self.max_tokens_per_bucket) - .max(self.max_tokens_per_bucket) - .interval(Duration::from_millis(self.refill_interval_millis as u64)) - .refill(self.refill_qty) - .fair(true) - .build(), - ); - + let new_limiter = Arc::new(self.new_rate_limiter()); tracing::debug!(?key, "rate limiting cache miss",); reader.insert(key, new_limiter.clone()); reader.finish(); - new_limiter + Ticket { + limiter: new_limiter, + } } } + + fn new_rate_limiter(&self) -> RateLimiter { + RateLimiter::builder() + .initial(self.max_tokens_per_bucket) + .max(self.max_tokens_per_bucket) + .interval(Duration::from_millis(self.refill_interval_millis as u64)) + .refill(self.refill_qty) + .fair(true) + .build() + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum TicketError { + BurstLimitExceeded, +} + +/// A claim ticket for the leaky bucket queue +/// +/// You are expected to call [`Ticket::wait()`] to wait for your turn to perform +/// the rate limited option. +#[must_use = "You must call `Ticket::wait()` to wait your turn!"] +pub struct Ticket { + limiter: Arc, +} + +impl Ticket { + /// Wait for our "turn" granted by the leaky bucket + /// + /// * If the bucket has a token available, `Ok(())` will be returned immediately + /// * If the bucket does not have a token available, this function will yield until + /// a token is refilled + /// * If the bucket has too many pending waiters, then `Err(TicketError)` will be + /// returned immediately + /// + /// NOTE: In the future, we would like to be able to return immediately if there + /// are too many pending requests at once, instead of queueing requests that are + /// going to end up timing out anyway. 
+ /// + /// However, this is not supported by the [`leaky-bucket`] crate today, enqueueing + /// will always succeed, and we will handle this one layer up by adding a timeout + /// to requests. + /// + /// This should be fixed in the future as a performance optimization, but for now + /// we give ourselves the API surface to make this change with minimal fuss in + /// in the future. + pub async fn wait(self) -> Result<(), TicketError> { + self.limiter.acquire_owned(1).await; + Ok(()) + } } #[cfg(test)] @@ -75,13 +213,20 @@ mod test { #[tokio::test] async fn smoke() { let _ = tracing_subscriber::fmt::try_init(); + let config = RaterConfig { + threads: 2, + max_buckets: 5, + max_tokens_per_bucket: 3, + refill_interval_millis: 10, + refill_qty: 1, + }; - let rater = Arc::new(Rater::new(2, 5, 3, 10, 1)); + let rater = Arc::new(Rater::new(config)); for i in 0..100 { let start = Instant::now(); - let bucket = rater.get_limiter("bob".to_string()); - bucket.acquire_owned(1).await; + let bucket = rater.get_ticket("bob".to_string()); + bucket.wait().await.unwrap(); tracing::info!("Success {i} took {:?}!", start.elapsed()); } } @@ -89,23 +234,24 @@ mod test { #[tokio::test] async fn concurrent_fewer() { let _ = tracing_subscriber::fmt::try_init(); - - let rater = Arc::new(Rater::new(8, 16, 3, 10, 1)); + let config = RaterConfig { + threads: 8, + max_buckets: 16, + max_tokens_per_bucket: 3, + refill_interval_millis: 10, + refill_qty: 1, + }; + let rater = Arc::new(Rater::new(config)); let mut handles = vec![]; for thread in 0..8 { let rater = rater.clone(); let hdl = tokio::task::spawn(async move { for i in 1..32 { - let name = match i { - i if i % 15 == 0 => "fizzbuzz".to_string(), - i if i % 3 == 0 => "fizz".to_string(), - i if i % 5 == 0 => "buzz".to_string(), - i => format!("{i}"), - }; + let name = fizzbuzz(i); let start = Instant::now(); - let bucket = rater.get_limiter(name.clone()); - bucket.acquire_owned(1).await; + let bucket = rater.get_ticket(name.clone()); + bucket.wait().await.unwrap(); tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()); } }); @@ -120,8 +266,14 @@ mod test { #[tokio::test] async fn concurrent_more() { let _ = tracing_subscriber::fmt::try_init(); - - let rater = Arc::new(Rater::new(8, 128, 3, 10, 1)); + let config = RaterConfig { + threads: 8, + max_buckets: 128, + max_tokens_per_bucket: 3, + refill_interval_millis: 10, + refill_qty: 1, + }; + let rater = Arc::new(Rater::new(config)); let (htx, mut hrx) = channel(1024); let deadline = Instant::now().add(Duration::from_millis(10)); @@ -130,21 +282,20 @@ mod test { let htxin = htx.clone(); let hdl = tokio::task::spawn(async move { for i in 1..32 { - let name = match i { - i if i % 15 == 0 => "fizzbuzz".to_string(), - i if i % 3 == 0 => "fizz".to_string(), - i if i % 5 == 0 => "buzz".to_string(), - i => format!("{i}"), - }; + let name = fizzbuzz(i); let rater = rater.clone(); let hdl = tokio::task::spawn(async move { tokio::time::sleep_until(deadline.into()).await; let start = Instant::now(); - let bucket = rater.get_limiter(name.clone()); - let res = tokio::time::timeout(Duration::from_millis(100), bucket.acquire_owned(1)).await; + let bucket = rater.get_ticket(name.clone()); + let res = + tokio::time::timeout(Duration::from_millis(100), bucket.wait()).await; match res { - Ok(_) => tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()), - Err(_) => tracing::error!("{thread}:{i}:{name} gave up after 100ms!"), + Ok(Ok(())) => { + tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()) + } + 
Ok(Err(_)) => unreachable!(), + Err(_) => tracing::warn!("{thread}:{i}:{name} gave up after 100ms!"), } }); htxin.send(hdl).await.unwrap(); @@ -159,4 +310,13 @@ mod test { hdl.await.unwrap(); } } + + fn fizzbuzz(i: usize) -> String { + match i { + i if i % 15 == 0 => "fizzbuzz".to_string(), + i if i % 3 == 0 => "fizz".to_string(), + i if i % 5 == 0 => "buzz".to_string(), + i => format!("{i}"), + } + } } From f9b74b0c184a244d028c60a150edec7724637954 Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 10:58:59 +0200 Subject: [PATCH 04/23] In progress --- Cargo.lock | 2 + source/river/Cargo.toml | 14 +- source/river/assets/test-config.kdl | 35 +++ source/river/src/config/kdl/mod.rs | 2 +- source/river/src/proxy/mod.rs | 67 ++++- source/river/src/proxy/rate_limiting.rs | 368 ++++++++++++++++++++++++ 6 files changed, 475 insertions(+), 13 deletions(-) create mode 100644 source/river/src/proxy/rate_limiting.rs diff --git a/Cargo.lock b/Cargo.lock index bd5d57a..3e9278a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1876,9 +1876,11 @@ dependencies = [ "async-trait", "cidr", "clap 4.5.4", + "concread", "futures-util", "http 1.1.0", "kdl", + "leaky-bucket", "log", "miette", "pandora-module-utils", diff --git a/source/river/Cargo.toml b/source/river/Cargo.toml index 4eb3f91..f19e321 100644 --- a/source/river/Cargo.toml +++ b/source/river/Cargo.toml @@ -24,17 +24,19 @@ rustdoc-args = ["--cfg", "doc_cfg"] [dependencies] async-trait = "0.1.79" +cidr = "0.2.3" +concread = "0.5.3" +futures-util = "0.3.30" +http = "1.0.0" +kdl = "4.6.0" +leaky-bucket = "1.1.2" log = "0.4.21" +miette = { version = "5.10.0", features = ["fancy"] } regex = "1.10.4" +thiserror = "1.0.61" tokio = "1.37.0" # TODO: check for implicit feature usage toml = "0.8.12" tracing = "0.1.40" -kdl = "4.6.0" -miette = { version = "5.10.0", features = ["fancy"] } -thiserror = "1.0.61" -http = "1.0.0" -futures-util = "0.3.30" -cidr = "0.2.3" [dependencies.static-files-module] version = "0.2" diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index c0de1aa..5822da0 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -31,6 +31,41 @@ services { "0.0.0.0:4443" cert-path="./assets/test.crt" key-path="./assets/test.key" offer-h2=true } + // Apply Rate limiting to this service + // + // Note that ALL rules are applied, and a request must receive a token from all + // applicable rules. + // + // For example: + // + // A request to URI `/index.html` from IP 1.2.3.4 will only need to get a token from + // the `source-ip` rule. 
+ // + // A request to URI `/static/style.css` from IP 1.2.3.4 will need to get a token from + // BOTH the `source-ip` rule (from the `1.2.3.4` bucket), AND the `uri` rule (from the + // `/static/style.css` bucket) + rate-limiting { + // If a request takes more than 100ms to obtain all necessary tokens, a 429 error + // will be returned + timeout millis=100 + + // This rate limiting rule is based on the source IP address + // + // * Up to the last 4000 IP addresses will be remembered + // * Each IP address can make a burst of 10 requests + // * The bucket for each IP will refill at a rate of 1 request per 10 milliseconds + rule kind="source-ip" \ + max-buckets=4000 tokens-per-bucket=10 refill-qty=1 refill-rate-ms=10 + + // This rate limiting is based on the specific URI path + // + // * Up to the last 2000 URI paths will be remembered + // * Each URI path can make a burst of 20 requests + // * The bucket for each URI will refill at a rate of 5 requests per 1 millisecond + rule kind="uri" pattern="static/.*" \ + max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 + } + // Connectors are the "upstream" interfaces that we connect with. We can name as many // as we'd like, at least one is required. By default, connectors are distributed // round-robin. diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index 2c9c703..b413047 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -72,7 +72,7 @@ fn extract_services( let service_node = utils::required_child_doc(doc, doc, "services")?; let services = utils::wildcard_argless_child_docs(doc, service_node)?; - let proxy_node_set = HashSet::from(["listeners", "connectors", "path-control"]); + let proxy_node_set = HashSet::from(["listeners", "connectors", "path-control", "rate-limiting"]); let file_server_node_set = HashSet::from(["listeners", "file-server"]); let mut proxies = vec![]; diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs index b85a1e4..b0d3edf 100644 --- a/source/river/src/proxy/mod.rs +++ b/source/river/src/proxy/mod.rs @@ -4,12 +4,12 @@ //! this includes creation of HTTP proxy services, as well as Path Control //! modifiers. -use std::collections::{BTreeMap, BTreeSet}; +use std::{collections::{BTreeMap, BTreeSet}, time::Duration}; use async_trait::async_trait; use futures_util::FutureExt; -use pingora::{server::Server, Error}; +use pingora::{server::Server, Error, ErrorType}; use pingora_core::{upstreams::peer::HttpPeer, Result}; use pingora_http::{RequestHeader, ResponseHeader}; use pingora_load_balancing::{ @@ -20,6 +20,7 @@ use pingora_load_balancing::{ Backend, Backends, LoadBalancer, }; use pingora_proxy::{ProxyHttp, Session}; +use tokio::task::JoinSet; use crate::{ config::internal::{PathControl, ProxyConfig, SelectionKind}, @@ -30,13 +31,19 @@ use crate::{ }, }; -use self::request_filters::RequestFilterMod; +use self::{rate_limiting::{RaterInstance, TicketError}, request_filters::RequestFilterMod}; +pub mod rate_limiting; pub mod request_filters; pub mod request_modifiers; pub mod request_selector; pub mod response_modifiers; + +pub struct RateLimiters { + request_filter_stage: Vec, +} + /// The [RiverProxyService] is intended to capture the behaviors used to extend /// the [HttpProxy] functionality by providing a [ProxyHttp] trait implementation. 
/// @@ -50,6 +57,7 @@ pub struct RiverProxyService { /// Load Balancer pub load_balancer: LoadBalancer, pub request_selector: RequestSelector, + pub rate_limiters: RateLimiters, } /// Create a proxy service, with the type parameters chosen based on the config file @@ -106,6 +114,7 @@ where modifiers, load_balancer: upstreams, request_selector: conf.upstream_options.selector, + rate_limiters: RateLimiters { request_filter_stage: vec![] }, }, &conf.name, ); @@ -168,7 +177,7 @@ impl Modifiers { } other => { tracing::warn!("Unknown request filter: '{other}'"); - return Err(Error::new(pingora::ErrorType::Custom("Bad configuration"))); + return Err(Error::new(ErrorType::Custom("Bad configuration"))); } }; request_filter_mods.push(f); @@ -186,7 +195,7 @@ impl Modifiers { } other => { tracing::warn!("Unknown upstream request filter: '{other}'"); - return Err(Error::new(pingora::ErrorType::Custom("Bad configuration"))); + return Err(Error::new(ErrorType::Custom("Bad configuration"))); } }; upstream_request_filters.push(f); @@ -204,7 +213,7 @@ impl Modifiers { } other => { tracing::warn!("Unknown upstream response filter: '{other}'"); - return Err(Error::new(pingora::ErrorType::Custom("Bad configuration"))); + return Err(Error::new(ErrorType::Custom("Bad configuration"))); } }; upstream_response_filters.push(f); @@ -242,6 +251,52 @@ where where Self::CTX: Send + Sync, { + // First: do rate limiting at this stage - quickly check if there are none and skip over + // entirely if so + if !self.rate_limiters.request_filter_stage.is_empty() { + // Use a joinset to capture all applicable rate limiters + // + // TODO: avoid spawning one task per limiter? + let mut set: JoinSet> = JoinSet::new(); + for limiter in self.rate_limiters.request_filter_stage.iter() { + let ticket = limiter.get_ticket(session); + set.spawn(ticket.wait()); + } + + // This future returns when either ALL tickets have been obtained, OR + // when any ticket returns an error + let total_ticket_fut = async move { + while let Some(res) = set.join_next().await { + match res { + Ok(Ok(())) => {}, + Ok(Err(_e)) => return Err(()), + Err(_) => return Err(()), + } + } + Ok(()) + }; + + // Cap the waiting time in case all buckets are too full + // + // TODO: This is a workaround for not having the ability to immediately bail + // if buckets are "too full"! + // + // TODO: make this configurable? 
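+            //
+            // Sketch of the intended semantics: `Ok(Ok(()))` below means every
+            // applicable limiter granted its token within the window; any error
+            // or timeout is treated as "rate limit exceeded" (HTTP 429).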
+ let res = tokio::time::timeout(Duration::from_secs(1), total_ticket_fut).await; + match res { + Ok(Ok(())) => {}, + Ok(Err(())) => { + // Rate limiter said "bail" + return Err(Error::new_down(ErrorType::CustomCode("rate limit exceeded", 429))) + } + Err(_) => { + // Timeout occurred + return Err(Error::new_down(ErrorType::CustomCode("rate limit exceeded", 429))) + } + } + } + + for filter in &self.modifiers.request_filters { match filter.request_filter(session, ctx).await { // If Ok true: we're done handling this request diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs new file mode 100644 index 0000000..2de881f --- /dev/null +++ b/source/river/src/proxy/rate_limiting.rs @@ -0,0 +1,368 @@ +use std::{fmt::Debug, hash::Hash, net::IpAddr, sync::Arc, time::Duration}; + +use concread::arcache::{ARCache, ARCacheBuilder}; +use leaky_bucket::RateLimiter; +use pandora_module_utils::pingora::SocketAddr; +use pingora_proxy::Session; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RequestKeyKind { + SourceIp, + DestIp, + Uri, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RequestKey { + Source(IpAddr), + Dest(IpAddr), + Uri(String), +} + +pub struct RaterInstance { + pub rater: Rater, + pub kind: RequestKeyKind, +} + +impl RaterInstance { + pub fn get_ticket(&self, session: &mut Session) -> Ticket { + let key = self.get_key(session); + self.rater.get_ticket(key) + } + + pub fn get_key(&self, session: &mut Session) -> RequestKey { + match self.kind { + RequestKeyKind::SourceIp => { + let src = session.downstream_session.client_addr().unwrap(); + let src_ip = match src { + SocketAddr::Inet(addr) => addr.ip(), + SocketAddr::Unix(_) => todo!(), + }; + RequestKey::Source(src_ip) + }, + RequestKeyKind::DestIp => todo!(), + RequestKeyKind::Uri => { + let uri = session.downstream_session.req_header().uri.to_string(); + RequestKey::Uri(uri) + }, + } + } +} + +/// Configuration for the [`Rater`] +#[derive(Debug, PartialEq, Clone)] +pub struct RaterConfig { + /// The number of expected concurrent threads - should match the number of + /// tokio threadpool workers + pub threads: usize, + /// The peak number of leaky buckets we aim to have live at once + /// + /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`] + /// for docs on calculating actual memory usage based on these parameters + pub max_buckets: usize, + /// The max and initial number of tokens in the leaky bucket - this is the number of + /// requests that can go through without any waiting if the bucket is full + pub max_tokens_per_bucket: usize, + /// The interval between "refills" of the bucket, e.g. the bucket refills `refill_qty` + /// every `refill_interval_millis` + pub refill_interval_millis: usize, + /// The number of tokens added to the bucket every `refill_interval_millis` + pub refill_qty: usize, +} + +/// A concurrent rate limiting structure +/// +/// ## Implementation details and notes +/// +/// For performance and resource reasons, this provides an *approximation* of exact rate +/// limiting. Currently, there are a few "false positive" cases that can permit more than +/// the expected number of actions to occur. +/// +/// Rater is currently modeled as a Least Recently Used (LRU) cache of leaky buckets mapped +/// by a key. This is done to provide a bounded quantity of leaky buckets, without requiring +/// a worker task to "cull" the oldest buckets. 
Instead, unused buckets will naturally +/// "fall out" of the cache if they are not used. +/// +/// ### Too many unique keys at too high of a rate +/// +/// If there is a very high diversity of Keys provided, it is possible that keys could +/// be evicted from the cache before they would naturally expire or be refilled. In this +/// case, Rater will appear to not apply rate limiting, as the evicted bucket will be +/// replaced with a new, initially full bucket. This can be mitigated by choosing a +/// bucket storage capacity that is large enough to hold enough buckets to handle the +/// expected requests per second. e.g. if there is room for 1M buckets, and a bucket would +/// refill one token every 100ms, then we would expect to be able to handle at least 100K +/// requests with unique keys per second without evicting the buckets before the bucket +/// would refill anyway. +/// +/// ### A burst of previously-unseen keys +/// +/// If a number of requests appear around the same time for a Key that is not resident +/// in the cache, it is possible that all worker threads will create a new bucket and +/// attempt to add their buckets to the cache, though only one will be persisted, and +/// the others will be lost. +/// +/// For example if there are N worker threads, and N requests with the same key arrive +/// at roughly the same time, it is possible that we will create N new leaky buckets, +/// each that will give one immediately-ready token for the request. However, in the +/// worst case (N - 1) of these tokens won't "count", as (N - 1) of these buckets +/// will be thrown away, and not counted in the one bucket that was persisted +/// +/// This worst case is extremely unlikely, as it would require N requests with the same Key +/// to arrive in the time window necessary to write to the cache, and for all N requests +/// to be distributed to N different worker threads that all attempt to find the Key +/// at the same time. +/// +/// There is no mitigation for this currently, other than treating the "max tokens per +/// bucket" as an approximate value, with up to "number of worker threads" of false +/// positives as an acceptable bound. +pub struct Rater +where + Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, +{ + cache: ARCache>, + max_tokens_per_bucket: usize, + refill_interval_millis: usize, + refill_qty: usize, +} + +impl Rater +where + Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, +{ + /// Create a new rate limiter with the given configuration. + /// + /// See [`RaterConfig`] for configuration options. + pub fn new(config: RaterConfig) -> Self { + let RaterConfig { + threads, + max_buckets, + max_tokens_per_bucket, + refill_interval_millis, + refill_qty, + } = config; + let cache = ARCacheBuilder::new() + .set_expected_workload( + // total + // + // total number of items you want to have in memory + max_buckets, + // threads + // + // the number of read threads you expect concurrently (AT LEAST 1) + threads.max(1), + // ex_ro_miss + // + // the expected average number of cache misses per read operation + 1, + // ex_rw_miss + // + // the expected average number of writes or write cache misses per operation + 1, + // read_cache + // + // ? + false, + ) + .build() + .expect("Creation of rate limiter should not fail"); + + Self { + cache, + max_tokens_per_bucket, + refill_interval_millis, + refill_qty: refill_qty.min(max_tokens_per_bucket), + } + } + + /// Obtain a ticket for the given Key. + /// + /// If the Key does not exist already, it will be created. 
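+    ///
+    /// A minimal usage sketch (the key value is illustrative, and this assumes a
+    /// `Rater<String>` built from a [`RaterConfig`] named `config`, running on a
+    /// tokio runtime):
+    ///
+    /// ```ignore
+    /// let rater = Rater::new(config);
+    /// let ticket = rater.get_ticket("1.2.3.4".to_string());
+    /// ticket.wait().await.expect("token should eventually be granted");
+    /// ```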
+    pub fn get_ticket(&self, key: Key) -> Ticket {
+        let mut reader = self.cache.read();
+
+        if let Some(find) = reader.get(&key) {
+            // Rate limiter DID exist in the cache
+            tracing::trace!(?key, "rate limiting cache hit",);
+            Ticket {
+                limiter: find.clone(),
+            }
+        } else {
+            let new_limiter = Arc::new(self.new_rate_limiter());
+            tracing::debug!(?key, "rate limiting cache miss",);
+            reader.insert(key, new_limiter.clone());
+            reader.finish();
+            Ticket {
+                limiter: new_limiter,
+            }
+        }
+    }
+
+    fn new_rate_limiter(&self) -> RateLimiter {
+        RateLimiter::builder()
+            .initial(self.max_tokens_per_bucket)
+            .max(self.max_tokens_per_bucket)
+            .interval(Duration::from_millis(self.refill_interval_millis as u64))
+            .refill(self.refill_qty)
+            .fair(true)
+            .build()
+    }
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum TicketError {
+    BurstLimitExceeded,
+}
+
+/// A claim ticket for the leaky bucket queue
+///
+/// You are expected to call [`Ticket::wait()`] to wait for your turn to perform
+/// the rate limited operation.
+#[must_use = "You must call `Ticket::wait()` to wait your turn!"]
+pub struct Ticket {
+    limiter: Arc<RateLimiter>,
+}
+
+impl Ticket {
+    /// Wait for our "turn" granted by the leaky bucket
+    ///
+    /// * If the bucket has a token available, `Ok(())` will be returned immediately
+    /// * If the bucket does not have a token available, this function will yield until
+    ///   a token is refilled
+    /// * If the bucket has too many pending waiters, then `Err(TicketError)` will be
+    ///   returned immediately
+    ///
+    /// NOTE: In the future, we would like to be able to return immediately if there
+    /// are too many pending requests at once, instead of queueing requests that are
+    /// going to end up timing out anyway.
+    ///
+    /// However, this is not supported by the [`leaky-bucket`] crate today; enqueueing
+    /// will always succeed, and we will handle this one layer up by adding a timeout
+    /// to requests.
+    ///
+    /// This should be fixed in the future as a performance optimization, but for now
+    /// we give ourselves the API surface to make this change with minimal fuss
+    /// in the future.
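+    ///
+    /// In the meantime, a caller that needs an upper bound can wrap the wait in a
+    /// timeout, as the proxy layer and the tests below do (sketch; the 100ms value
+    /// is illustrative):
+    ///
+    /// ```ignore
+    /// use std::time::Duration;
+    ///
+    /// match tokio::time::timeout(Duration::from_millis(100), ticket.wait()).await {
+    ///     Ok(Ok(())) => { /* our turn: proceed with the operation */ }
+    ///     Ok(Err(_)) => { /* rejected by the limiter */ }
+    ///     Err(_) => { /* timed out while waiting for a token */ }
+    /// }
+    /// ```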
+ pub async fn wait(self) -> Result<(), TicketError> { + self.limiter.acquire_owned(1).await; + Ok(()) + } +} + +#[cfg(test)] +mod test { + use tokio::sync::mpsc::channel; + + use super::*; + use std::{ops::Add, time::Instant}; + + #[tokio::test] + async fn smoke() { + let _ = tracing_subscriber::fmt::try_init(); + let config = RaterConfig { + threads: 2, + max_buckets: 5, + max_tokens_per_bucket: 3, + refill_interval_millis: 10, + refill_qty: 1, + }; + + let rater = Arc::new(Rater::new(config)); + + for i in 0..100 { + let start = Instant::now(); + let bucket = rater.get_ticket("bob".to_string()); + bucket.wait().await.unwrap(); + tracing::info!("Success {i} took {:?}!", start.elapsed()); + } + } + + #[tokio::test] + async fn concurrent_fewer() { + let _ = tracing_subscriber::fmt::try_init(); + let config = RaterConfig { + threads: 8, + max_buckets: 16, + max_tokens_per_bucket: 3, + refill_interval_millis: 10, + refill_qty: 1, + }; + let rater = Arc::new(Rater::new(config)); + let mut handles = vec![]; + + for thread in 0..8 { + let rater = rater.clone(); + let hdl = tokio::task::spawn(async move { + for i in 1..32 { + let name = fizzbuzz(i); + let start = Instant::now(); + let bucket = rater.get_ticket(name.clone()); + bucket.wait().await.unwrap(); + tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()); + } + }); + handles.push(hdl); + } + + for h in handles.into_iter() { + h.await.unwrap(); + } + } + + #[tokio::test] + async fn concurrent_more() { + let _ = tracing_subscriber::fmt::try_init(); + let config = RaterConfig { + threads: 8, + max_buckets: 128, + max_tokens_per_bucket: 3, + refill_interval_millis: 10, + refill_qty: 1, + }; + let rater = Arc::new(Rater::new(config)); + let (htx, mut hrx) = channel(1024); + let deadline = Instant::now().add(Duration::from_millis(10)); + + for thread in 0..8 { + let rater = rater.clone(); + let htxin = htx.clone(); + let hdl = tokio::task::spawn(async move { + for i in 1..32 { + let name = fizzbuzz(i); + let rater = rater.clone(); + let hdl = tokio::task::spawn(async move { + tokio::time::sleep_until(deadline.into()).await; + let start = Instant::now(); + let bucket = rater.get_ticket(name.clone()); + let res = + tokio::time::timeout(Duration::from_millis(100), bucket.wait()).await; + match res { + Ok(Ok(())) => { + tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()) + } + Ok(Err(_)) => unreachable!(), + Err(_) => tracing::warn!("{thread}:{i}:{name} gave up after 100ms!"), + } + }); + htxin.send(hdl).await.unwrap(); + } + drop(htxin); + }); + htx.send(hdl).await.unwrap(); + } + drop(htx); + + while let Some(hdl) = hrx.recv().await { + hdl.await.unwrap(); + } + } + + fn fizzbuzz(i: usize) -> String { + match i { + i if i % 15 == 0 => "fizzbuzz".to_string(), + i if i % 3 == 0 => "fizz".to_string(), + i if i % 5 == 0 => "buzz".to_string(), + i => format!("{i}"), + } + } +} From c15c3709c3c072fe96bafc1c448caa84001f1977 Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:22:49 +0200 Subject: [PATCH 05/23] Plumb through rate limiting --- experiments/rate-limiting/src/lib.rs | 1 + experiments/rate-limiting/src/main.rs | 5 - experiments/rate-limiting/src/rater.rs | 9 ++ source/river/src/config/internal.rs | 13 +- source/river/src/config/kdl/mod.rs | 157 +++++++++++++++++++++- source/river/src/config/toml.rs | 3 +- source/river/src/proxy/mod.rs | 62 ++++++--- source/river/src/proxy/rate_limiting.rs | 62 +++++++-- source/river/src/proxy/request_filters.rs | 6 +- 9 files changed, 274 insertions(+), 44 
deletions(-) create mode 100644 experiments/rate-limiting/src/lib.rs delete mode 100644 experiments/rate-limiting/src/main.rs diff --git a/experiments/rate-limiting/src/lib.rs b/experiments/rate-limiting/src/lib.rs new file mode 100644 index 0000000..f492ce5 --- /dev/null +++ b/experiments/rate-limiting/src/lib.rs @@ -0,0 +1 @@ +pub mod rater; diff --git a/experiments/rate-limiting/src/main.rs b/experiments/rate-limiting/src/main.rs deleted file mode 100644 index ff4e39f..0000000 --- a/experiments/rate-limiting/src/main.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod rater; - -fn main() { - println!("Hello, world!"); -} diff --git a/experiments/rate-limiting/src/rater.rs b/experiments/rate-limiting/src/rater.rs index 079eb6a..d683230 100644 --- a/experiments/rate-limiting/src/rater.rs +++ b/experiments/rate-limiting/src/rater.rs @@ -80,6 +80,15 @@ where refill_qty: usize, } +impl Debug for Rater +where + Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("Rater { ... }") + } +} + impl Rater where Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, diff --git a/source/river/src/config/internal.rs b/source/river/src/config/internal.rs index e610533..e496c80 100644 --- a/source/river/src/config/internal.rs +++ b/source/river/src/config/internal.rs @@ -14,7 +14,10 @@ use pingora::{ }; use tracing::warn; -use crate::proxy::request_selector::{null_selector, RequestSelector}; +use crate::proxy::{ + rate_limiting::RaterInstanceConfig, + request_selector::{null_selector, RequestSelector}, +}; /// River's internal configuration #[derive(Debug, Clone)] @@ -109,6 +112,13 @@ impl Config { } } +/// +#[derive(Debug, Default, Clone, PartialEq)] +pub struct RateLimitingConfig { + pub(crate) timeout_ms: Option, + pub(crate) rules: Vec, +} + /// Add Path Control Modifiers /// /// Note that we use `BTreeMap` and NOT `HashMap`, as we want to maintain the @@ -141,6 +151,7 @@ pub struct ProxyConfig { pub(crate) upstream_options: UpstreamOptions, pub(crate) upstreams: Vec, pub(crate) path_control: PathControl, + pub(crate) rate_limiting: RateLimitingConfig, } #[derive(Debug, PartialEq, Clone)] diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index b413047..f46afb2 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -4,20 +4,26 @@ use std::{ path::PathBuf, }; -use kdl::{KdlDocument, KdlEntry, KdlNode}; +use kdl::{KdlDocument, KdlEntry, KdlNode, KdlValue}; use miette::{bail, Diagnostic, SourceSpan}; use pingora::{protocols::ALPN, upstreams::peer::HttpPeer}; +use regex::Regex; use crate::{ config::internal::{ Config, DiscoveryKind, FileServerConfig, HealthCheckKind, ListenerConfig, ListenerKind, PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, }, - proxy::request_selector::{ - null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, + proxy::{ + rate_limiting::{Rater, RaterConfig, RaterInstance, RaterInstanceConfig, RequestKeyKind}, + request_selector::{ + null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, + }, }, }; +use super::internal::RateLimitingConfig; + #[cfg(test)] mod test; mod utils; @@ -33,7 +39,7 @@ impl TryFrom for Config { upgrade_socket, pid_file, } = extract_system_data(&value)?; - let (basic_proxies, file_servers) = extract_services(&value)?; + let (basic_proxies, file_servers) = extract_services(threads_per_service, &value)?; Ok(Config { 
threads_per_service, @@ -67,12 +73,14 @@ impl Default for SystemData { /// Extract all services from the top level document fn extract_services( + threads_per_service: usize, doc: &KdlDocument, ) -> miette::Result<(Vec, Vec)> { let service_node = utils::required_child_doc(doc, doc, "services")?; let services = utils::wildcard_argless_child_docs(doc, service_node)?; - let proxy_node_set = HashSet::from(["listeners", "connectors", "path-control", "rate-limiting"]); + let proxy_node_set = + HashSet::from(["listeners", "connectors", "path-control", "rate-limiting"]); let file_server_node_set = HashSet::from(["listeners", "file-server"]); let mut proxies = vec![]; @@ -96,7 +104,7 @@ fn extract_services( if fingerprint_set.is_subset(&proxy_node_set) { // If the contained nodes are a strict subset of proxy node config fields, // then treat this section as a proxy node - proxies.push(extract_service(doc, name, service)?); + proxies.push(extract_service(threads_per_service, doc, name, service)?); } else if fingerprint_set.is_subset(&file_server_node_set) { // If the contained nodes are a strict subset of the file server config // fields, then treat this section as a file server node @@ -221,6 +229,7 @@ fn extract_file_server( /// Extracts a single service from the `services` block fn extract_service( + threads_per_service: usize, doc: &KdlDocument, name: &str, node: &KdlDocument, @@ -281,15 +290,151 @@ fn extract_service( } } + // Rate limiting + let mut rl = RateLimitingConfig::default(); + if let Some(rl_node) = utils::optional_child_doc(doc, node, "rate-limiting") { + let nodes = utils::data_nodes(doc, rl_node)?; + for (node, name, args) in nodes.iter() { + match *name { + "timeout" => { + let vals = utils::str_value_args(doc, args)?; + let [("millis", val)] = vals.as_slice() else { + return Err(Bad::docspan( + "Unknown argument, missing 'millis'", + doc, + node.span(), + ) + .into()); + }; + let Some(millis) = val.value().as_i64().and_then(|v| usize::try_from(v).ok()) + else { + return Err(Bad::docspan( + "'millis' value should be a number", + doc, + node.span(), + ) + .into()); + }; + let old = rl.timeout_ms.replace(millis); + if old.is_some() { + return Err(Bad::docspan( + "Can't specify 'timeout' twice!", + doc, + node.span(), + ) + .into()); + } + } + "rule" => { + let vals = utils::str_value_args(doc, args)?; + let valslice = vals + .iter() + .map(|(k, v)| (*k, v.value())) + .collect::>(); + rl.rules.push(make_rate_limiter(threads_per_service, doc, node, valslice)?); + } + other => { + return Err( + Bad::docspan(format!("Unknown name: '{other}'"), doc, node.span()).into(), + ); + } + } + } + + } + Ok(ProxyConfig { name: name.to_string(), listeners: list_cfgs, upstreams: conn_cfgs, path_control: pc, upstream_options: load_balance.unwrap_or_default(), + rate_limiting: rl, }) } +fn make_rate_limiter( + threads_per_service: usize, + doc: &KdlDocument, + node: &KdlNode, + args: BTreeMap<&str, &KdlValue>, +) -> miette::Result { + let take_num = |key: &str| -> miette::Result { + let Some(val) = args.get(key) else { + return Err(Bad::docspan(format!("Missing key: '{key}'"), doc, node.span()).into()); + }; + let Some(val) = val.as_i64().and_then(|v| usize::try_from(v).ok()) else { + return Err(Bad::docspan( + format!( + "'{key} should have a positive integer value, got '{:?}' instead", + val + ), + doc, + node.span(), + ) + .into()); + }; + Ok(val) + }; + let take_str = |key: &str| -> miette::Result<&str> { + let Some(val) = args.get(key) else { + return Err(Bad::docspan(format!("Missing key: 
'{key}'"), doc, node.span()).into()); + }; + let Some(val) = val.as_string() else { + return Err(Bad::docspan( + format!("'{key} should have a string value, got '{:?}' instead", val), + doc, + node.span(), + ) + .into()); + }; + Ok(val) + }; + + // mandatory/common fields + let kind = take_str("kind")?; + let max_buckets = take_num("max-buckets")?; + let tokens_per_bucket = take_num("tokens-per-bucket")?; + let refill_qty = take_num("refill-qty")?; + let refill_rate_ms = take_num("refill-rate-ms")?; + + let rater_cfg = RaterConfig { + threads: threads_per_service, + max_buckets, + max_tokens_per_bucket: tokens_per_bucket, + refill_interval_millis: refill_rate_ms, + refill_qty, + }; + + match kind { + "source-ip" => Ok(RaterInstanceConfig { + rater_cfg, + kind: RequestKeyKind::SourceIp, + }), + "uri" => { + let pattern = take_str("pattern")?; + let Ok(pattern) = Regex::new(pattern) else { + return Err(Bad::docspan( + format!("'{pattern} should be a valid regular expression"), + doc, + node.span(), + ) + .into()); + }; + Ok(RaterInstanceConfig { + rater_cfg, + kind: RequestKeyKind::Uri { pattern }, + }) + } + other => Err(Bad::docspan( + format!("'{other} is not a known kind of rate limiting"), + doc, + node.span(), + ) + .into()), + } +} + /// Extracts the `load-balance` structure from the `connectors` section fn extract_load_balance(doc: &KdlDocument, node: &KdlNode) -> miette::Result { let items = utils::data_nodes( diff --git a/source/river/src/config/toml.rs b/source/river/src/config/toml.rs index c45da01..c5b2963 100644 --- a/source/river/src/config/toml.rs +++ b/source/river/src/config/toml.rs @@ -8,7 +8,7 @@ use std::{ use pingora::upstreams::peer::HttpPeer; use serde::{Deserialize, Serialize}; -use super::internal::UpstreamOptions; +use super::internal::{RateLimitingConfig, UpstreamOptions}; /// Configuration used for TOML formatted files #[derive(Serialize, Deserialize, Debug, PartialEq)] @@ -153,6 +153,7 @@ impl From for super::internal::ProxyConfig { upstreams: vec![other.connector.into()], path_control: other.path_control.into(), upstream_options: UpstreamOptions::default(), + rate_limiting: RateLimitingConfig::default(), } } } diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs index b0d3edf..c846666 100644 --- a/source/river/src/proxy/mod.rs +++ b/source/river/src/proxy/mod.rs @@ -4,7 +4,10 @@ //! this includes creation of HTTP proxy services, as well as Path Control //! modifiers. 
-use std::{collections::{BTreeMap, BTreeSet}, time::Duration}; +use std::{ + collections::{BTreeMap, BTreeSet}, + time::Duration, +}; use async_trait::async_trait; use futures_util::FutureExt; @@ -31,7 +34,10 @@ use crate::{ }, }; -use self::{rate_limiting::{RaterInstance, TicketError}, request_filters::RequestFilterMod}; +use self::{ + rate_limiting::{Rater, RaterInstance, RequestKeyKind, TicketError}, + request_filters::RequestFilterMod, +}; pub mod rate_limiting; pub mod request_filters; @@ -39,7 +45,6 @@ pub mod request_modifiers; pub mod request_selector; pub mod response_modifiers; - pub struct RateLimiters { request_filter_stage: Vec, } @@ -57,6 +62,7 @@ pub struct RiverProxyService { /// Load Balancer pub load_balancer: LoadBalancer, pub request_selector: RequestSelector, + pub rate_limiting_timeout: Duration, pub rate_limiters: RateLimiters, } @@ -108,13 +114,35 @@ where .expect("static should not error"); // end of TODO + let request_filter_stage = conf + .rate_limiting + .rules + .iter() + .filter_map(|cfg| match &cfg.kind { + yes @ RequestKeyKind::SourceIp | yes @ RequestKeyKind::Uri { .. } => { + Some(RaterInstance { + rater: Rater::new(cfg.rater_cfg.clone()), + kind: yes.clone(), + }) + } + RequestKeyKind::DestIp => None, + }) + .collect(); + let mut my_proxy = pingora_proxy::http_proxy_service_with_name( &server.configuration, Self { modifiers, load_balancer: upstreams, request_selector: conf.upstream_options.selector, - rate_limiters: RateLimiters { request_filter_stage: vec![] }, + rate_limiting_timeout: conf + .rate_limiting + .timeout_ms + .map(|v| Duration::from_millis(v as u64)) + .unwrap_or(Duration::from_secs(1)), + rate_limiters: RateLimiters { + request_filter_stage, + }, }, &conf.name, ); @@ -259,8 +287,9 @@ where // TODO: avoid spawning one task per limiter? let mut set: JoinSet> = JoinSet::new(); for limiter in self.rate_limiters.request_filter_stage.iter() { - let ticket = limiter.get_ticket(session); - set.spawn(ticket.wait()); + if let Some(ticket) = limiter.get_ticket(session) { + set.spawn(ticket.wait()); + } } // This future returns when either ALL tickets have been obtained, OR @@ -268,7 +297,7 @@ where let total_ticket_fut = async move { while let Some(res) = set.join_next().await { match res { - Ok(Ok(())) => {}, + Ok(Ok(())) => {} Ok(Err(_e)) => return Err(()), Err(_) => return Err(()), } @@ -280,23 +309,24 @@ where // // TODO: This is a workaround for not having the ability to immediately bail // if buckets are "too full"! - // - // TODO: make this configurable? - let res = tokio::time::timeout(Duration::from_secs(1), total_ticket_fut).await; + let res = tokio::time::timeout(self.rate_limiting_timeout, total_ticket_fut).await; match res { - Ok(Ok(())) => {}, + Ok(Ok(())) => { + // Rate limiter completed! 
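+                    // Fall through to the request filter modifiers below.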
+ } Ok(Err(())) => { - // Rate limiter said "bail" - return Err(Error::new_down(ErrorType::CustomCode("rate limit exceeded", 429))) + tracing::trace!("Rejecting due to rate limiting failure"); + session.downstream_session.respond_error(429).await; + return Ok(true); } Err(_) => { - // Timeout occurred - return Err(Error::new_down(ErrorType::CustomCode("rate limit exceeded", 429))) + tracing::trace!("Rejecting due to rate limiting timeout"); + session.downstream_session.respond_error(429).await; + return Ok(true); } } } - for filter in &self.modifiers.request_filters { match filter.request_filter(session, ctx).await { // If Ok true: we're done handling this request diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs index 2de881f..f801434 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting.rs @@ -4,12 +4,28 @@ use concread::arcache::{ARCache, ARCacheBuilder}; use leaky_bucket::RateLimiter; use pandora_module_utils::pingora::SocketAddr; use pingora_proxy::Session; +use regex::Regex; -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone)] pub enum RequestKeyKind { SourceIp, DestIp, - Uri, + Uri { + pattern: Regex + }, +} + +impl PartialEq for RequestKeyKind { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::SourceIp, Self::SourceIp) => true, + (Self::DestIp, Self::DestIp) => true, + (Self::Uri { pattern: pattern1 }, Self::Uri { pattern: pattern2 }) => { + pattern1.as_str() == pattern2.as_str() + } + _ => false + } + } } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -19,31 +35,42 @@ pub enum RequestKey { Uri(String), } +#[derive(Debug)] pub struct RaterInstance { pub rater: Rater, pub kind: RequestKeyKind, } +#[derive(Debug, Clone, PartialEq)] +pub struct RaterInstanceConfig { + pub rater_cfg: RaterConfig, + pub kind: RequestKeyKind, +} + impl RaterInstance { - pub fn get_ticket(&self, session: &mut Session) -> Ticket { - let key = self.get_key(session); - self.rater.get_ticket(key) + pub fn get_ticket(&self, session: &mut Session) -> Option { + let key = self.get_key(session)?; + Some(self.rater.get_ticket(key)) } - pub fn get_key(&self, session: &mut Session) -> RequestKey { - match self.kind { + pub fn get_key(&self, session: &mut Session) -> Option { + match &self.kind { RequestKeyKind::SourceIp => { let src = session.downstream_session.client_addr().unwrap(); let src_ip = match src { SocketAddr::Inet(addr) => addr.ip(), - SocketAddr::Unix(_) => todo!(), + SocketAddr::Unix(_) => return None, }; - RequestKey::Source(src_ip) + Some(RequestKey::Source(src_ip)) }, - RequestKeyKind::DestIp => todo!(), - RequestKeyKind::Uri => { - let uri = session.downstream_session.req_header().uri.to_string(); - RequestKey::Uri(uri) + RequestKeyKind::DestIp => None, + RequestKeyKind::Uri { pattern } => { + let uri_path = session.downstream_session.req_header().uri.path(); + if pattern.is_match(uri_path) { + Some(RequestKey::Uri(uri_path.to_string())) + } else { + None + } }, } } @@ -126,6 +153,15 @@ where refill_qty: usize, } +impl Debug for Rater +where + Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("Rater { ... 
}") + } +} + impl Rater where Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, diff --git a/source/river/src/proxy/request_filters.rs b/source/river/src/proxy/request_filters.rs index e267fdd..144ce7c 100644 --- a/source/river/src/proxy/request_filters.rs +++ b/source/river/src/proxy/request_filters.rs @@ -50,7 +50,8 @@ impl RequestFilterMod for CidrRangeFilter { async fn request_filter(&self, session: &mut Session, _ctx: &mut RiverContext) -> Result { let Some(addr) = session.downstream_session.client_addr() else { // Unable to determine source address, assuming it should be blocked - return Err(Error::new_down(ErrorType::Custom("Missing Client Address"))); + session.downstream_session.respond_error(401).await; + return Ok(true); }; let SocketAddr::Inet(addr) = addr else { // CIDR filters don't apply to UDS @@ -59,7 +60,8 @@ impl RequestFilterMod for CidrRangeFilter { let ip_addr = addr.ip(); if self.blocks.iter().any(|b| b.contains(&ip_addr)) { - Err(Error::new_down(ErrorType::ConnectRefused)) + session.downstream_session.respond_error(401).await; + Ok(true) } else { Ok(false) } From 473c2634bd06e8283e14e2bb7fc47b5dbab87804 Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:29:34 +0200 Subject: [PATCH 06/23] Fix tests --- source/river/src/config/kdl/mod.rs | 6 +++--- source/river/src/config/kdl/test.rs | 16 +++++++++++++++- source/river/src/config/toml.rs | 4 +++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index f46afb2..75ddead 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -15,7 +15,7 @@ use crate::{ PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, }, proxy::{ - rate_limiting::{Rater, RaterConfig, RaterInstance, RaterInstanceConfig, RequestKeyKind}, + rate_limiting::{RaterConfig, RaterInstanceConfig, RequestKeyKind}, request_selector::{ null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, }, @@ -331,7 +331,8 @@ fn extract_service( .iter() .map(|(k, v)| (*k, v.value())) .collect::>(); - rl.rules.push(make_rate_limiter(threads_per_service, doc, node, valslice)?); + rl.rules + .push(make_rate_limiter(threads_per_service, doc, node, valslice)?); } other => { return Err( @@ -340,7 +341,6 @@ fn extract_service( } } } - } Ok(ProxyConfig { diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs index 08a62b9..0940da4 100644 --- a/source/river/src/config/kdl/test.rs +++ b/source/river/src/config/kdl/test.rs @@ -1,12 +1,13 @@ use std::{collections::BTreeMap, net::SocketAddr}; use pingora::upstreams::peer::HttpPeer; +use regex::Regex; use crate::{ config::internal::{ FileServerConfig, ListenerConfig, ListenerKind, ProxyConfig, UpstreamOptions, }, - proxy::request_selector::uri_path_selector, + proxy::{rate_limiting::{RaterConfig, RaterInstanceConfig}, request_selector::uri_path_selector}, }; #[test] @@ -87,6 +88,13 @@ fn load_test() { health_checks: crate::config::internal::HealthCheckKind::None, discovery: crate::config::internal::DiscoveryKind::Static, }, + rate_limiting: crate::config::internal::RateLimitingConfig { + timeout_ms: Some(100), + rules: vec![ + RaterInstanceConfig { rater_cfg: RaterConfig { threads: 8, max_buckets: 4000, max_tokens_per_bucket: 10, refill_interval_millis: 10, refill_qty: 1 }, kind: crate::proxy::rate_limiting::RequestKeyKind::SourceIp }, + RaterInstanceConfig { rater_cfg: RaterConfig { threads: 8, max_buckets: 2000, 
max_tokens_per_bucket: 20, refill_interval_millis: 1, refill_qty: 5 }, kind: crate::proxy::rate_limiting::RequestKeyKind::Uri { pattern: Regex::new("static/.*").unwrap() } }, + ], + }, }, ProxyConfig { name: "Example2".into(), @@ -104,6 +112,10 @@ fn load_test() { request_filters: vec![], }, upstream_options: UpstreamOptions::default(), + rate_limiting: crate::config::internal::RateLimitingConfig { + timeout_ms: None, + rules: vec![], + }, }, ], file_servers: vec![FileServerConfig { @@ -147,6 +159,7 @@ fn load_test() { upstream_options, upstreams, path_control, + rate_limiting, } = abp; assert_eq!(*name, ebp.name); assert_eq!(*listeners, ebp.listeners); @@ -160,6 +173,7 @@ fn load_test() { assert_eq!(a.sni, e.sni); }); assert_eq!(*path_control, ebp.path_control); + assert_eq!(*rate_limiting, ebp.rate_limiting); } for (afs, efs) in val.file_servers.iter().zip(expected.file_servers.iter()) { diff --git a/source/river/src/config/toml.rs b/source/river/src/config/toml.rs index c5b2963..f2004ef 100644 --- a/source/river/src/config/toml.rs +++ b/source/river/src/config/toml.rs @@ -206,7 +206,7 @@ pub mod test { use crate::config::{ apply_toml, - internal::{self, UpstreamOptions}, + internal::{self, RateLimitingConfig, UpstreamOptions}, toml::{ConnectorConfig, ListenerConfig, ProxyConfig, System}, }; @@ -366,6 +366,7 @@ pub mod test { request_filters: vec![], }, upstream_options: UpstreamOptions::default(), + rate_limiting: RateLimitingConfig::default(), }, internal::ProxyConfig { name: "Example2".into(), @@ -383,6 +384,7 @@ pub mod test { request_filters: vec![], }, upstream_options: UpstreamOptions::default(), + rate_limiting: RateLimitingConfig::default(), }, ], file_servers: Vec::new(), From dc1087d1c2b33829419d27ae9e11a7be20f40dc1 Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:29:43 +0200 Subject: [PATCH 07/23] Cargo fmt --- source/river/src/config/kdl/test.rs | 29 ++++++++++++++++++++++--- source/river/src/proxy/rate_limiting.rs | 10 ++++----- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs index 0940da4..1a5018d 100644 --- a/source/river/src/config/kdl/test.rs +++ b/source/river/src/config/kdl/test.rs @@ -7,7 +7,10 @@ use crate::{ config::internal::{ FileServerConfig, ListenerConfig, ListenerKind, ProxyConfig, UpstreamOptions, }, - proxy::{rate_limiting::{RaterConfig, RaterInstanceConfig}, request_selector::uri_path_selector}, + proxy::{ + rate_limiting::{RaterConfig, RaterInstanceConfig}, + request_selector::uri_path_selector, + }, }; #[test] @@ -91,8 +94,28 @@ fn load_test() { rate_limiting: crate::config::internal::RateLimitingConfig { timeout_ms: Some(100), rules: vec![ - RaterInstanceConfig { rater_cfg: RaterConfig { threads: 8, max_buckets: 4000, max_tokens_per_bucket: 10, refill_interval_millis: 10, refill_qty: 1 }, kind: crate::proxy::rate_limiting::RequestKeyKind::SourceIp }, - RaterInstanceConfig { rater_cfg: RaterConfig { threads: 8, max_buckets: 2000, max_tokens_per_bucket: 20, refill_interval_millis: 1, refill_qty: 5 }, kind: crate::proxy::rate_limiting::RequestKeyKind::Uri { pattern: Regex::new("static/.*").unwrap() } }, + RaterInstanceConfig { + rater_cfg: RaterConfig { + threads: 8, + max_buckets: 4000, + max_tokens_per_bucket: 10, + refill_interval_millis: 10, + refill_qty: 1, + }, + kind: crate::proxy::rate_limiting::RequestKeyKind::SourceIp, + }, + RaterInstanceConfig { + rater_cfg: RaterConfig { + threads: 8, + max_buckets: 2000, + max_tokens_per_bucket: 20, + 
refill_interval_millis: 1, + refill_qty: 5, + }, + kind: crate::proxy::rate_limiting::RequestKeyKind::Uri { + pattern: Regex::new("static/.*").unwrap(), + }, + }, ], }, }, diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs index f801434..952a1f5 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting.rs @@ -10,9 +10,7 @@ use regex::Regex; pub enum RequestKeyKind { SourceIp, DestIp, - Uri { - pattern: Regex - }, + Uri { pattern: Regex }, } impl PartialEq for RequestKeyKind { @@ -23,7 +21,7 @@ impl PartialEq for RequestKeyKind { (Self::Uri { pattern: pattern1 }, Self::Uri { pattern: pattern2 }) => { pattern1.as_str() == pattern2.as_str() } - _ => false + _ => false, } } } @@ -62,7 +60,7 @@ impl RaterInstance { SocketAddr::Unix(_) => return None, }; Some(RequestKey::Source(src_ip)) - }, + } RequestKeyKind::DestIp => None, RequestKeyKind::Uri { pattern } => { let uri_path = session.downstream_session.req_header().uri.path(); @@ -71,7 +69,7 @@ impl RaterInstance { } else { None } - }, + } } } } From f69b52ef96663733ca9ac2c693e2df83c8ca2615 Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:32:00 +0200 Subject: [PATCH 08/23] Clean up some warnings --- Cargo.lock | 46 +--- Cargo.toml | 2 +- experiments/rate-limiting/Cargo.toml | 12 - experiments/rate-limiting/src/lib.rs | 1 - experiments/rate-limiting/src/rater.rs | 331 ------------------------ source/river/src/proxy/rate_limiting.rs | 11 +- 6 files changed, 13 insertions(+), 390 deletions(-) delete mode 100644 experiments/rate-limiting/Cargo.toml delete mode 100644 experiments/rate-limiting/src/lib.rs delete mode 100644 experiments/rate-limiting/src/rater.rs diff --git a/Cargo.lock b/Cargo.lock index 3e9278a..a05bf96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1053,15 +1053,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" -[[package]] -name = "matchers" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" -dependencies = [ - "regex-automata 0.1.10", -] - [[package]] name = "maud" version = "0.26.0" @@ -1749,17 +1740,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "rate-limiting" -version = "0.1.0" -dependencies = [ - "concread", - "leaky-bucket", - "tokio", - "tracing", - "tracing-subscriber", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -1777,17 +1757,8 @@ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] @@ -1798,15 +1769,9 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax", ] -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.3" @@ -2528,7 +2493,6 @@ 
dependencies = [ "bytes", "libc", "mio", - "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -2696,14 +2660,10 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ - "matchers", "nu-ansi-term", - "once_cell", - "regex", "sharded-slab", "smallvec", "thread_local", - "tracing", "tracing-core", "tracing-log", ] diff --git a/Cargo.toml b/Cargo.toml index 9a2e53b..2b75343 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = [ "experiments/rate-limiting", +members = [ "source/river", ] diff --git a/experiments/rate-limiting/Cargo.toml b/experiments/rate-limiting/Cargo.toml deleted file mode 100644 index f15b3ea..0000000 --- a/experiments/rate-limiting/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "rate-limiting" -version = "0.1.0" -edition = "2021" -publish = false - -[dependencies] -concread = "0.5.3" -leaky-bucket = "1.1.2" -tokio = { version = "1.39.3", features = ["full"] } -tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/experiments/rate-limiting/src/lib.rs b/experiments/rate-limiting/src/lib.rs deleted file mode 100644 index f492ce5..0000000 --- a/experiments/rate-limiting/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod rater; diff --git a/experiments/rate-limiting/src/rater.rs b/experiments/rate-limiting/src/rater.rs deleted file mode 100644 index d683230..0000000 --- a/experiments/rate-limiting/src/rater.rs +++ /dev/null @@ -1,331 +0,0 @@ -use std::{fmt::Debug, hash::Hash, sync::Arc, time::Duration}; - -use concread::arcache::{ARCache, ARCacheBuilder}; -use leaky_bucket::RateLimiter; - -/// Configuration for the [`Rater`] -#[derive(Debug, PartialEq, Clone)] -pub struct RaterConfig { - /// The number of expected concurrent threads - should match the number of - /// tokio threadpool workers - pub threads: usize, - /// The peak number of leaky buckets we aim to have live at once - /// - /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`] - /// for docs on calculating actual memory usage based on these parameters - pub max_buckets: usize, - /// The max and initial number of tokens in the leaky bucket - this is the number of - /// requests that can go through without any waiting if the bucket is full - pub max_tokens_per_bucket: usize, - /// The interval between "refills" of the bucket, e.g. the bucket refills `refill_qty` - /// every `refill_interval_millis` - pub refill_interval_millis: usize, - /// The number of tokens added to the bucket every `refill_interval_millis` - pub refill_qty: usize, -} - -/// A concurrent rate limiting structure -/// -/// ## Implementation details and notes -/// -/// For performance and resource reasons, this provides an *approximation* of exact rate -/// limiting. Currently, there are a few "false positive" cases that can permit more than -/// the expected number of actions to occur. -/// -/// Rater is currently modeled as a Least Recently Used (LRU) cache of leaky buckets mapped -/// by a key. This is done to provide a bounded quantity of leaky buckets, without requiring -/// a worker task to "cull" the oldest buckets. Instead, unused buckets will naturally -/// "fall out" of the cache if they are not used. 
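
The design described in the documentation being removed here (which lives on in `source/river/src/proxy/rate_limiting.rs`) can be pictured with a minimal, single-threaded sketch of the same "bucket per key" idea. A plain `HashMap` stands in for the bounded `ARCache`, so unlike the real `Rater` it never evicts and is not safe for concurrent use, and the constants are illustrative rather than taken from any real configuration:

```rust
use std::{collections::HashMap, sync::Arc, time::Duration};

use leaky_bucket::RateLimiter;

// Toy model of `Rater`: one leaky bucket per key. A real implementation
// needs the bounded, concurrent cache described above; this one grows
// without limit and must be externally synchronized.
struct ToyRater {
    buckets: HashMap<String, Arc<RateLimiter>>,
}

impl ToyRater {
    fn get_or_create(&mut self, key: &str) -> Arc<RateLimiter> {
        self.buckets
            .entry(key.to_string())
            .or_insert_with(|| {
                Arc::new(
                    RateLimiter::builder()
                        .initial(10) // bucket starts full: 10 immediate requests
                        .max(10) // never holds more than 10 tokens
                        .interval(Duration::from_millis(100)) // refill every 100ms
                        .refill(1) // one token per refill
                        .build(),
                )
            })
            .clone()
    }
}
```
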
-/// -/// ### Too many unique keys at too high of a rate -/// -/// If there is a very high diversity of Keys provided, it is possible that keys could -/// be evicted from the cache before they would naturally expire or be refilled. In this -/// case, Rater will appear to not apply rate limiting, as the evicted bucket will be -/// replaced with a new, initially full bucket. This can be mitigated by choosing a -/// bucket storage capacity that is large enough to hold enough buckets to handle the -/// expected requests per second. e.g. if there is room for 1M buckets, and a bucket would -/// refill one token every 100ms, then we would expect to be able to handle at least 100K -/// requests with unique keys per second without evicting the buckets before the bucket -/// would refill anyway. -/// -/// ### A burst of previously-unseen keys -/// -/// If a number of requests appear around the same time for a Key that is not resident -/// in the cache, it is possible that all worker threads will create a new bucket and -/// attempt to add their buckets to the cache, though only one will be persisted, and -/// the others will be lost. -/// -/// For example if there are N worker threads, and N requests with the same key arrive -/// at roughly the same time, it is possible that we will create N new leaky buckets, -/// each that will give one immediately-ready token for the request. However, in the -/// worst case (N - 1) of these tokens won't "count", as (N - 1) of these buckets -/// will be thrown away, and not counted in the one bucket that was persisted -/// -/// This worst case is extremely unlikely, as it would require N requests with the same Key -/// to arrive in the time window necessary to write to the cache, and for all N requests -/// to be distributed to N different worker threads that all attempt to find the Key -/// at the same time. -/// -/// There is no mitigation for this currently, other than treating the "max tokens per -/// bucket" as an approximate value, with up to "number of worker threads" of false -/// positives as an acceptable bound. -pub struct Rater -where - Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, -{ - cache: ARCache>, - max_tokens_per_bucket: usize, - refill_interval_millis: usize, - refill_qty: usize, -} - -impl Debug for Rater -where - Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("Rater { ... }") - } -} - -impl Rater -where - Key: Hash + Eq + Ord + Clone + Debug + Sync + Send + 'static, -{ - /// Create a new rate limiter with the given configuration. - /// - /// See [`RaterConfig`] for configuration options. - pub fn new(config: RaterConfig) -> Self { - let RaterConfig { - threads, - max_buckets, - max_tokens_per_bucket, - refill_interval_millis, - refill_qty, - } = config; - let cache = ARCacheBuilder::new() - .set_expected_workload( - // total - // - // total number of items you want to have in memory - max_buckets, - // threads - // - // the number of read threads you expect concurrently (AT LEAST 1) - threads.max(1), - // ex_ro_miss - // - // the expected average number of cache misses per read operation - 1, - // ex_rw_miss - // - // the expected average number of writes or write cache misses per operation - 1, - // read_cache - // - // ? 
- false, - ) - .build() - .expect("Creation of rate limiter should not fail"); - - Self { - cache, - max_tokens_per_bucket, - refill_interval_millis, - refill_qty: refill_qty.min(max_tokens_per_bucket), - } - } - - /// Obtain a ticket for the given Key. - /// - /// If the Key does not exist already, it will be created. - pub fn get_ticket(&self, key: Key) -> Ticket { - let mut reader = self.cache.read(); - - if let Some(find) = reader.get(&key) { - // Rate limiter DID exist in the cache - tracing::trace!(?key, "rate limiting cache hit",); - Ticket { - limiter: find.clone(), - } - } else { - let new_limiter = Arc::new(self.new_rate_limiter()); - tracing::debug!(?key, "rate limiting cache miss",); - reader.insert(key, new_limiter.clone()); - reader.finish(); - Ticket { - limiter: new_limiter, - } - } - } - - fn new_rate_limiter(&self) -> RateLimiter { - RateLimiter::builder() - .initial(self.max_tokens_per_bucket) - .max(self.max_tokens_per_bucket) - .interval(Duration::from_millis(self.refill_interval_millis as u64)) - .refill(self.refill_qty) - .fair(true) - .build() - } -} - -#[derive(Debug, PartialEq, Clone)] -pub enum TicketError { - BurstLimitExceeded, -} - -/// A claim ticket for the leaky bucket queue -/// -/// You are expected to call [`Ticket::wait()`] to wait for your turn to perform -/// the rate limited option. -#[must_use = "You must call `Ticket::wait()` to wait your turn!"] -pub struct Ticket { - limiter: Arc, -} - -impl Ticket { - /// Wait for our "turn" granted by the leaky bucket - /// - /// * If the bucket has a token available, `Ok(())` will be returned immediately - /// * If the bucket does not have a token available, this function will yield until - /// a token is refilled - /// * If the bucket has too many pending waiters, then `Err(TicketError)` will be - /// returned immediately - /// - /// NOTE: In the future, we would like to be able to return immediately if there - /// are too many pending requests at once, instead of queueing requests that are - /// going to end up timing out anyway. - /// - /// However, this is not supported by the [`leaky-bucket`] crate today, enqueueing - /// will always succeed, and we will handle this one layer up by adding a timeout - /// to requests. - /// - /// This should be fixed in the future as a performance optimization, but for now - /// we give ourselves the API surface to make this change with minimal fuss in - /// in the future. 
- pub async fn wait(self) -> Result<(), TicketError> { - self.limiter.acquire_owned(1).await; - Ok(()) - } -} - -#[cfg(test)] -mod test { - use tokio::sync::mpsc::channel; - - use super::*; - use std::{ops::Add, time::Instant}; - - #[tokio::test] - async fn smoke() { - let _ = tracing_subscriber::fmt::try_init(); - let config = RaterConfig { - threads: 2, - max_buckets: 5, - max_tokens_per_bucket: 3, - refill_interval_millis: 10, - refill_qty: 1, - }; - - let rater = Arc::new(Rater::new(config)); - - for i in 0..100 { - let start = Instant::now(); - let bucket = rater.get_ticket("bob".to_string()); - bucket.wait().await.unwrap(); - tracing::info!("Success {i} took {:?}!", start.elapsed()); - } - } - - #[tokio::test] - async fn concurrent_fewer() { - let _ = tracing_subscriber::fmt::try_init(); - let config = RaterConfig { - threads: 8, - max_buckets: 16, - max_tokens_per_bucket: 3, - refill_interval_millis: 10, - refill_qty: 1, - }; - let rater = Arc::new(Rater::new(config)); - let mut handles = vec![]; - - for thread in 0..8 { - let rater = rater.clone(); - let hdl = tokio::task::spawn(async move { - for i in 1..32 { - let name = fizzbuzz(i); - let start = Instant::now(); - let bucket = rater.get_ticket(name.clone()); - bucket.wait().await.unwrap(); - tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()); - } - }); - handles.push(hdl); - } - - for h in handles.into_iter() { - h.await.unwrap(); - } - } - - #[tokio::test] - async fn concurrent_more() { - let _ = tracing_subscriber::fmt::try_init(); - let config = RaterConfig { - threads: 8, - max_buckets: 128, - max_tokens_per_bucket: 3, - refill_interval_millis: 10, - refill_qty: 1, - }; - let rater = Arc::new(Rater::new(config)); - let (htx, mut hrx) = channel(1024); - let deadline = Instant::now().add(Duration::from_millis(10)); - - for thread in 0..8 { - let rater = rater.clone(); - let htxin = htx.clone(); - let hdl = tokio::task::spawn(async move { - for i in 1..32 { - let name = fizzbuzz(i); - let rater = rater.clone(); - let hdl = tokio::task::spawn(async move { - tokio::time::sleep_until(deadline.into()).await; - let start = Instant::now(); - let bucket = rater.get_ticket(name.clone()); - let res = - tokio::time::timeout(Duration::from_millis(100), bucket.wait()).await; - match res { - Ok(Ok(())) => { - tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()) - } - Ok(Err(_)) => unreachable!(), - Err(_) => tracing::warn!("{thread}:{i}:{name} gave up after 100ms!"), - } - }); - htxin.send(hdl).await.unwrap(); - } - drop(htxin); - }); - htx.send(hdl).await.unwrap(); - } - drop(htx); - - while let Some(hdl) = hrx.recv().await { - hdl.await.unwrap(); - } - } - - fn fizzbuzz(i: usize) -> String { - match i { - i if i % 15 == 0 => "fizzbuzz".to_string(), - i if i % 3 == 0 => "fizz".to_string(), - i if i % 5 == 0 => "buzz".to_string(), - i => format!("{i}"), - } - } -} diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs index 952a1f5..22018dc 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting.rs @@ -9,19 +9,24 @@ use regex::Regex; #[derive(Debug, Clone)] pub enum RequestKeyKind { SourceIp, + #[allow(dead_code)] DestIp, - Uri { pattern: Regex }, + Uri { + pattern: Regex, + }, } impl PartialEq for RequestKeyKind { fn eq(&self, other: &Self) -> bool { match (self, other) { (Self::SourceIp, Self::SourceIp) => true, + (Self::SourceIp, _) => false, (Self::DestIp, Self::DestIp) => true, + (Self::DestIp, _) => false, (Self::Uri { pattern: 
pattern1 }, Self::Uri { pattern: pattern2 }) => { pattern1.as_str() == pattern2.as_str() } - _ => false, + (Self::Uri { .. }, _) => false, } } } @@ -29,6 +34,7 @@ impl PartialEq for RequestKeyKind { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum RequestKey { Source(IpAddr), + #[allow(dead_code)] Dest(IpAddr), Uri(String), } @@ -243,6 +249,7 @@ where } } +#[allow(dead_code)] #[derive(Debug, PartialEq, Clone)] pub enum TicketError { BurstLimitExceeded, From 0e05f092b6c0fe8655a9e3d39bb93f2deb7cf50d Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:34:07 +0200 Subject: [PATCH 09/23] Remove localhost cidr block --- source/river/assets/test-config.kdl | 2 +- source/river/src/config/kdl/test.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index 5822da0..f55b8be 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -87,7 +87,7 @@ services { // This section is optional. path-control { request-filters { - filter kind="block-cidr-range" addrs="192.168.0.0/16, 10.0.0.0/8, 2001:0db8::0/32, 127.0.0.1" + filter kind="block-cidr-range" addrs="192.168.0.0/16, 10.0.0.0/8, 2001:0db8::0/32" } upstream-request { filter kind="remove-header-key-regex" pattern=".*(secret|SECRET).*" diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs index 1a5018d..1bdbe1b 100644 --- a/source/river/src/config/kdl/test.rs +++ b/source/river/src/config/kdl/test.rs @@ -81,7 +81,7 @@ fn load_test() { ("kind".to_string(), "block-cidr-range".to_string()), ( "addrs".to_string(), - "192.168.0.0/16, 10.0.0.0/8, 2001:0db8::0/32, 127.0.0.1".to_string(), + "192.168.0.0/16, 10.0.0.0/8, 2001:0db8::0/32".to_string(), ), ])], }, From 5013d0313180d90b012ebda18838268a65ec674a Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:41:37 +0200 Subject: [PATCH 10/23] Remove unwrap --- source/river/src/proxy/rate_limiting.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs index 22018dc..c0ef938 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting.rs @@ -60,7 +60,7 @@ impl RaterInstance { pub fn get_key(&self, session: &mut Session) -> Option { match &self.kind { RequestKeyKind::SourceIp => { - let src = session.downstream_session.client_addr().unwrap(); + let src = session.downstream_session.client_addr()?; let src_ip = match src { SocketAddr::Inet(addr) => addr.ip(), SocketAddr::Unix(_) => return None, From 481aba6bfa2174181a9e389f1803aff618e7a12b Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 17:44:15 +0200 Subject: [PATCH 11/23] Add comment --- source/river/src/proxy/rate_limiting.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs index c0ef938..044777b 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting.rs @@ -1,3 +1,9 @@ +//! Rate Limiting +//! +//! This is an implementation of request rate limiting. +//! +//! 
See the [`Rater`] structure for more details + use std::{fmt::Debug, hash::Hash, net::IpAddr, sync::Arc, time::Duration}; use concread::arcache::{ARCache, ARCacheBuilder}; From a4a0b162c3e3ad42b6a6f5a97f1d7dda526c4776 Mon Sep 17 00:00:00 2001 From: James Munns Date: Wed, 28 Aug 2024 20:07:51 +0200 Subject: [PATCH 12/23] Add rate limiting docs --- user-manual/src/config/kdl.md | 124 ++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/user-manual/src/config/kdl.md b/user-manual/src/config/kdl.md index a148169..eda9baf 100644 --- a/user-manual/src/config/kdl.md +++ b/user-manual/src/config/kdl.md @@ -97,6 +97,15 @@ services { filter kind="upsert-header" key="x-with-love-from" value="river" } } + rate-limiting { + timeout millis=100 + + rule kind="source-ip" \ + max-buckets=4000 tokens-per-bucket=10 refill-qty=1 refill-rate-ms=10 + + rule kind="uri" pattern="static/.*" \ + max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 + } } Example3 { listeners { @@ -255,6 +264,121 @@ Filters at this stage are the earliest. Currently supported filters: * Arguments: `key="KEY" value="VALUE"`, where `KEY` is a valid HTTP header key, and `VALUE` is a valid HTTP header value * The given header will be added or replaced to `VALUE` +### `services.$NAME.rate-limiting` + +This section contains the configuration for rate limiting rules. + +Rate limiting rules are used to limit the total number of requests made by downstream clients, +based on various criteria. + +Note that Rate limiting is on a **per service** basis, services do not share rate limiting +information. + +This section is optional. + +Example: + +``` +rate-limiting { + timeout millis=100 + + rule kind="source-ip" \ + max-buckets=4000 tokens-per-bucket=10 refill-qty=1 refill-rate-ms=10 + + rule kind="uri" pattern="static/.*" \ + max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 +} +``` + +#### `services.$NAME.rate-limiting.timeout` + +The `timeout` parameter is used to set the total timeout for acquiring all rate limiting tokens. + +If acquiring applicable rate limiting tokens takes longer than this time, the request will not be +forwarded and will respond with a 429 error. + +This parameter is mandatory if the `rate-limiting` section is present. + +This is specified in the form: + +`timeout millis=TIME`, where `TIME` is an unsigned integer + +**Implementation Note**: The rate limiting timeout is a temporary implementation detail to limit +requests from waiting "too long" to obtain their tokens. In the future, it is planned to modify +the leaky bucket implementation to instead set an upper limit on the maximum "token debt", or +how many requests are waiting for a token. Instead of waiting and timing out, requests will instead +be given immediate feedback that the rate limiting is overcongested, and return a 429 error immediately, +instead of after a given timeout. + +When this change occurs, the `timeout` parameter will be deprecated, and replaced with a `max-token-debt` +parameter instead. + +#### `services.$NAME.rate-limiting.rule` + +Rules are used to specify rate limiting parameters, and applicability of rules to a given request. + +##### Leaky Buckets + +Rate limiting in River uses a [Leaky Bucket] model for determining whether a request can be served +immediately, or if it should be delayed (or rejected). For a given rule, a "bucket" of "tokens" +is created, where one "token" is required for each request. 
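
As a quick demonstration of these token mechanics, here is a small sketch using the `leaky-bucket` crate that backs this feature, with illustrative numbers rather than values from any real configuration. The sketch only exercises immediate claims via `try_acquire`; the waiting behavior described below goes through the crate's async `acquire_owned` path instead:

```rust
use std::time::Duration;

use leaky_bucket::RateLimiter;

#[tokio::main]
async fn main() {
    // Illustrative numbers: 3 tokens to start, +1 token every 10ms, capped at 3.
    let bucket = RateLimiter::builder()
        .initial(3)
        .max(3)
        .interval(Duration::from_millis(10))
        .refill(1)
        .build();

    // The first three requests each claim a token immediately...
    assert!(bucket.try_acquire(1));
    assert!(bucket.try_acquire(1));
    assert!(bucket.try_acquire(1));
    // ...and the fourth finds the bucket empty.
    assert!(!bucket.try_acquire(1));

    // After a refill interval has elapsed, one more token is available.
    tokio::time::sleep(Duration::from_millis(15)).await;
    assert!(bucket.try_acquire(1));
}
```
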
+
+The bucket for a rule starts with a configurable `tokens-per-bucket` number. When a request arrives,
+it attempts to take one token from the bucket. If one is available, it is served immediately. Otherwise,
+the request waits in a first-in, first-out order for a token to become available.
+
+The bucket is refilled at a configurable rate, specified by `refill-rate-ms`, and adds a configurable
+number of tokens specified by `refill-qty`. The number of tokens in the bucket will never exceed the
+initial `tokens-per-bucket` number.
+
+Once a refill occurs, requests may become ready if a token becomes available.
+
+[Leaky Bucket]: https://en.wikipedia.org/wiki/Leaky_bucket
+
+##### How many buckets?
+
+Some rules require many buckets. For example, rules based on the source IP address will create a bucket
+for each unique IP address of downstream users.
+
+However, each of these buckets require space to contain the metadata, and to avoid unbounded growth,
+we allow for a configurable `max-buckets` number, which serves to influence the total memory required
+for storing buckets. This uses an [Adaptive Replacement Cache]
+to allow for concurrent access to these buckets, as well as the ability to automatically buckets that
+are not actively being used (somewhat similar to an LRU or "Least Recently Used" cache).
+
+[Adaptive Replacement Cache]: https://docs.rs/concread/latest/concread/arcache/index.html
+
+There is a trade-off here: the larger `max-buckets` is, the longer that River can "remember" a bucket
+for a given factor, such as specific IP addresses. However, it also requires more resident memory to
+retain this information.
+
+If `max-buckets` is set too low, then buckets will be "evicted" from the cache, meaning that subsequent
+requests matching that bucket will require the creation of a new bucket (with a full set of tokens),
+potentially defeating the objective of accurate rate limiting.
+
+##### Gotta claim 'em all
+
+When multiple rules apply to a single request, for example rules based on both source IP address,
+and the URI path, then a request must claim ALL applicable tokens before proceeding. If a given IP
+address is making its first request, but to a URI that has an empty bucket, it will immediately
+obtain the IP address token, but be forced to wait until the URI token has been claimed
+
+##### Kinds of Rules
+
+Currently two kinds of rules are supported:
+
+* `kind="source-ip"` - this tracks the IP address of the requestor. A unique bucket will be created for
+  the IPv4 or IPv6 address of the requestor.
+* `kind="uri" pattern="REGEX"` - This tracks the URI path of the request, such as `static/images/example.jpg`
+  * If the request's URI path matches the provided `REGEX`, the full URI path will be assigned to a given
+    bucket
+  * For example, if the regex `static/.*` was provided:
+    * `index.html` would not match this rule, and would not require obtaining a token
+    * `static/images/example.jpg` would match this rule, and would require obtaining a token
+    * `static/styles/example.css` would also match this rule, and would require obtaining a token
+    * Note that `static/images/example.jpg` and `static/styles/example.css` would each have a UNIQUE
+      bucket.
+
 ### `services.$NAME.file-server`
 
 This section is only allowed when `connectors` and `path-control` are not present.
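
To picture how the `uri` rules documented above pick buckets: the pattern only gates whether a request is rate limited at all, while the full path becomes the bucket key, mirroring the key-extraction logic from earlier in this series. The `bucket_key` helper below is invented for illustration:

```rust
use regex::Regex;

// The pattern decides *whether* a request is rate limited; the full path
// decides *which* bucket it draws from. `bucket_key` is illustrative only.
fn bucket_key(pattern: &Regex, uri_path: &str) -> Option<String> {
    pattern.is_match(uri_path).then(|| uri_path.to_string())
}

fn main() {
    let pattern = Regex::new("static/.*").unwrap();

    // No match: this request is not subject to the rule at all.
    assert_eq!(bucket_key(&pattern, "/index.html"), None);

    // Matching paths are limited, and distinct paths get distinct buckets.
    assert_eq!(
        bucket_key(&pattern, "/static/images/example.jpg").as_deref(),
        Some("/static/images/example.jpg")
    );
    assert_eq!(
        bucket_key(&pattern, "/static/styles/example.css").as_deref(),
        Some("/static/styles/example.css")
    );
}
```
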
From 9f972bbd056a849984c2ceab03cc0bc0b74e219e Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 11:59:48 +0200 Subject: [PATCH 13/23] Apply suggestions from code review Co-authored-by: Brandon Pitman --- user-manual/src/config/kdl.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/user-manual/src/config/kdl.md b/user-manual/src/config/kdl.md index eda9baf..ef5e16a 100644 --- a/user-manual/src/config/kdl.md +++ b/user-manual/src/config/kdl.md @@ -338,12 +338,12 @@ Once a refill occurs, requests may become ready if a token becomes available. ##### How many buckets? Some rules require many buckets. For example, rules based on the source IP address will create a bucket -for each unique IP address of downstream users. +for each unique source IP address observed in a request. However, each of these buckets require space to contain the metadata, and to avoid unbounded growth, we allow for a configurable `max-buckets` number, which serves to influence the total memory required for storing buckets. This uses an [Adaptive Replacement Cache] -to allow for concurrent access to these buckets, as well as the ability to automatically buckets that +to allow for concurrent access to these buckets, as well as the ability to automatically evict buckets that are not actively being used (somewhat similar to an LRU or "Least Recently Used" cache). [Adaptive Replacement Cache]: https://docs.rs/concread/latest/concread/arcache/index.html From 1e72764471798d4f3a99f5a6391bc4c8fbf6c29d Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 12:45:49 +0200 Subject: [PATCH 14/23] Switch to "now or never" behavior instead of managing timeouts --- source/river/assets/test-config.kdl | 4 - source/river/src/config/internal.rs | 1 - source/river/src/config/kdl/mod.rs | 68 +++------ source/river/src/config/kdl/test.rs | 6 +- source/river/src/proxy/mod.rs | 71 +++------ source/river/src/proxy/rate_limiting.rs | 188 ++++++++---------------- 6 files changed, 99 insertions(+), 239 deletions(-) diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index f55b8be..7875226 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -45,10 +45,6 @@ services { // BOTH the `source-ip` rule (from the `1.2.3.4` bucket), AND the `uri` rule (from the // `/static/style.css` bucket) rate-limiting { - // If a request takes more than 100ms to obtain all necessary tokens, a 429 error - // will be returned - timeout millis=100 - // This rate limiting rule is based on the source IP address // // * Up to the last 4000 IP addresses will be remembered diff --git a/source/river/src/config/internal.rs b/source/river/src/config/internal.rs index e496c80..074e0b7 100644 --- a/source/river/src/config/internal.rs +++ b/source/river/src/config/internal.rs @@ -115,7 +115,6 @@ impl Config { /// #[derive(Debug, Default, Clone, PartialEq)] pub struct RateLimitingConfig { - pub(crate) timeout_ms: Option, pub(crate) rules: Vec, } diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index 75ddead..dc7c525 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -4,23 +4,21 @@ use std::{ path::PathBuf, }; -use kdl::{KdlDocument, KdlEntry, KdlNode, KdlValue}; -use miette::{bail, Diagnostic, SourceSpan}; -use pingora::{protocols::ALPN, upstreams::peer::HttpPeer}; -use regex::Regex; - use crate::{ config::internal::{ Config, DiscoveryKind, FileServerConfig, HealthCheckKind, 
ListenerConfig, ListenerKind, PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, }, proxy::{ - rate_limiting::{RaterConfig, RaterInstanceConfig, RequestKeyKind}, + rate_limiting::{RaterConfig, RaterInstanceConfig, RegexShim, RequestKeyKind}, request_selector::{ null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, }, }, }; +use kdl::{KdlDocument, KdlEntry, KdlNode, KdlValue}; +use miette::{bail, Diagnostic, SourceSpan}; +use pingora::{protocols::ALPN, upstreams::peer::HttpPeer}; use super::internal::RateLimitingConfig; @@ -295,50 +293,18 @@ fn extract_service( if let Some(rl_node) = utils::optional_child_doc(doc, node, "rate-limiting") { let nodes = utils::data_nodes(doc, rl_node)?; for (node, name, args) in nodes.iter() { - match *name { - "timeout" => { - let vals = utils::str_value_args(doc, args)?; - let [("millis", val)] = vals.as_slice() else { - return Err(Bad::docspan( - "Unknown argument, missing 'millis'", - doc, - node.span(), - ) - .into()); - }; - let Some(millis) = val.value().as_i64().and_then(|v| usize::try_from(v).ok()) - else { - return Err(Bad::docspan( - "'millis' value should be a number", - doc, - node.span(), - ) - .into()); - }; - let old = rl.timeout_ms.replace(millis); - if old.is_some() { - return Err(Bad::docspan( - "Can't specify 'timeout' twice!", - doc, - node.span(), - ) - .into()); - } - } - "rule" => { - let vals = utils::str_value_args(doc, args)?; - let valslice = vals - .iter() - .map(|(k, v)| (*k, v.value())) - .collect::>(); - rl.rules - .push(make_rate_limiter(threads_per_service, doc, node, valslice)?); - } - other => { - return Err( - Bad::docspan(format!("Unknown name: '{other}'"), doc, node.span()).into(), - ); - } + if *name == "rule" { + let vals = utils::str_value_args(doc, args)?; + let valslice = vals + .iter() + .map(|(k, v)| (*k, v.value())) + .collect::>(); + rl.rules + .push(make_rate_limiter(threads_per_service, doc, node, valslice)?); + } else { + return Err( + Bad::docspan(format!("Unknown name: '{name}'"), doc, node.span()).into(), + ); } } } @@ -413,7 +379,7 @@ fn make_rate_limiter( }), "uri" => { let pattern = take_str("pattern")?; - let Ok(pattern) = Regex::new(pattern) else { + let Ok(pattern) = RegexShim::new(pattern) else { return Err(Bad::docspan( format!("'{pattern} should be a valid regular expression"), doc, diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs index 1bdbe1b..0e81586 100644 --- a/source/river/src/config/kdl/test.rs +++ b/source/river/src/config/kdl/test.rs @@ -8,7 +8,7 @@ use crate::{ FileServerConfig, ListenerConfig, ListenerKind, ProxyConfig, UpstreamOptions, }, proxy::{ - rate_limiting::{RaterConfig, RaterInstanceConfig}, + rate_limiting::{RaterConfig, RaterInstanceConfig, RegexShim}, request_selector::uri_path_selector, }, }; @@ -92,7 +92,6 @@ fn load_test() { discovery: crate::config::internal::DiscoveryKind::Static, }, rate_limiting: crate::config::internal::RateLimitingConfig { - timeout_ms: Some(100), rules: vec![ RaterInstanceConfig { rater_cfg: RaterConfig { @@ -113,7 +112,7 @@ fn load_test() { refill_qty: 5, }, kind: crate::proxy::rate_limiting::RequestKeyKind::Uri { - pattern: Regex::new("static/.*").unwrap(), + pattern: RegexShim::new("static/.*").unwrap(), }, }, ], @@ -136,7 +135,6 @@ fn load_test() { }, upstream_options: UpstreamOptions::default(), rate_limiting: crate::config::internal::RateLimitingConfig { - timeout_ms: None, rules: vec![], }, }, diff --git a/source/river/src/proxy/mod.rs 
b/source/river/src/proxy/mod.rs
index c846666..51e28f2 100644
--- a/source/river/src/proxy/mod.rs
+++ b/source/river/src/proxy/mod.rs
@@ -4,10 +4,7 @@
 //! this includes creation of HTTP proxy services, as well as Path Control
 //! modifiers.
 
-use std::{
-    collections::{BTreeMap, BTreeSet},
-    time::Duration,
-};
+use std::collections::{BTreeMap, BTreeSet};
 
 use async_trait::async_trait;
 use futures_util::FutureExt;
@@ -23,7 +20,6 @@ use pingora_load_balancing::{
     Backend, Backends, LoadBalancer,
 };
 use pingora_proxy::{ProxyHttp, Session};
-use tokio::task::JoinSet;
 
 use crate::{
     config::internal::{PathControl, ProxyConfig, SelectionKind},
@@ -35,7 +31,7 @@ use crate::{
 };
 
 use self::{
-    rate_limiting::{Rater, RaterInstance, RequestKeyKind, TicketError},
+    rate_limiting::{Rater, RaterInstance, RequestKeyKind},
     request_filters::RequestFilterMod,
 };
 
@@ -62,7 +58,6 @@ pub struct RiverProxyService<BS: BackendSelection> {
     /// Load Balancer
     pub load_balancer: LoadBalancer<BS>,
     pub request_selector: RequestSelector,
-    pub rate_limiting_timeout: Duration,
     pub rate_limiters: RateLimiters,
 }
 
@@ -135,11 +130,6 @@ where
                 modifiers,
                 load_balancer: upstreams,
                 request_selector: conf.upstream_options.selector,
-                rate_limiting_timeout: conf
-                    .rate_limiting
-                    .timeout_ms
-                    .map(|v| Duration::from_millis(v as u64))
-                    .unwrap_or(Duration::from_secs(1)),
                 rate_limiters: RateLimiters {
                     request_filter_stage,
                 },
@@ -282,48 +272,31 @@ where
         // First: do rate limiting at this stage - quickly check if there are none and skip over
         // entirely if so
         if !self.rate_limiters.request_filter_stage.is_empty() {
-            // Use a joinset to capture all applicable rate limiters
+            // Attempt to get all tokens
+            //
+            // TODO: If https://github.com/udoprog/leaky-bucket/issues/17 is resolved we could
+            // remember the buckets that we did get approved for, and "return" the unused tokens.
+            //
+            // For now, if some tickets succeed but subsequent tickets fail, the preceding
+            // approved tokens are just "burned".
             //
-            // TODO: avoid spawning one task per limiter?
-            let mut set: JoinSet<Result<(), TicketError>> = JoinSet::new();
+            // TODO: If https://github.com/udoprog/leaky-bucket/issues/34 is resolved we could
+            // support a "max debt" number, allowing us to delay if acquisition of the token
+            // would happen soon-ish, instead of immediately 429-ing if the token we need is
+            // about to become available.
             for limiter in self.rate_limiters.request_filter_stage.iter() {
                 if let Some(ticket) = limiter.get_ticket(session) {
-                    set.spawn(ticket.wait());
-                }
-            }
-
-            // This future returns when either ALL tickets have been obtained, OR
-            // when any ticket returns an error
-            let total_ticket_fut = async move {
-                while let Some(res) = set.join_next().await {
-                    match res {
-                        Ok(Ok(())) => {}
-                        Ok(Err(_e)) => return Err(()),
-                        Err(_) => return Err(()),
+                    match ticket.now_or_never() {
+                        rate_limiting::Outcome::Approved => {
+                            // Approved, move on
+                        }
+                        rate_limiting::Outcome::Declined => {
+                            tracing::trace!("Rejecting due to rate limiting failure");
+                            session.downstream_session.respond_error(429).await;
+                            return Ok(true);
+                        }
                    }
                 }
-                Ok(())
-            };
-
-            // Cap the waiting time in case all buckets are too full
-            //
-            // TODO: This is a workaround for not having the ability to immediately bail
-            // if buckets are "too full"!
-            let res = tokio::time::timeout(self.rate_limiting_timeout, total_ticket_fut).await;
-            match res {
-                Ok(Ok(())) => {
-                    // Rate limiter completed!
- } - Ok(Err(())) => { - tracing::trace!("Rejecting due to rate limiting failure"); - session.downstream_session.respond_error(429).await; - return Ok(true); - } - Err(_) => { - tracing::trace!("Rejecting due to rate limiting timeout"); - session.downstream_session.respond_error(429).await; - return Ok(true); - } } } diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting.rs index 044777b..ddbd84c 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting.rs @@ -4,7 +4,7 @@ //! //! See the [`Rater`] structure for more details -use std::{fmt::Debug, hash::Hash, net::IpAddr, sync::Arc, time::Duration}; +use std::{fmt::Debug, hash::Hash, net::IpAddr, ops::Deref, sync::Arc, time::Duration}; use concread::arcache::{ARCache, ARCacheBuilder}; use leaky_bucket::RateLimiter; @@ -12,28 +12,36 @@ use pandora_module_utils::pingora::SocketAddr; use pingora_proxy::Session; use regex::Regex; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum RequestKeyKind { SourceIp, #[allow(dead_code)] DestIp, Uri { - pattern: Regex, + pattern: RegexShim, }, } -impl PartialEq for RequestKeyKind { +#[derive(Debug, Clone)] +pub struct RegexShim(pub Regex); + +impl PartialEq for RegexShim { fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::SourceIp, Self::SourceIp) => true, - (Self::SourceIp, _) => false, - (Self::DestIp, Self::DestIp) => true, - (Self::DestIp, _) => false, - (Self::Uri { pattern: pattern1 }, Self::Uri { pattern: pattern2 }) => { - pattern1.as_str() == pattern2.as_str() - } - (Self::Uri { .. }, _) => false, - } + self.0.as_str().eq(other.0.as_str()) + } +} + +impl Deref for RegexShim { + type Target = Regex; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl RegexShim { + pub fn new(pattern: &str) -> Result { + Ok(Self(Regex::new(pattern)?)) } } @@ -57,6 +65,12 @@ pub struct RaterInstanceConfig { pub kind: RequestKeyKind, } +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum Outcome { + Approved, + Declined, +} + impl RaterInstance { pub fn get_ticket(&self, session: &mut Session) -> Option { let key = self.get_key(session)?; @@ -271,37 +285,21 @@ pub struct Ticket { } impl Ticket { - /// Wait for our "turn" granted by the leaky bucket - /// - /// * If the bucket has a token available, `Ok(())` will be returned immediately - /// * If the bucket does not have a token available, this function will yield until - /// a token is refilled - /// * If the bucket has too many pending waiters, then `Err(TicketError)` will be - /// returned immediately - /// - /// NOTE: In the future, we would like to be able to return immediately if there - /// are too many pending requests at once, instead of queueing requests that are - /// going to end up timing out anyway. - /// - /// However, this is not supported by the [`leaky-bucket`] crate today, enqueueing - /// will always succeed, and we will handle this one layer up by adding a timeout - /// to requests. - /// - /// This should be fixed in the future as a performance optimization, but for now - /// we give ourselves the API surface to make this change with minimal fuss in - /// in the future. - pub async fn wait(self) -> Result<(), TicketError> { - self.limiter.acquire_owned(1).await; - Ok(()) + /// Try to get a token immediately from the bucket. 
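+    ///
+    /// This never suspends: a successful `try_acquire(1)` maps to
+    /// [`Outcome::Approved`], and failure maps to [`Outcome::Declined`],
+    /// where the old `wait()` would have parked the task until a token
+    /// was refilled.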
+ pub fn now_or_never(self) -> Outcome { + if self.limiter.try_acquire(1) { + Outcome::Approved + } else { + Outcome::Declined + } } } #[cfg(test)] mod test { - use tokio::sync::mpsc::channel; - use super::*; - use std::{ops::Add, time::Instant}; + use std::time::Instant; + use tokio::time::interval; #[tokio::test] async fn smoke() { @@ -314,102 +312,32 @@ mod test { refill_qty: 1, }; - let rater = Arc::new(Rater::new(config)); - + let rater = Arc::new(Rater::new(config.clone())); + let mut sleeper = interval(Duration::from_millis(6)); + let start = Instant::now(); + let mut approved = 0; for i in 0..100 { - let start = Instant::now(); - let bucket = rater.get_ticket("bob".to_string()); - bucket.wait().await.unwrap(); - tracing::info!("Success {i} took {:?}!", start.elapsed()); - } - } + sleeper.tick().await; + let ticket = rater.get_ticket("bob".to_string()); - #[tokio::test] - async fn concurrent_fewer() { - let _ = tracing_subscriber::fmt::try_init(); - let config = RaterConfig { - threads: 8, - max_buckets: 16, - max_tokens_per_bucket: 3, - refill_interval_millis: 10, - refill_qty: 1, - }; - let rater = Arc::new(Rater::new(config)); - let mut handles = vec![]; - - for thread in 0..8 { - let rater = rater.clone(); - let hdl = tokio::task::spawn(async move { - for i in 1..32 { - let name = fizzbuzz(i); - let start = Instant::now(); - let bucket = rater.get_ticket(name.clone()); - bucket.wait().await.unwrap(); - tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()); + match ticket.now_or_never() { + Outcome::Approved => { + approved += 1; + tracing::info!("Approved {i}!") } - }); - handles.push(hdl); - } - - for h in handles.into_iter() { - h.await.unwrap(); - } - } - - #[tokio::test] - async fn concurrent_more() { - let _ = tracing_subscriber::fmt::try_init(); - let config = RaterConfig { - threads: 8, - max_buckets: 128, - max_tokens_per_bucket: 3, - refill_interval_millis: 10, - refill_qty: 1, - }; - let rater = Arc::new(Rater::new(config)); - let (htx, mut hrx) = channel(1024); - let deadline = Instant::now().add(Duration::from_millis(10)); - - for thread in 0..8 { - let rater = rater.clone(); - let htxin = htx.clone(); - let hdl = tokio::task::spawn(async move { - for i in 1..32 { - let name = fizzbuzz(i); - let rater = rater.clone(); - let hdl = tokio::task::spawn(async move { - tokio::time::sleep_until(deadline.into()).await; - let start = Instant::now(); - let bucket = rater.get_ticket(name.clone()); - let res = - tokio::time::timeout(Duration::from_millis(100), bucket.wait()).await; - match res { - Ok(Ok(())) => { - tracing::info!("{thread}:{i}:{name} took {:?}!", start.elapsed()) - } - Ok(Err(_)) => unreachable!(), - Err(_) => tracing::warn!("{thread}:{i}:{name} gave up after 100ms!"), - } - }); - htxin.send(hdl).await.unwrap(); - } - drop(htxin); - }); - htx.send(hdl).await.unwrap(); + Outcome::Declined => tracing::info!("Declined {i}!"), + } } - drop(htx); + let duration = start.elapsed(); + let duration = duration.as_secs_f64(); + let approved = approved as f64; - while let Some(hdl) = hrx.recv().await { - hdl.await.unwrap(); - } - } + let expected_rate = 1000.0f64 / config.refill_interval_millis as f64; + let expected_ttl = (duration * expected_rate) + config.max_tokens_per_bucket as f64; - fn fizzbuzz(i: usize) -> String { - match i { - i if i % 15 == 0 => "fizzbuzz".to_string(), - i if i % 3 == 0 => "fizz".to_string(), - i if i % 5 == 0 => "buzz".to_string(), - i => format!("{i}"), - } + // Did we get +/-10% of the expected number of approvals? 
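+        // (Worked example: with refill_interval_millis = 10 and refill_qty = 1,
+        // expected_rate = 100 tokens/sec; ~0.6s of 6ms ticks plus the 3 initial
+        // tokens puts expected_ttl near 63.)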
+ tracing::info!(expected_ttl, actual_ttl = approved, "Rates"); + assert!(approved > (expected_ttl * 0.9f64)); + assert!(approved < (expected_ttl * 1.1f64)); } } From 6220dd53d8820d87eeea8af64241f2245f77fade Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 12:49:00 +0200 Subject: [PATCH 15/23] Remove references to timeout and waiting for tokens to become available --- user-manual/src/config/kdl.md | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/user-manual/src/config/kdl.md b/user-manual/src/config/kdl.md index ef5e16a..f121420 100644 --- a/user-manual/src/config/kdl.md +++ b/user-manual/src/config/kdl.md @@ -290,29 +290,6 @@ rate-limiting { } ``` -#### `services.$NAME.rate-limiting.timeout` - -The `timeout` parameter is used to set the total timeout for acquiring all rate limiting tokens. - -If acquiring applicable rate limiting tokens takes longer than this time, the request will not be -forwarded and will respond with a 429 error. - -This parameter is mandatory if the `rate-limiting` section is present. - -This is specified in the form: - -`timeout millis=TIME`, where `TIME` is an unsigned integer - -**Implementation Note**: The rate limiting timeout is a temporary implementation detail to limit -requests from waiting "too long" to obtain their tokens. In the future, it is planned to modify -the leaky bucket implementation to instead set an upper limit on the maximum "token debt", or -how many requests are waiting for a token. Instead of waiting and timing out, requests will instead -be given immediate feedback that the rate limiting is overcongested, and return a 429 error immediately, -instead of after a given timeout. - -When this change occurs, the `timeout` parameter will be deprecated, and replaced with a `max-token-debt` -parameter instead. - #### `services.$NAME.rate-limiting.rule` Rules are used to specify rate limiting parameters, and applicability of rules to a given request. @@ -320,18 +297,18 @@ Rules are used to specify rate limiting parameters, and applicability of rules t ##### Leaky Buckets Rate limiting in River uses a [Leaky Bucket] model for determining whether a request can be served -immediately, or if it should be delayed (or rejected). For a given rule, a "bucket" of "tokens" -is created, where one "token" is required for each request. +immediately, or if it should be rejected. For a given rule, a "bucket" of "tokens" is created, where +one "token" is required for each request. The bucket for a rule starts with a configurable `tokens-per-bucket` number. When a request arrives, it attempts to take one token from the bucket. If one is available, it is served immediately. Otherwise, -the request waits in a first-in, first-out order for a token to become available. +the request is rejected immediately. The bucket is refilled at a configurable rate, specified by `refill-rate-ms`, and adds a configurable number of tokens specified by `refill-qty`. The number of tokens in the bucket will never exceed the initial `tokens-per-bucket` number. -Once a refill occurs, requests may become ready if a token becomes available. +Once a refill occurs, additional requests may be served. [Leaky Bucket]: https://en.wikipedia.org/wiki/Leaky_bucket @@ -361,7 +338,7 @@ potentially defeating the objective of accurate rate limiting. When multiple rules apply to a single request, for example rules based on both source IP address, and the URI path, then a request must claim ALL applicable tokens before proceeding. 
If a given IP address is making its first request, but to a URI that has an empty bucket, it will immediately
-obtain the IP address token, but be forced to wait until the URI token has been claimed
+obtain the IP address token, but the request will be rejected because the URI bucket claim failed.

##### Kinds of Rules


From 92d5d86d2dc8dfe5e753a057e44bac3ddc79a314 Mon Sep 17 00:00:00 2001
From: James Munns
Date: Thu, 29 Aug 2024 12:50:39 +0200
Subject: [PATCH 16/23] Cargo fmt

---
 source/river/src/config/kdl/test.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs
index 0e81586..09fd612 100644
--- a/source/river/src/config/kdl/test.rs
+++ b/source/river/src/config/kdl/test.rs
@@ -134,9 +134,7 @@ fn load_test() {
 request_filters: vec![],
 },
 upstream_options: UpstreamOptions::default(),
- rate_limiting: crate::config::internal::RateLimitingConfig {
- rules: vec![],
- },
+ rate_limiting: crate::config::internal::RateLimitingConfig { rules: vec![] },
 },
 ],
 file_servers: vec![FileServerConfig {

From 769eda63e89dfd6954379f5b0abc20dedd7e7d93 Mon Sep 17 00:00:00 2001
From: James Munns
Date: Thu, 29 Aug 2024 13:12:22 +0200
Subject: [PATCH 17/23] Break up rate limiting, make current rate limiting be
 "multi" flavored

---
 source/river/src/config/internal.rs | 2 +-
 source/river/src/config/kdl/mod.rs | 5 ++-
 source/river/src/proxy/mod.rs | 2 +-
 source/river/src/proxy/rate_limiting/mod.rs | 34 +++++++++++++++++++
 .../multi.rs} | 34 ++-----------------
 5 files changed, 43 insertions(+), 34 deletions(-)
 create mode 100644 source/river/src/proxy/rate_limiting/mod.rs
 rename source/river/src/proxy/{rate_limiting.rs => rate_limiting/multi.rs} (94%)

diff --git a/source/river/src/config/internal.rs b/source/river/src/config/internal.rs
index 074e0b7..b63f3ec 100644
--- a/source/river/src/config/internal.rs
+++ b/source/river/src/config/internal.rs
@@ -15,7 +15,7 @@ use pingora::{
 use tracing::warn;

 use crate::proxy::{
-    rate_limiting::RaterInstanceConfig,
+    rate_limiting::multi::RaterInstanceConfig,
     request_selector::{null_selector, RequestSelector},
 };

diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs
index dc7c525..3fe209d 100644
--- a/source/river/src/config/kdl/mod.rs
+++ b/source/river/src/config/kdl/mod.rs
@@ -10,7 +10,10 @@ use crate::{
 PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions,
 },
 proxy::{
-        rate_limiting::{RaterConfig, RaterInstanceConfig, RegexShim, RequestKeyKind},
+        rate_limiting::{
+            multi::{RaterConfig, RaterInstanceConfig, RequestKeyKind},
+            RegexShim,
+        },
 request_selector::{
 null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector,
 },
diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs
index 51e28f2..d771c7e 100644
--- a/source/river/src/proxy/mod.rs
+++ b/source/river/src/proxy/mod.rs
@@ -31,7 +31,7 @@ use crate::{
 };

 use self::{
-    rate_limiting::{Rater, RaterInstance, RequestKeyKind},
+    rate_limiting::multi::{Rater, RaterInstance, RequestKeyKind},
     request_filters::RequestFilterMod,
 };

diff --git a/source/river/src/proxy/rate_limiting/mod.rs b/source/river/src/proxy/rate_limiting/mod.rs
new file mode 100644
index 0000000..fc53fbc
--- /dev/null
+++ b/source/river/src/proxy/rate_limiting/mod.rs
@@ -0,0 +1,34 @@
+use std::ops::Deref;
+
+use regex::Regex;
+
+pub mod multi;
+
+#[derive(Debug, Clone)]
+pub struct RegexShim(pub Regex);
+
+impl PartialEq for RegexShim {
+    fn eq(&self, other: &Self) ->
bool { + self.0.as_str().eq(other.0.as_str()) + } +} + +impl Deref for RegexShim { + type Target = Regex; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl RegexShim { + pub fn new(pattern: &str) -> Result { + Ok(Self(Regex::new(pattern)?)) + } +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum Outcome { + Approved, + Declined, +} diff --git a/source/river/src/proxy/rate_limiting.rs b/source/river/src/proxy/rate_limiting/multi.rs similarity index 94% rename from source/river/src/proxy/rate_limiting.rs rename to source/river/src/proxy/rate_limiting/multi.rs index ddbd84c..06f4854 100644 --- a/source/river/src/proxy/rate_limiting.rs +++ b/source/river/src/proxy/rate_limiting/multi.rs @@ -4,13 +4,14 @@ //! //! See the [`Rater`] structure for more details -use std::{fmt::Debug, hash::Hash, net::IpAddr, ops::Deref, sync::Arc, time::Duration}; +use std::{fmt::Debug, hash::Hash, net::IpAddr, sync::Arc, time::Duration}; use concread::arcache::{ARCache, ARCacheBuilder}; use leaky_bucket::RateLimiter; use pandora_module_utils::pingora::SocketAddr; use pingora_proxy::Session; -use regex::Regex; + +use super::{Outcome, RegexShim}; #[derive(Debug, Clone, PartialEq)] pub enum RequestKeyKind { @@ -22,29 +23,6 @@ pub enum RequestKeyKind { }, } -#[derive(Debug, Clone)] -pub struct RegexShim(pub Regex); - -impl PartialEq for RegexShim { - fn eq(&self, other: &Self) -> bool { - self.0.as_str().eq(other.0.as_str()) - } -} - -impl Deref for RegexShim { - type Target = Regex; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl RegexShim { - pub fn new(pattern: &str) -> Result { - Ok(Self(Regex::new(pattern)?)) - } -} - #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum RequestKey { Source(IpAddr), @@ -65,12 +43,6 @@ pub struct RaterInstanceConfig { pub kind: RequestKeyKind, } -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum Outcome { - Approved, - Declined, -} - impl RaterInstance { pub fn get_ticket(&self, session: &mut Session) -> Option { let key = self.get_key(session)?; From 82ccad577aed98d424105b0b94ca9bba11f2a221 Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 13:56:50 +0200 Subject: [PATCH 18/23] Reorganize a bit --- source/river/src/config/internal.rs | 2 +- source/river/src/config/kdl/mod.rs | 9 +-- source/river/src/proxy/mod.rs | 27 ++++++--- source/river/src/proxy/rate_limiting/mod.rs | 33 +++++++++++ source/river/src/proxy/rate_limiting/multi.rs | 56 ++++--------------- 5 files changed, 67 insertions(+), 60 deletions(-) diff --git a/source/river/src/config/internal.rs b/source/river/src/config/internal.rs index b63f3ec..074e0b7 100644 --- a/source/river/src/config/internal.rs +++ b/source/river/src/config/internal.rs @@ -15,7 +15,7 @@ use pingora::{ use tracing::warn; use crate::proxy::{ - rate_limiting::multi::RaterInstanceConfig, + rate_limiting::RaterInstanceConfig, request_selector::{null_selector, RequestSelector}, }; diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index 3fe209d..e540d81 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -10,10 +10,7 @@ use crate::{ PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, }, proxy::{ - rate_limiting::{ - multi::{RaterConfig, RaterInstanceConfig, RequestKeyKind}, - RegexShim, - }, + rate_limiting::{AllRequestKeyKind, RaterConfig, RaterInstanceConfig, RegexShim}, request_selector::{ null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, }, @@ -378,7 
+375,7 @@ fn make_rate_limiter( match kind { "source-ip" => Ok(RaterInstanceConfig { rater_cfg, - kind: RequestKeyKind::SourceIp, + kind: AllRequestKeyKind::SourceIp, }), "uri" => { let pattern = take_str("pattern")?; @@ -392,7 +389,7 @@ fn make_rate_limiter( }; Ok(RaterInstanceConfig { rater_cfg, - kind: RequestKeyKind::Uri { pattern }, + kind: AllRequestKeyKind::Uri { pattern }, }) } other => Err(Bad::docspan( diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs index d771c7e..1c82b34 100644 --- a/source/river/src/proxy/mod.rs +++ b/source/river/src/proxy/mod.rs @@ -31,7 +31,10 @@ use crate::{ }; use self::{ - rate_limiting::multi::{Rater, RaterInstance, RequestKeyKind}, + rate_limiting::{ + multi::{MultiRequestKeyKind, Rater, RaterInstance}, + AllRequestKeyKind, + }, request_filters::RequestFilterMod, }; @@ -109,18 +112,26 @@ where .expect("static should not error"); // end of TODO + // NOTE: Using a silly filter map here because we might have rules in the future + // that can't be matched until later stages, such as if we wanted rate limiting + // buckets for each upstream that we choose - I want the match statement in this + // to no longer compile when we add more RequestKeyKinds. + #[allow(clippy::unnecessary_filter_map)] let request_filter_stage = conf .rate_limiting .rules .iter() .filter_map(|cfg| match &cfg.kind { - yes @ RequestKeyKind::SourceIp | yes @ RequestKeyKind::Uri { .. } => { - Some(RaterInstance { - rater: Rater::new(cfg.rater_cfg.clone()), - kind: yes.clone(), - }) - } - RequestKeyKind::DestIp => None, + AllRequestKeyKind::SourceIp => Some(RaterInstance { + rater: Rater::new(cfg.rater_cfg.clone()), + kind: MultiRequestKeyKind::SourceIp, + }), + AllRequestKeyKind::Uri { pattern } => Some(RaterInstance { + rater: Rater::new(cfg.rater_cfg.clone()), + kind: MultiRequestKeyKind::Uri { + pattern: pattern.clone(), + }, + }), }) .collect(); diff --git a/source/river/src/proxy/rate_limiting/mod.rs b/source/river/src/proxy/rate_limiting/mod.rs index fc53fbc..10f97a1 100644 --- a/source/river/src/proxy/rate_limiting/mod.rs +++ b/source/river/src/proxy/rate_limiting/mod.rs @@ -32,3 +32,36 @@ pub enum Outcome { Approved, Declined, } + +#[derive(Debug, Clone, PartialEq)] +pub enum AllRequestKeyKind { + SourceIp, + Uri { pattern: RegexShim }, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct RaterInstanceConfig { + pub rater_cfg: RaterConfig, + pub kind: AllRequestKeyKind, +} + +/// Configuration for the [`Rater`] +#[derive(Debug, PartialEq, Clone)] +pub struct RaterConfig { + /// The number of expected concurrent threads - should match the number of + /// tokio threadpool workers + pub threads: usize, + /// The peak number of leaky buckets we aim to have live at once + /// + /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`] + /// for docs on calculating actual memory usage based on these parameters + pub max_buckets: usize, + /// The max and initial number of tokens in the leaky bucket - this is the number of + /// requests that can go through without any waiting if the bucket is full + pub max_tokens_per_bucket: usize, + /// The interval between "refills" of the bucket, e.g. 
the bucket refills `refill_qty` + /// every `refill_interval_millis` + pub refill_interval_millis: usize, + /// The number of tokens added to the bucket every `refill_interval_millis` + pub refill_qty: usize, +} diff --git a/source/river/src/proxy/rate_limiting/multi.rs b/source/river/src/proxy/rate_limiting/multi.rs index 06f4854..57a0ac3 100644 --- a/source/river/src/proxy/rate_limiting/multi.rs +++ b/source/river/src/proxy/rate_limiting/multi.rs @@ -11,36 +11,24 @@ use leaky_bucket::RateLimiter; use pandora_module_utils::pingora::SocketAddr; use pingora_proxy::Session; -use super::{Outcome, RegexShim}; +use super::{Outcome, RaterConfig, RegexShim}; #[derive(Debug, Clone, PartialEq)] -pub enum RequestKeyKind { +pub enum MultiRequestKeyKind { SourceIp, - #[allow(dead_code)] - DestIp, - Uri { - pattern: RegexShim, - }, + Uri { pattern: RegexShim }, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum RequestKey { +pub enum MultiRequestKey { Source(IpAddr), - #[allow(dead_code)] - Dest(IpAddr), Uri(String), } #[derive(Debug)] pub struct RaterInstance { - pub rater: Rater, - pub kind: RequestKeyKind, -} - -#[derive(Debug, Clone, PartialEq)] -pub struct RaterInstanceConfig { - pub rater_cfg: RaterConfig, - pub kind: RequestKeyKind, + pub rater: Rater, + pub kind: MultiRequestKeyKind, } impl RaterInstance { @@ -49,21 +37,20 @@ impl RaterInstance { Some(self.rater.get_ticket(key)) } - pub fn get_key(&self, session: &mut Session) -> Option { + pub fn get_key(&self, session: &mut Session) -> Option { match &self.kind { - RequestKeyKind::SourceIp => { + MultiRequestKeyKind::SourceIp => { let src = session.downstream_session.client_addr()?; let src_ip = match src { SocketAddr::Inet(addr) => addr.ip(), SocketAddr::Unix(_) => return None, }; - Some(RequestKey::Source(src_ip)) + Some(MultiRequestKey::Source(src_ip)) } - RequestKeyKind::DestIp => None, - RequestKeyKind::Uri { pattern } => { + MultiRequestKeyKind::Uri { pattern } => { let uri_path = session.downstream_session.req_header().uri.path(); if pattern.is_match(uri_path) { - Some(RequestKey::Uri(uri_path.to_string())) + Some(MultiRequestKey::Uri(uri_path.to_string())) } else { None } @@ -72,27 +59,6 @@ impl RaterInstance { } } -/// Configuration for the [`Rater`] -#[derive(Debug, PartialEq, Clone)] -pub struct RaterConfig { - /// The number of expected concurrent threads - should match the number of - /// tokio threadpool workers - pub threads: usize, - /// The peak number of leaky buckets we aim to have live at once - /// - /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`] - /// for docs on calculating actual memory usage based on these parameters - pub max_buckets: usize, - /// The max and initial number of tokens in the leaky bucket - this is the number of - /// requests that can go through without any waiting if the bucket is full - pub max_tokens_per_bucket: usize, - /// The interval between "refills" of the bucket, e.g. 
the bucket refills `refill_qty` - /// every `refill_interval_millis` - pub refill_interval_millis: usize, - /// The number of tokens added to the bucket every `refill_interval_millis` - pub refill_qty: usize, -} - /// A concurrent rate limiting structure /// /// ## Implementation details and notes From 3f8cff14bf3b3cfcd644e85d0cd7d4eb47eb97c0 Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 16:54:39 +0200 Subject: [PATCH 19/23] Implement "single" or "grouped" URI matching --- source/river/assets/test-config.kdl | 5 +- source/river/src/config/internal.rs | 4 +- source/river/src/config/kdl/mod.rs | 70 +++++++++----- source/river/src/config/kdl/test.rs | 25 +++-- source/river/src/proxy/mod.rs | 91 ++++++++++++------- source/river/src/proxy/rate_limiting/mod.rs | 73 +++++++++------ source/river/src/proxy/rate_limiting/multi.rs | 80 +++++++++------- .../river/src/proxy/rate_limiting/single.rs | 68 ++++++++++++++ 8 files changed, 283 insertions(+), 133 deletions(-) create mode 100644 source/river/src/proxy/rate_limiting/single.rs diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index 7875226..99a5312 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -58,8 +58,11 @@ services { // * Up to the last 2000 URI paths will be remembered // * Each URI path can make a burst of 20 requests // * The bucket for each URI will refill at a rate of 5 requests per 1 millisecond - rule kind="uri" pattern="static/.*" \ + rule kind="specific-uri" pattern="static/.*" \ max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 + + rule kind="any-matching-uri" pattern=r".*\.mp4" \ + tokens-per-bucket=50 refill-qty=2 refill-rate-ms=3 } // Connectors are the "upstream" interfaces that we connect with. 
We can name as many diff --git a/source/river/src/config/internal.rs b/source/river/src/config/internal.rs index 074e0b7..db96c94 100644 --- a/source/river/src/config/internal.rs +++ b/source/river/src/config/internal.rs @@ -15,7 +15,7 @@ use pingora::{ use tracing::warn; use crate::proxy::{ - rate_limiting::RaterInstanceConfig, + rate_limiting::AllRateConfig, request_selector::{null_selector, RequestSelector}, }; @@ -115,7 +115,7 @@ impl Config { /// #[derive(Debug, Default, Clone, PartialEq)] pub struct RateLimitingConfig { - pub(crate) rules: Vec, + pub(crate) rules: Vec, } /// Add Path Control Modifiers diff --git a/source/river/src/config/kdl/mod.rs b/source/river/src/config/kdl/mod.rs index e540d81..68b8ea2 100644 --- a/source/river/src/config/kdl/mod.rs +++ b/source/river/src/config/kdl/mod.rs @@ -10,7 +10,11 @@ use crate::{ PathControl, ProxyConfig, SelectionKind, TlsConfig, UpstreamOptions, }, proxy::{ - rate_limiting::{AllRequestKeyKind, RaterConfig, RaterInstanceConfig, RegexShim}, + rate_limiting::{ + multi::{MultiRaterConfig, MultiRequestKeyKind}, + single::{SingleInstanceConfig, SingleRequestKeyKind}, + AllRateConfig, RegexShim, + }, request_selector::{ null_selector, source_addr_and_uri_path_selector, uri_path_selector, RequestSelector, }, @@ -324,7 +328,7 @@ fn make_rate_limiter( doc: &KdlDocument, node: &KdlNode, args: BTreeMap<&str, &KdlValue>, -) -> miette::Result { +) -> miette::Result { let take_num = |key: &str| -> miette::Result { let Some(val) = args.get(key) else { return Err(Bad::docspan(format!("Missing key: '{key}'"), doc, node.span()).into()); @@ -359,39 +363,57 @@ fn make_rate_limiter( // mandatory/common fields let kind = take_str("kind")?; - let max_buckets = take_num("max-buckets")?; let tokens_per_bucket = take_num("tokens-per-bucket")?; let refill_qty = take_num("refill-qty")?; let refill_rate_ms = take_num("refill-rate-ms")?; - let rater_cfg = RaterConfig { - threads: threads_per_service, - max_buckets, + let multi_cfg = || -> miette::Result { + let max_buckets = take_num("max-buckets")?; + Ok(MultiRaterConfig { + threads: threads_per_service, + max_buckets, + max_tokens_per_bucket: tokens_per_bucket, + refill_interval_millis: refill_rate_ms, + refill_qty, + }) + }; + + let single_cfg = || SingleInstanceConfig { max_tokens_per_bucket: tokens_per_bucket, refill_interval_millis: refill_rate_ms, refill_qty, }; + let regex_pattern = || -> miette::Result { + let pattern = take_str("pattern")?; + let Ok(pattern) = RegexShim::new(pattern) else { + return Err(Bad::docspan( + format!("'{pattern} should be a valid regular expression"), + doc, + node.span(), + ) + .into()); + }; + Ok(pattern) + }; + match kind { - "source-ip" => Ok(RaterInstanceConfig { - rater_cfg, - kind: AllRequestKeyKind::SourceIp, + "source-ip" => Ok(AllRateConfig::Multi { + kind: MultiRequestKeyKind::SourceIp, + config: multi_cfg()?, + }), + "specific-uri" => Ok(AllRateConfig::Multi { + kind: MultiRequestKeyKind::Uri { + pattern: regex_pattern()?, + }, + config: multi_cfg()?, + }), + "any-matching-uri" => Ok(AllRateConfig::Single { + kind: SingleRequestKeyKind::UriGroup { + pattern: regex_pattern()?, + }, + config: single_cfg(), }), - "uri" => { - let pattern = take_str("pattern")?; - let Ok(pattern) = RegexShim::new(pattern) else { - return Err(Bad::docspan( - format!("'{pattern} should be a valid regular expression"), - doc, - node.span(), - ) - .into()); - }; - Ok(RaterInstanceConfig { - rater_cfg, - kind: AllRequestKeyKind::Uri { pattern }, - }) - } other => Err(Bad::docspan( 
format!("'{other} is not a known kind of rate limiting"), doc, diff --git a/source/river/src/config/kdl/test.rs b/source/river/src/config/kdl/test.rs index 09fd612..6cd2d89 100644 --- a/source/river/src/config/kdl/test.rs +++ b/source/river/src/config/kdl/test.rs @@ -1,14 +1,13 @@ use std::{collections::BTreeMap, net::SocketAddr}; use pingora::upstreams::peer::HttpPeer; -use regex::Regex; use crate::{ config::internal::{ FileServerConfig, ListenerConfig, ListenerKind, ProxyConfig, UpstreamOptions, }, proxy::{ - rate_limiting::{RaterConfig, RaterInstanceConfig, RegexShim}, + rate_limiting::{multi::MultiRaterConfig, AllRateConfig, RegexShim}, request_selector::uri_path_selector, }, }; @@ -93,28 +92,38 @@ fn load_test() { }, rate_limiting: crate::config::internal::RateLimitingConfig { rules: vec![ - RaterInstanceConfig { - rater_cfg: RaterConfig { + AllRateConfig::Multi { + config: MultiRaterConfig { threads: 8, max_buckets: 4000, max_tokens_per_bucket: 10, refill_interval_millis: 10, refill_qty: 1, }, - kind: crate::proxy::rate_limiting::RequestKeyKind::SourceIp, + kind: crate::proxy::rate_limiting::multi::MultiRequestKeyKind::SourceIp, }, - RaterInstanceConfig { - rater_cfg: RaterConfig { + AllRateConfig::Multi { + config: MultiRaterConfig { threads: 8, max_buckets: 2000, max_tokens_per_bucket: 20, refill_interval_millis: 1, refill_qty: 5, }, - kind: crate::proxy::rate_limiting::RequestKeyKind::Uri { + kind: crate::proxy::rate_limiting::multi::MultiRequestKeyKind::Uri { pattern: RegexShim::new("static/.*").unwrap(), }, }, + AllRateConfig::Single { + config: crate::proxy::rate_limiting::single::SingleInstanceConfig { + max_tokens_per_bucket: 50, + refill_interval_millis: 3, + refill_qty: 2, + }, + kind: crate::proxy::rate_limiting::single::SingleRequestKeyKind::UriGroup { + pattern: RegexShim::new(r".*\.mp4").unwrap(), + }, + }, ], }, }, diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs index 1c82b34..951a2ed 100644 --- a/source/river/src/proxy/mod.rs +++ b/source/river/src/proxy/mod.rs @@ -31,10 +31,7 @@ use crate::{ }; use self::{ - rate_limiting::{ - multi::{MultiRequestKeyKind, Rater, RaterInstance}, - AllRequestKeyKind, - }, + rate_limiting::{multi::MultiRaterInstance, single::SingleInstance, Outcome}, request_filters::RequestFilterMod, }; @@ -45,7 +42,8 @@ pub mod request_selector; pub mod response_modifiers; pub struct RateLimiters { - request_filter_stage: Vec, + request_filter_stage_multi: Vec, + request_filter_stage_single: Vec, } /// The [RiverProxyService] is intended to capture the behaviors used to extend @@ -112,28 +110,21 @@ where .expect("static should not error"); // end of TODO - // NOTE: Using a silly filter map here because we might have rules in the future - // that can't be matched until later stages, such as if we wanted rate limiting - // buckets for each upstream that we choose - I want the match statement in this - // to no longer compile when we add more RequestKeyKinds. 
-        #[allow(clippy::unnecessary_filter_map)]
-        let request_filter_stage = conf
-            .rate_limiting
-            .rules
-            .iter()
-            .filter_map(|cfg| match &cfg.kind {
-                AllRequestKeyKind::SourceIp => Some(RaterInstance {
-                    rater: Rater::new(cfg.rater_cfg.clone()),
-                    kind: MultiRequestKeyKind::SourceIp,
-                }),
-                AllRequestKeyKind::Uri { pattern } => Some(RaterInstance {
-                    rater: Rater::new(cfg.rater_cfg.clone()),
-                    kind: MultiRequestKeyKind::Uri {
-                        pattern: pattern.clone(),
-                    },
-                }),
-            })
-            .collect();
+        let mut request_filter_stage_multi = vec![];
+        let mut request_filter_stage_single = vec![];
+
+        for rule in conf.rate_limiting.rules {
+            match rule {
+                rate_limiting::AllRateConfig::Single { kind, config } => {
+                    let rater = SingleInstance::new(config, kind);
+                    request_filter_stage_single.push(rater);
+                }
+                rate_limiting::AllRateConfig::Multi { kind, config } => {
+                    let rater = MultiRaterInstance::new(config, kind);
+                    request_filter_stage_multi.push(rater);
+                }
+            }
+        }

 let mut my_proxy = pingora_proxy::http_proxy_service_with_name(
 &server.configuration,
@@ -142,7 +133,8 @@ where
 load_balancer: upstreams,
 request_selector: conf.upstream_options.selector,
 rate_limiters: RateLimiters {
-                request_filter_stage,
+                request_filter_stage_multi,
+                request_filter_stage_single,
 },
 },
 &conf.name,
@@ -280,9 +272,40 @@ where
 where
 Self::CTX: Send + Sync,
 {
-        // First: do rate limiting at this stage - quickly check if there are none and skip over
-        // entirely if so
-        if !self.rate_limiters.request_filter_stage.is_empty() {
+        let multis = self
+            .rate_limiters
+            .request_filter_stage_multi
+            .iter()
+            .filter_map(|l| l.get_ticket(session));
+
+        let singles = self
+            .rate_limiters
+            .request_filter_stage_single
+            .iter()
+            .filter_map(|l| l.get_ticket(session));
+
+        // Attempt to get all tokens
+        //
+        // TODO: If https://github.com/udoprog/leaky-bucket/issues/17 is resolved we could
+        // remember the buckets that we did get approved for, and "return" the unused tokens.
+        //
+        // For now, if some tickets succeed but subsequent tickets fail, the preceding
+        // approved tokens are just "burned".
+        //
+        // TODO: If https://github.com/udoprog/leaky-bucket/issues/34 is resolved we could
+        // support a "max debt" number, allowing us to delay if acquisition of the token
+        // would happen soon-ish, instead of immediately 429-ing if the token we need is
+        // about to become available.
+        if singles
+            .chain(multis)
+            .any(|t| t.now_or_never() == Outcome::Declined)
+        {
+            tracing::trace!("Rejecting due to rate limiting failure");
+            session.downstream_session.respond_error(429).await;
+            return Ok(true);
+        }
+
        if !self.rate_limiters.request_filter_stage_single.is_empty() {
            // Attempt to get all tokens
            //
            // TODO: If https://github.com/udoprog/leaky-bucket/issues/17 is resolved we could
            // remember the buckets that we did get approved for, and "return" the unused tokens.
            //
            // For now, if some tickets succeed but subsequent tickets fail, the preceeding
            // approved tokens are just "burned".
            //
            // TODO: If https://github.com/udoprog/leaky-bucket/issues/34 is resolved we could
            // support a "max debt" number, allowing us to delay if acquisition of the token
            // would happen soon-ish, instead of immediately 429-ing if the token we need is
            // about to become available.
-            for limiter in self.rate_limiters.request_filter_stage.iter() {
+            for limiter in self.rate_limiters.request_filter_stage_single.iter() {
                if let Some(ticket) = limiter.get_ticket(session) {
                    match ticket.now_or_never() {
-                        rate_limiting::Outcome::Approved => {
+                        Outcome::Approved => {
                            // Approved, move on
                        }
-                        rate_limiting::Outcome::Declined => {
+                        Outcome::Declined => {
                            tracing::trace!("Rejecting due to rate limiting failure");
                            session.downstream_session.respond_error(429).await;
                            return Ok(true);
diff --git a/source/river/src/proxy/rate_limiting/mod.rs b/source/river/src/proxy/rate_limiting/mod.rs
index 10f97a1..f6ba307 100644
--- a/source/river/src/proxy/rate_limiting/mod.rs
+++ b/source/river/src/proxy/rate_limiting/mod.rs
@@ -1,8 +1,34 @@
-use std::ops::Deref;
+use std::{ops::Deref, sync::Arc};

+use leaky_bucket::RateLimiter;
 use regex::Regex;

+use self::{
+    multi::{MultiRaterConfig, MultiRequestKeyKind},
+    single::{SingleInstanceConfig, SingleRequestKeyKind},
+};
+
+//
+// We have two kinds of rate limiters:
+//
+// * "Multi" rate limiters use a cache of buckets. These are used when we remember
+//   multiple bucket keys, like tracking all of the source IP addresses
+// * "Single" rate limiters use a single bucket, for example `any-matching-uri`,
+//   which uses a single bucket for all matching URIs
 pub mod multi;
+pub mod single;
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum AllRateConfig {
+    Single {
+        kind: SingleRequestKeyKind,
+        config: SingleInstanceConfig,
+    },
+    Multi {
+        kind: MultiRequestKeyKind,
+        config: MultiRaterConfig,
+    },
+}

 #[derive(Debug, Clone)]
 pub struct RegexShim(pub Regex);
@@ -33,35 +59,22 @@ pub enum Outcome {
     Declined,
 }

-#[derive(Debug, Clone, PartialEq)]
-pub enum AllRequestKeyKind {
-    SourceIp,
-    Uri { pattern: RegexShim },
+/// A claim ticket for the leaky bucket queue
+///
+/// You are expected to call [`Ticket::wait()`] to wait for your turn to perform
+/// the rate limited operation.
+#[must_use = "You must call `Ticket::wait()` to wait your turn!"]
+pub struct Ticket {
+    limiter: Arc<RateLimiter>,
 }

-#[derive(Debug, Clone, PartialEq)]
-pub struct RaterInstanceConfig {
-    pub rater_cfg: RaterConfig,
-    pub kind: AllRequestKeyKind,
-}
-
-/// Configuration for the [`Rater`]
-#[derive(Debug, PartialEq, Clone)]
-pub struct RaterConfig {
-    /// The number of expected concurrent threads - should match the number of
-    /// tokio threadpool workers
-    pub threads: usize,
-    /// The peak number of leaky buckets we aim to have live at once
-    ///
-    /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`]
-    /// for docs on calculating actual memory usage based on these parameters
-    pub max_buckets: usize,
-    /// The max and initial number of tokens in the leaky bucket - this is the number of
-    /// requests that can go through without any waiting if the bucket is full
-    pub max_tokens_per_bucket: usize,
-    /// The interval between "refills" of the bucket, e.g. the bucket refills `refill_qty`
-    /// every `refill_interval_millis`
-    pub refill_interval_millis: usize,
-    /// The number of tokens added to the bucket every `refill_interval_millis`
-    pub refill_qty: usize,
+impl Ticket {
+    /// Try to get a token immediately from the bucket.
+ pub fn now_or_never(self) -> Outcome { + if self.limiter.try_acquire(1) { + Outcome::Approved + } else { + Outcome::Declined + } + } } diff --git a/source/river/src/proxy/rate_limiting/multi.rs b/source/river/src/proxy/rate_limiting/multi.rs index 57a0ac3..3886ab1 100644 --- a/source/river/src/proxy/rate_limiting/multi.rs +++ b/source/river/src/proxy/rate_limiting/multi.rs @@ -11,7 +11,36 @@ use leaky_bucket::RateLimiter; use pandora_module_utils::pingora::SocketAddr; use pingora_proxy::Session; -use super::{Outcome, RaterConfig, RegexShim}; +use crate::proxy::rate_limiting::Ticket; + +use super::RegexShim; + +#[derive(Debug, Clone, PartialEq)] +pub struct MultiRaterInstanceConfig { + pub rater_cfg: MultiRaterConfig, + pub kind: MultiRequestKeyKind, +} + +/// Configuration for the [`Rater`] +#[derive(Debug, PartialEq, Clone)] +pub struct MultiRaterConfig { + /// The number of expected concurrent threads - should match the number of + /// tokio threadpool workers + pub threads: usize, + /// The peak number of leaky buckets we aim to have live at once + /// + /// NOTE: This is not a hard limit of the amount of memory used. See [`ARCacheBuilder`] + /// for docs on calculating actual memory usage based on these parameters + pub max_buckets: usize, + /// The max and initial number of tokens in the leaky bucket - this is the number of + /// requests that can go through without any waiting if the bucket is full + pub max_tokens_per_bucket: usize, + /// The interval between "refills" of the bucket, e.g. the bucket refills `refill_qty` + /// every `refill_interval_millis` + pub refill_interval_millis: usize, + /// The number of tokens added to the bucket every `refill_interval_millis` + pub refill_qty: usize, +} #[derive(Debug, Clone, PartialEq)] pub enum MultiRequestKeyKind { @@ -26,18 +55,25 @@ pub enum MultiRequestKey { } #[derive(Debug)] -pub struct RaterInstance { +pub struct MultiRaterInstance { pub rater: Rater, pub kind: MultiRequestKeyKind, } -impl RaterInstance { - pub fn get_ticket(&self, session: &mut Session) -> Option { +impl MultiRaterInstance { + pub fn new(config: MultiRaterConfig, kind: MultiRequestKeyKind) -> Self { + Self { + rater: Rater::new(config), + kind, + } + } + + pub fn get_ticket(&self, session: &Session) -> Option { let key = self.get_key(session)?; Some(self.rater.get_ticket(key)) } - pub fn get_key(&self, session: &mut Session) -> Option { + pub fn get_key(&self, session: &Session) -> Option { match &self.kind { MultiRequestKeyKind::SourceIp => { let src = session.downstream_session.client_addr()?; @@ -131,8 +167,8 @@ where /// Create a new rate limiter with the given configuration. /// /// See [`RaterConfig`] for configuration options. - pub fn new(config: RaterConfig) -> Self { - let RaterConfig { + pub fn new(config: MultiRaterConfig) -> Self { + let MultiRaterConfig { threads, max_buckets, max_tokens_per_bucket, @@ -207,34 +243,10 @@ where } } -#[allow(dead_code)] -#[derive(Debug, PartialEq, Clone)] -pub enum TicketError { - BurstLimitExceeded, -} - -/// A claim ticket for the leaky bucket queue -/// -/// You are expected to call [`Ticket::wait()`] to wait for your turn to perform -/// the rate limited option. -#[must_use = "You must call `Ticket::wait()` to wait your turn!"] -pub struct Ticket { - limiter: Arc, -} - -impl Ticket { - /// Try to get a token immediately from the bucket. 
- pub fn now_or_never(self) -> Outcome { - if self.limiter.try_acquire(1) { - Outcome::Approved - } else { - Outcome::Declined - } - } -} - #[cfg(test)] mod test { + use crate::proxy::rate_limiting::Outcome; + use super::*; use std::time::Instant; use tokio::time::interval; @@ -242,7 +254,7 @@ mod test { #[tokio::test] async fn smoke() { let _ = tracing_subscriber::fmt::try_init(); - let config = RaterConfig { + let config = MultiRaterConfig { threads: 2, max_buckets: 5, max_tokens_per_bucket: 3, diff --git a/source/river/src/proxy/rate_limiting/single.rs b/source/river/src/proxy/rate_limiting/single.rs new file mode 100644 index 0000000..2d23d47 --- /dev/null +++ b/source/river/src/proxy/rate_limiting/single.rs @@ -0,0 +1,68 @@ +use std::{sync::Arc, time::Duration}; + +use leaky_bucket::RateLimiter; +use pingora_proxy::Session; + +use super::{RegexShim, Ticket}; + +#[derive(Debug, PartialEq, Clone)] +pub struct SingleInstanceConfig { + /// The max and initial number of tokens in the leaky bucket - this is the number of + /// requests that can go through without any waiting if the bucket is full + pub max_tokens_per_bucket: usize, + /// The interval between "refills" of the bucket, e.g. the bucket refills `refill_qty` + /// every `refill_interval_millis` + pub refill_interval_millis: usize, + /// The number of tokens added to the bucket every `refill_interval_millis` + pub refill_qty: usize, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum SingleRequestKeyKind { + UriGroup { pattern: RegexShim }, +} + +#[derive(Debug)] +pub struct SingleInstance { + pub limiter: Arc, + pub kind: SingleRequestKeyKind, +} + +impl SingleInstance { + /// Create a new rate limiter with the given configuration. + /// + /// See [`RaterConfig`] for configuration options. 
+ pub fn new(config: SingleInstanceConfig, kind: SingleRequestKeyKind) -> Self { + let SingleInstanceConfig { + max_tokens_per_bucket, + refill_interval_millis, + refill_qty, + } = config; + + let limiter = RateLimiter::builder() + .initial(max_tokens_per_bucket) + .max(max_tokens_per_bucket) + .interval(Duration::from_millis(refill_interval_millis as u64)) + .refill(refill_qty) + .fair(true) + .build(); + let limiter = Arc::new(limiter); + + Self { limiter, kind } + } + + pub fn get_ticket(&self, session: &Session) -> Option { + match &self.kind { + SingleRequestKeyKind::UriGroup { pattern } => { + let uri_path = session.downstream_session.req_header().uri.path(); + if pattern.is_match(uri_path) { + Some(Ticket { + limiter: self.limiter.clone(), + }) + } else { + None + } + } + } + } +} From d0e171b0cf6a75066b26a777ad110633ba775454 Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 17:04:05 +0200 Subject: [PATCH 20/23] Update docs --- source/river/assets/test-config.kdl | 5 +++++ user-manual/src/config/kdl.md | 34 ++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index 99a5312..ea054f3 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -61,6 +61,11 @@ services { rule kind="specific-uri" pattern="static/.*" \ max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 + // This rate limiting is based on ANY URI paths that match the pattern + // + // * A single bucket will be used for all URIs that match the pattern + // * We allow a burst of up to 50 requests for any MP4 files + // * The bucket for all MP4 files will refill at a rate of 2 requests per 3 milliseconds rule kind="any-matching-uri" pattern=r".*\.mp4" \ tokens-per-bucket=50 refill-qty=2 refill-rate-ms=3 } diff --git a/user-manual/src/config/kdl.md b/user-manual/src/config/kdl.md index f121420..a923632 100644 --- a/user-manual/src/config/kdl.md +++ b/user-manual/src/config/kdl.md @@ -280,13 +280,14 @@ Example: ``` rate-limiting { - timeout millis=100 - rule kind="source-ip" \ max-buckets=4000 tokens-per-bucket=10 refill-qty=1 refill-rate-ms=10 - rule kind="uri" pattern="static/.*" \ + rule kind="specific-uri" pattern="static/.*" \ max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 + + rule kind="any-matching-uri" pattern=r".*\.mp4" \ + tokens-per-bucket=50 refill-qty=2 refill-rate-ms=3 } ``` @@ -315,7 +316,7 @@ Once a refill occurs, additional requests may be served. ##### How many buckets? Some rules require many buckets. For example, rules based on the source IP address will create a bucket -for each unique source IP address observed in a request. +for each unique source IP address observed in a request. We refer to these as "multi" rules. However, each of these buckets require space to contain the metadata, and to avoid unbounded growth, we allow for a configurable `max-buckets` number, which serves to influence the total memory required @@ -333,6 +334,9 @@ If `max-buckets` is set too low, then buckets will be "evicted" from the cache, requests matching that bucket will require the creation of a new bucket (with a full set of tokens), potentially defeating the objective of accurate rate limiting. +For "single" rules, or rules that do not have multiple buckets, a single bucket will be shared by all +requests matching the rule. 
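+
+As a rough sketch (a hypothetical illustration, not River's actual data structures), the
+difference between the two flavors is only how a bucket is looked up for a request:
+
+```rust
+use std::collections::HashMap;
+
+/// Hypothetical bucket: just a token count, for illustration only.
+struct Bucket { tokens: usize }
+
+/// "Multi" rules key a map of buckets by the extracted request key (e.g. the
+/// source IP); "single" rules share one bucket across all matching requests.
+enum RuleBuckets {
+    Multi(HashMap<String, Bucket>),
+    Single(Bucket),
+}
+
+impl RuleBuckets {
+    fn bucket_for(&mut self, key: &str) -> &mut Bucket {
+        match self {
+            // One bucket per distinct key, created on first sight with an
+            // assumed `tokens-per-bucket` of 10
+            RuleBuckets::Multi(map) => map.entry(key.to_string()).or_insert(Bucket { tokens: 10 }),
+            // The key is ignored: every matching request shares this bucket
+            RuleBuckets::Single(bucket) => bucket,
+        }
+    }
+}
+```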
+ ##### Gotta claim 'em all When multiple rules apply to a single request, for example rules based on both source IP address, @@ -342,19 +346,29 @@ obtain the IP address token, but the request will be rejected as the URI bucket ##### Kinds of Rules -Currently two kinds of rules are supported: +Currently three kinds of rules are supported: -* `kind="source-ip"` - this tracks the IP address of the requestor. A unique bucket will be created for - the IPv4 or IPv6 address of the requestor. -* `kind="uri" pattern="REGEX"` - This tracks the URI path of the request, such as `static/images/example.jpg` - * If the request's URI path matches the provided `REGEX`, the full URI path will be assigned to a given - bucket +* `kind="source-ip"` - this tracks the IP address of the requestor. + * This rule is a "multi" rule: A unique bucket will be created for + the IPv4 or IPv6 address of the requestor. + * The `max-buckets` parameter controls how many IP addresses will be remembered. +* `kind="specific-uri" pattern="REGEX"` - This tracks the URI path of the request, such as `static/images/example.jpg` + * This rule is a "multi" rule: if the request's URI path matches the provided `REGEX`, + the full URI path will be assigned to a given bucket * For example, if the regex `static/.*` was provided: * `index.html` would not match this rule, and would not require obtaining a token * `static/images/example.jpg` would match this rule, and would require obtaining a token * `static/styles/example.css` would also match this rule, and would require obtaining a token * Note that `static/images/example.jpg` and `static/styles/example.css` would each have a UNIQUE bucket. +* `kind="any-matching-uri" pattern="REGEX"` - This tracks the URI path of the request, such as `static/videos/example.mp4` + * This is a "single" rule: ANY path matching `REGEX` will share a single bucket + * For example, if the regex `.*\.mp4` was provided: + * `index.html` would not match this rule, and would not require obtaining a token + * `static/videos/example1.mp4` would match this rule, and would require obtaining a token + * `static/videos/example2.mp4` would also match this rule, and would require obtaining a token + * Note that `static/videos/example1.mp4` and `static/videos/example2.mp4` would share a SINGLE bucket + (also shared with any other path containing an MP4 file) ### `services.$NAME.file-server` From e833387cf83d2614cd43a003a68627810bfd88d7 Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 17:11:43 +0200 Subject: [PATCH 21/23] Fix example config --- source/river/assets/test-config.kdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/river/assets/test-config.kdl b/source/river/assets/test-config.kdl index ea054f3..d6e1e2c 100644 --- a/source/river/assets/test-config.kdl +++ b/source/river/assets/test-config.kdl @@ -42,8 +42,8 @@ services { // the `source-ip` rule. 
// // A request to URI `/static/style.css` from IP 1.2.3.4 will need to get a token from - // BOTH the `source-ip` rule (from the `1.2.3.4` bucket), AND the `uri` rule (from the - // `/static/style.css` bucket) + // BOTH the `source-ip` rule (from the `1.2.3.4` bucket), AND the `specific-uri` rule + // (from the `/static/style.css` bucket) rate-limiting { // This rate limiting rule is based on the source IP address // From 093049f2b8673e10988f41dc4329e9af3a972ffd Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 17:15:31 +0200 Subject: [PATCH 22/23] Remove redundant rate limiting check --- source/river/src/proxy/mod.rs | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/source/river/src/proxy/mod.rs b/source/river/src/proxy/mod.rs index 951a2ed..b875d88 100644 --- a/source/river/src/proxy/mod.rs +++ b/source/river/src/proxy/mod.rs @@ -305,35 +305,6 @@ where return Ok(true); } - if !self.rate_limiters.request_filter_stage_single.is_empty() { - // Attempt to get all tokens - // - // TODO: If https://github.com/udoprog/leaky-bucket/issues/17 is resolved we could - // remember the buckets that we did get approved for, and "return" the unused tokens. - // - // For now, if some tickets succeed but subsequent tickets fail, the preceeding - // approved tokens are just "burned". - // - // TODO: If https://github.com/udoprog/leaky-bucket/issues/34 is resolved we could - // support a "max debt" number, allowing us to delay if acquisition of the token - // would happen soon-ish, instead of immediately 429-ing if the token we need is - // about to become available. - for limiter in self.rate_limiters.request_filter_stage_single.iter() { - if let Some(ticket) = limiter.get_ticket(session) { - match ticket.now_or_never() { - Outcome::Approved => { - // Approved, move on - } - Outcome::Declined => { - tracing::trace!("Rejecting due to rate limiting failure"); - session.downstream_session.respond_error(429).await; - return Ok(true); - } - } - } - } - } - for filter in &self.modifiers.request_filters { match filter.request_filter(session, ctx).await { // If Ok true: we're done handling this request From 61f53643e1d36ce3e3b8ed2b1e0e32b854d1148f Mon Sep 17 00:00:00 2001 From: James Munns Date: Thu, 29 Aug 2024 17:22:23 +0200 Subject: [PATCH 23/23] Fix some more docs --- source/river/src/proxy/rate_limiting/multi.rs | 2 +- source/river/src/proxy/rate_limiting/single.rs | 2 +- user-manual/src/config/kdl.md | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/source/river/src/proxy/rate_limiting/multi.rs b/source/river/src/proxy/rate_limiting/multi.rs index 3886ab1..247bd99 100644 --- a/source/river/src/proxy/rate_limiting/multi.rs +++ b/source/river/src/proxy/rate_limiting/multi.rs @@ -166,7 +166,7 @@ where { /// Create a new rate limiter with the given configuration. /// - /// See [`RaterConfig`] for configuration options. + /// See [`MultiRaterConfig`] for configuration options. pub fn new(config: MultiRaterConfig) -> Self { let MultiRaterConfig { threads, diff --git a/source/river/src/proxy/rate_limiting/single.rs b/source/river/src/proxy/rate_limiting/single.rs index 2d23d47..42d968e 100644 --- a/source/river/src/proxy/rate_limiting/single.rs +++ b/source/river/src/proxy/rate_limiting/single.rs @@ -31,7 +31,7 @@ pub struct SingleInstance { impl SingleInstance { /// Create a new rate limiter with the given configuration. /// - /// See [`RaterConfig`] for configuration options. + /// See [`SingleInstanceConfig`] for configuration options. 
pub fn new(config: SingleInstanceConfig, kind: SingleRequestKeyKind) -> Self { let SingleInstanceConfig { max_tokens_per_bucket, diff --git a/user-manual/src/config/kdl.md b/user-manual/src/config/kdl.md index a923632..8c34872 100644 --- a/user-manual/src/config/kdl.md +++ b/user-manual/src/config/kdl.md @@ -98,13 +98,14 @@ services { } } rate-limiting { - timeout millis=100 - rule kind="source-ip" \ max-buckets=4000 tokens-per-bucket=10 refill-qty=1 refill-rate-ms=10 - rule kind="uri" pattern="static/.*" \ + rule kind="specific-uri" pattern="static/.*" \ max-buckets=2000 tokens-per-bucket=20 refill-qty=5 refill-rate-ms=1 + + rule kind="any-matching-uri" pattern=r".*\.mp4" \ + tokens-per-bucket=50 refill-qty=2 refill-rate-ms=3 } } Example3 {