From 38e18ff0ab32f4a9d840650e1e6cec22d211362e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7?= Date: Fri, 18 Oct 2024 12:15:14 +0200 Subject: [PATCH 1/4] Change how dynamic captcha stores data --- .../src/storage/registration_rates.rs | 97 +++++++++++++++---- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs index 84d1e0855c..67d255e12b 100644 --- a/src/internet_identity/src/storage/registration_rates.rs +++ b/src/internet_identity/src/storage/registration_rates.rs @@ -9,10 +9,10 @@ use std::time::Duration; pub struct RegistrationRates { /// Min heap of registration timestamps to calculate the reference registration rate. - /// [prune_data] removes values that are in the past whenever new data is added. + /// [prune_data] removes values that are older than now minus the reference rate interval whenever new data is added. reference_rate_data: MinHeap, /// Min heap of registration timestamps to calculate the current registration rate. - /// [prune_data] removes values that are in the past whenever new data is added. + /// [prune_data] removes values that are older than now minus the current rate interval whenever new data is added. current_rate_data: MinHeap, } @@ -47,16 +47,17 @@ impl RegistrationRates { } pub fn new_registration(&mut self) { self.prune_expired(); - let Some(data_retention) = dynamic_captcha_config() else { + // Return if no dynamic captcha + if dynamic_captcha_config().is_none() { return; }; let now = time(); self.reference_rate_data - .push(&(now + data_retention.reference_rate_retention_ns)) + .push(&now) .expect("out of memory"); self.current_rate_data - .push(&(now + data_retention.current_rate_retention_ns)) + .push(&now) .expect("out of memory"); } @@ -67,12 +68,10 @@ impl RegistrationRates { let reference_rate_per_second = calculate_registration_rate( now, &self.reference_rate_data, - config.reference_rate_retention_ns, ); let current_rate_per_second = calculate_registration_rate( now, &self.current_rate_data, - config.current_rate_retention_ns, ); let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier; let rates = NormalizedRegistrationRates { @@ -84,8 +83,13 @@ impl RegistrationRates { } fn prune_expired(&mut self) { - prune_data(&mut self.reference_rate_data); - prune_data(&mut self.current_rate_data); + let Some(data_retention) = dynamic_captcha_config() else { + return; + }; + println!("in da prun_expired"); + println!("reference_rate {}", data_retention.reference_rate_retention_ns.to_string()); + prune_data(&mut self.reference_rate_data, data_retention.reference_rate_retention_ns); + prune_data(&mut self.current_rate_data, data_retention.current_rate_retention_ns); } } @@ -111,17 +115,13 @@ impl RegistrationRates { fn calculate_registration_rate( now: u64, data: &MinHeap, - data_retention_ns: u64, ) -> f64 { data - // get the oldest expiration timestamp + // get the oldest value .peek() - // calculate the registration timestamp from expiration - .map(|ts| ts - data_retention_ns) // calculate the time window length with respect to the current time .map(|ts| now - ts) - // the value _could_ be 0 if the oldest timestamp was added in the same execution round - // -> filter to avoid division by 0 + // There could be negative values if they haven't been pruned yet. .filter(|val| *val != 0) // use the value to calculate the rate per second .map(|val| rate_per_second(data.len(), val)) @@ -151,7 +151,7 @@ fn dynamic_captcha_config() -> Option { } } -fn prune_data(data: &mut MinHeap) { +fn prune_data(data: &mut MinHeap, interval_ns: u64) { const MAX_TO_PRUNE: usize = 100; let now = time(); @@ -162,7 +162,7 @@ fn prune_data(data: &mut MinHeap) { // The timestamps are sorted because the expiration is constant and time() is monotonic // -> we can stop pruning once we reach a not expired timestamp - if timestamp > now { + if timestamp > (now - interval_ns) { break; } data.pop(); @@ -281,6 +281,69 @@ mod test { ); } + #[test] + fn previous_config_should_not_affect_new_config() { + let mut registration_rates = setup(); + // initialize reference rate with 1 registration / second + for _ in 0..1000 { + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + // raise current rate to 3 registrations / second + for _ in 0..100 { + registration_rates.new_registration(); + registration_rates.new_registration(); + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + assert_eq!( + registration_rates.registration_rates().unwrap(), + NormalizedRegistrationRates { + current_rate_per_second: 3.0, + reference_rate_per_second: 1.2, // increases too, but more slowly + captcha_threshold_rate: 1.44, // reference rate * 1.2 (as per config) + } + ); + + // Change the config. + state::persistent_state_mut(|ps| { + ps.captcha_config = CaptchaConfig { + max_unsolved_captchas: 500, + captcha_trigger: CaptchaTrigger::Dynamic { + threshold_pct: 10, + current_rate_sampling_interval_s: 100, + reference_rate_sampling_interval_s: 100, + }, + } + }); + + // Needed to prune the data after new config + // It prunes 100 for each new registration + // we added 1100 before. Remove them with 110, plus 2 to remove the new 110 added. + for _ in 0..112 { + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + // Set current rate to 2 registrations per seconds for 100 seconds + // which is the current and reference rate. + for _ in 0..100 { + registration_rates.new_registration(); + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + assert_eq!( + registration_rates.registration_rates().unwrap(), + NormalizedRegistrationRates { + current_rate_per_second: 2.0, + reference_rate_per_second: 2.0, // increases too, but more slowly + captcha_threshold_rate: 2.2, // reference rate * 1.1 (as per config) + } + ); + } + #[test] fn should_prune_old_data() { let mut registration_rates = setup(); From bfc5a9304d0b96ebfc987ccfbfc4a21322528dc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7?= Date: Fri, 18 Oct 2024 12:21:15 +0200 Subject: [PATCH 2/4] Format rust --- .../src/storage/registration_rates.rs | 41 ++++++++----------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs index 67d255e12b..82fa705001 100644 --- a/src/internet_identity/src/storage/registration_rates.rs +++ b/src/internet_identity/src/storage/registration_rates.rs @@ -53,26 +53,16 @@ impl RegistrationRates { }; let now = time(); - self.reference_rate_data - .push(&now) - .expect("out of memory"); - self.current_rate_data - .push(&now) - .expect("out of memory"); + self.reference_rate_data.push(&now).expect("out of memory"); + self.current_rate_data.push(&now).expect("out of memory"); } pub fn registration_rates(&self) -> Option { let config = dynamic_captcha_config()?; let now = time(); - let reference_rate_per_second = calculate_registration_rate( - now, - &self.reference_rate_data, - ); - let current_rate_per_second = calculate_registration_rate( - now, - &self.current_rate_data, - ); + let reference_rate_per_second = calculate_registration_rate(now, &self.reference_rate_data); + let current_rate_per_second = calculate_registration_rate(now, &self.current_rate_data); let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier; let rates = NormalizedRegistrationRates { reference_rate_per_second, @@ -86,10 +76,14 @@ impl RegistrationRates { let Some(data_retention) = dynamic_captcha_config() else { return; }; - println!("in da prun_expired"); - println!("reference_rate {}", data_retention.reference_rate_retention_ns.to_string()); - prune_data(&mut self.reference_rate_data, data_retention.reference_rate_retention_ns); - prune_data(&mut self.current_rate_data, data_retention.current_rate_retention_ns); + prune_data( + &mut self.reference_rate_data, + data_retention.reference_rate_retention_ns, + ); + prune_data( + &mut self.current_rate_data, + data_retention.current_rate_retention_ns, + ); } } @@ -112,10 +106,7 @@ impl RegistrationRates { /// However, because the data is not actually spanning 3 weeks, this underestimates the actual rate. /// Taking into account that the data is only spanning 3 days we get the following: /// 3 registrations / 259200 seconds = 0.00001157407407 registrations / second -fn calculate_registration_rate( - now: u64, - data: &MinHeap, -) -> f64 { +fn calculate_registration_rate(now: u64, data: &MinHeap) -> f64 { data // get the oldest value .peek() @@ -324,9 +315,11 @@ mod test { // we added 1100 before. Remove them with 110, plus 2 to remove the new 110 added. for _ in 0..112 { registration_rates.new_registration(); - TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); } + // Pass enough time for the added registrations to also be pruned by new ones. + TIME.with_borrow_mut(|t| *t += Duration::from_secs(100).as_nanos() as u64); + // Set current rate to 2 registrations per seconds for 100 seconds // which is the current and reference rate. for _ in 0..100 { @@ -339,7 +332,7 @@ mod test { NormalizedRegistrationRates { current_rate_per_second: 2.0, reference_rate_per_second: 2.0, // increases too, but more slowly - captcha_threshold_rate: 2.2, // reference rate * 1.1 (as per config) + captcha_threshold_rate: 2.2, // reference rate * 1.1 (as per config) } ); } From 3b17b89c4c19e4475475a474d3077ad613ff1ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7?= Date: Fri, 18 Oct 2024 13:25:56 +0200 Subject: [PATCH 3/4] CR changes --- .../src/storage/registration_rates.rs | 30 +++++-------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs index 82fa705001..bd57fd10e3 100644 --- a/src/internet_identity/src/storage/registration_rates.rs +++ b/src/internet_identity/src/storage/registration_rates.rs @@ -112,8 +112,6 @@ fn calculate_registration_rate(now: u64, data: &MinHeap .peek() // calculate the time window length with respect to the current time .map(|ts| now - ts) - // There could be negative values if they haven't been pruned yet. - .filter(|val| *val != 0) // use the value to calculate the rate per second .map(|val| rate_per_second(data.len(), val)) // if we don't have data, the rate is 0 @@ -298,42 +296,30 @@ mod test { } ); - // Change the config. + // Change the config where both reference and base rate have the same interval. + let interval_s = 100; state::persistent_state_mut(|ps| { ps.captcha_config = CaptchaConfig { max_unsolved_captchas: 500, captcha_trigger: CaptchaTrigger::Dynamic { threshold_pct: 10, - current_rate_sampling_interval_s: 100, - reference_rate_sampling_interval_s: 100, + current_rate_sampling_interval_s: interval_s, + reference_rate_sampling_interval_s: interval_s, }, } }); - // Needed to prune the data after new config - // It prunes 100 for each new registration - // we added 1100 before. Remove them with 110, plus 2 to remove the new 110 added. - for _ in 0..112 { - registration_rates.new_registration(); - } - - // Pass enough time for the added registrations to also be pruned by new ones. - TIME.with_borrow_mut(|t| *t += Duration::from_secs(100).as_nanos() as u64); - // Set current rate to 2 registrations per seconds for 100 seconds - // which is the current and reference rate. for _ in 0..100 { registration_rates.new_registration(); registration_rates.new_registration(); TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); } + let current_rates = registration_rates.registration_rates().unwrap(); + // If the intervals are the same, the rates are also the same. assert_eq!( - registration_rates.registration_rates().unwrap(), - NormalizedRegistrationRates { - current_rate_per_second: 2.0, - reference_rate_per_second: 2.0, // increases too, but more slowly - captcha_threshold_rate: 2.2, // reference rate * 1.1 (as per config) - } + current_rates.reference_rate_per_second, + current_rates.current_rate_per_second ); } From 11b55529e28347f67d82023b6848c877929c4762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7?= Date: Fri, 18 Oct 2024 15:11:49 +0200 Subject: [PATCH 4/4] Add filter by 0 --- src/internet_identity/src/storage/registration_rates.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs index bd57fd10e3..8ab12da740 100644 --- a/src/internet_identity/src/storage/registration_rates.rs +++ b/src/internet_identity/src/storage/registration_rates.rs @@ -112,6 +112,8 @@ fn calculate_registration_rate(now: u64, data: &MinHeap .peek() // calculate the time window length with respect to the current time .map(|ts| now - ts) + // the value _could_ be 0 if the oldest timestamp was added in the same execution round + .filter(|val| *val != 0) // use the value to calculate the rate per second .map(|val| rate_per_second(data.len(), val)) // if we don't have data, the rate is 0