From 6fb2cb9199986baa5d37bb80c29fc30f7eebea5e Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 8 Jun 2022 17:35:28 -0300 Subject: [PATCH 01/12] delete old database directories --- zebra-state/src/config.rs | 8 ++++++ zebrad/src/commands/start.rs | 47 +++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index e84417099e2..882db5df23a 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -57,6 +57,13 @@ pub struct Config { /// /// Set to `None` by default: Zebra continues syncing indefinitely. pub debug_stop_at_height: Option, + + /// Whether to delete the old database directories when present. + /// + /// Set to `true` by default. If this is set to `false`, + /// no check for old database versions will be made and nothing will be + /// deleted. + pub delete_old_database: bool, } fn gen_temp_path(prefix: &str) -> PathBuf { @@ -108,6 +115,7 @@ impl Default for Config { cache_dir, ephemeral: false, debug_stop_at_height: None, + delete_old_database: true, } } } diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index 05b4d726fcb..19f23eb7e65 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -59,7 +59,7 @@ //! * answers RPC client requests using the State Service and Mempool Service //! * submits client transactions to the node's mempool -use std::{cmp::max, ops::Add, time::Duration}; +use std::{cmp::max, fs::remove_dir_all, ops::Add, path::PathBuf, time::Duration}; use abscissa_core::{config, Command, FrameworkError, Options, Runnable}; use chrono::Utc; @@ -105,6 +105,11 @@ impl StartCmd { let config = app_config().clone(); info!(?config); + if config.state.delete_old_database { + info!("checking old database versions"); + check_and_delete_old_databases(config.state.cache_dir.clone())?; + } + info!("initializing node state"); let (state_service, read_only_state_service, latest_chain_tip, chain_tip_change) = zebra_state::init(config.state.clone(), config.network.network); @@ -601,3 +606,43 @@ impl config::Override for StartCmd { Ok(config) } } + +/// Check if there are old database folders and delete them from the filesystem. +/// +/// Iterate over the files and directories in the databases folder and delete if: +/// - The entry is a directory. +/// - The directory name has a lenght of at least 2 characters. +/// - The directory name has a prefix `v`. +/// - The directory name without the prefix can be parsed as an unsigned number. +/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. +fn check_and_delete_old_databases(cache_dir: PathBuf) -> Result<(), Report> { + let cache_dir = cache_dir.join("state"); + for entry in (cache_dir.read_dir()?).flatten() { + if entry.file_type()?.is_dir() { + let dir_name = entry + .file_name() + .into_string() + .expect("conversion from osstring to string should work."); + if dir_name.len() >= 2 && dir_name.starts_with('v') { + let potential_version_number_string = dir_name + .strip_prefix('v') + .expect("should not panic as we know there is a `v` prefix.") + .to_string(); + let version_number: u32 = + potential_version_number_string.parse().unwrap_or(u32::MAX); + if version_number < zebra_state::constants::DATABASE_FORMAT_VERSION { + let delete_path = cache_dir.join(dir_name); + info!( + "deleting {}", + delete_path + .to_str() + .expect("path is valid, should not panic") + ); + remove_dir_all(delete_path)?; + } + } + } + } + + Ok(()) +} From 3453b54da6dfc0212f64512c3d55a314d83fa8db Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Thu, 9 Jun 2022 16:42:15 -0300 Subject: [PATCH 02/12] check if state directory exists --- zebrad/src/commands/start.rs | 47 +++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index 19f23eb7e65..1b6a663d10a 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -610,6 +610,7 @@ impl config::Override for StartCmd { /// Check if there are old database folders and delete them from the filesystem. /// /// Iterate over the files and directories in the databases folder and delete if: +/// - The state directory exists. /// - The entry is a directory. /// - The directory name has a lenght of at least 2 characters. /// - The directory name has a prefix `v`. @@ -617,28 +618,30 @@ impl config::Override for StartCmd { /// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. fn check_and_delete_old_databases(cache_dir: PathBuf) -> Result<(), Report> { let cache_dir = cache_dir.join("state"); - for entry in (cache_dir.read_dir()?).flatten() { - if entry.file_type()?.is_dir() { - let dir_name = entry - .file_name() - .into_string() - .expect("conversion from osstring to string should work."); - if dir_name.len() >= 2 && dir_name.starts_with('v') { - let potential_version_number_string = dir_name - .strip_prefix('v') - .expect("should not panic as we know there is a `v` prefix.") - .to_string(); - let version_number: u32 = - potential_version_number_string.parse().unwrap_or(u32::MAX); - if version_number < zebra_state::constants::DATABASE_FORMAT_VERSION { - let delete_path = cache_dir.join(dir_name); - info!( - "deleting {}", - delete_path - .to_str() - .expect("path is valid, should not panic") - ); - remove_dir_all(delete_path)?; + if cache_dir.exists() { + for entry in (cache_dir.read_dir()?).flatten() { + if entry.file_type()?.is_dir() { + let dir_name = entry + .file_name() + .into_string() + .expect("conversion from osstring to string should work."); + if dir_name.len() >= 2 && dir_name.starts_with('v') { + let potential_version_number_string = dir_name + .strip_prefix('v') + .expect("should not panic as we know there is a `v` prefix.") + .to_string(); + let version_number: u32 = + potential_version_number_string.parse().unwrap_or(u32::MAX); + if version_number < zebra_state::constants::DATABASE_FORMAT_VERSION { + let delete_path = cache_dir.join(dir_name); + info!( + "deleting {}", + delete_path + .to_str() + .expect("path is valid, should not panic") + ); + remove_dir_all(delete_path)?; + } } } } From fd02f55cb0cfdf3dacbd37a77609275b96aa24c5 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 15 Jun 2022 11:11:27 -0300 Subject: [PATCH 03/12] skip deleting when ephemeral --- zebrad/src/commands/start.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index 1b6a663d10a..adaea23144d 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -105,7 +105,7 @@ impl StartCmd { let config = app_config().clone(); info!(?config); - if config.state.delete_old_database { + if !config.state.ephemeral && config.state.delete_old_database { info!("checking old database versions"); check_and_delete_old_databases(config.state.cache_dir.clone())?; } From 3cd8ab67450559f7a95bf4011a6b4e43152ef05d Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 15 Jun 2022 16:49:39 -0300 Subject: [PATCH 04/12] split `check_and_delete_old_databases` --- zebrad/src/commands/start.rs | 50 ++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index adaea23144d..0cb45dc002d 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -620,27 +620,14 @@ fn check_and_delete_old_databases(cache_dir: PathBuf) -> Result<(), Report> { let cache_dir = cache_dir.join("state"); if cache_dir.exists() { for entry in (cache_dir.read_dir()?).flatten() { - if entry.file_type()?.is_dir() { - let dir_name = entry - .file_name() - .into_string() - .expect("conversion from osstring to string should work."); - if dir_name.len() >= 2 && dir_name.starts_with('v') { - let potential_version_number_string = dir_name - .strip_prefix('v') - .expect("should not panic as we know there is a `v` prefix.") - .to_string(); - let version_number: u32 = - potential_version_number_string.parse().unwrap_or(u32::MAX); + if let Some(dir_name) = parse_dir_name(entry) { + if let Some(version_number) = parse_version_number(dir_name.clone()) { if version_number < zebra_state::constants::DATABASE_FORMAT_VERSION { let delete_path = cache_dir.join(dir_name); - info!( - "deleting {}", - delete_path - .to_str() - .expect("path is valid, should not panic") - ); - remove_dir_all(delete_path)?; + info!("deleting outdated state directory {:?}", delete_path,); + if remove_dir_all(delete_path).is_err() { + continue; + } } } } @@ -649,3 +636,28 @@ fn check_and_delete_old_databases(cache_dir: PathBuf) -> Result<(), Report> { Ok(()) } + +fn parse_dir_name(entry: std::fs::DirEntry) -> Option { + if let Ok(file_type) = entry.file_type() { + if file_type.is_dir() { + if let Ok(dir_name) = entry.file_name().into_string() { + return Some(dir_name); + } + } + } + None +} + +fn parse_version_number(dir_name: String) -> Option { + if dir_name.len() >= 2 && dir_name.starts_with('v') { + if let Some(potential_version_number) = dir_name.strip_prefix('v') { + return Some( + potential_version_number + .to_string() + .parse() + .unwrap_or(u32::MAX), + ); + } + } + None +} From 189970a85470be44db49c53136b3fc07a6aa8d93 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 15 Jun 2022 18:16:54 -0300 Subject: [PATCH 05/12] move `check_and_delete_old_databases` to state --- zebra-state/src/config.rs | 66 +++++++++++++++++++++++++++++++++++- zebra-state/src/lib.rs | 2 +- zebrad/src/commands/start.rs | 59 ++------------------------------ 3 files changed, 68 insertions(+), 59 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 882db5df23a..ec4e66045f4 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -1,4 +1,7 @@ -use std::path::PathBuf; +use std::{ + fs::{remove_dir_all, DirEntry, ReadDir}, + path::PathBuf, +}; use serde::{Deserialize, Serialize}; @@ -119,3 +122,64 @@ impl Default for Config { } } } + +/// Check if there are old database folders and delete them from the filesystem. +/// +/// Iterate over the files and directories in the databases folder and delete if: +/// - The state directory exists. +/// - The entry is a directory. +/// - The directory name has a lenght of at least 2 characters. +/// - The directory name has a prefix `v`. +/// - The directory name without the prefix can be parsed as an unsigned number. +/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. +pub fn check_and_delete_old_databases(cache_dir: PathBuf) { + let cache_dir = cache_dir.join("state"); + if let Some(read_dir) = read_dir(cache_dir.clone()) { + for entry in read_dir.flatten() { + if let Some(dir_name) = parse_dir_name(entry) { + if let Some(version_number) = parse_version_number(dir_name.clone()) { + if version_number < crate::constants::DATABASE_FORMAT_VERSION { + let delete_path = cache_dir.join(dir_name); + if remove_dir_all(delete_path.clone()).is_ok() { + info!("deleted outdated state directory {:?}", delete_path); + } + } + } + } + } + } +} + +fn read_dir(cache_dir: PathBuf) -> Option { + if cache_dir.exists() { + if let Ok(read_dir) = cache_dir.read_dir() { + return Some(read_dir); + } + } + None +} + +fn parse_dir_name(entry: DirEntry) -> Option { + if let Ok(file_type) = entry.file_type() { + if file_type.is_dir() { + if let Ok(dir_name) = entry.file_name().into_string() { + return Some(dir_name); + } + } + } + None +} + +fn parse_version_number(dir_name: String) -> Option { + if dir_name.len() >= 2 && dir_name.starts_with('v') { + if let Some(potential_version_number) = dir_name.strip_prefix('v') { + return Some( + potential_version_number + .to_string() + .parse() + .unwrap_or(u32::MAX), + ); + } + } + None +} diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 3e8601bd934..86d87d9aeda 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -28,7 +28,7 @@ mod util; #[cfg(test)] mod tests; -pub use config::Config; +pub use config::{check_and_delete_old_databases, Config}; pub use constants::MAX_BLOCK_REORG_HEIGHT; pub use error::{BoxError, CloneError, CommitBlockError, ValidateContextError}; pub use request::{FinalizedBlock, HashOrHeight, PreparedBlock, ReadRequest, Request}; diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index 0cb45dc002d..e87c9715695 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -59,7 +59,7 @@ //! * answers RPC client requests using the State Service and Mempool Service //! * submits client transactions to the node's mempool -use std::{cmp::max, fs::remove_dir_all, ops::Add, path::PathBuf, time::Duration}; +use std::{cmp::max, ops::Add, time::Duration}; use abscissa_core::{config, Command, FrameworkError, Options, Runnable}; use chrono::Utc; @@ -107,7 +107,7 @@ impl StartCmd { if !config.state.ephemeral && config.state.delete_old_database { info!("checking old database versions"); - check_and_delete_old_databases(config.state.cache_dir.clone())?; + zebra_state::check_and_delete_old_databases(config.state.cache_dir.clone()); } info!("initializing node state"); @@ -606,58 +606,3 @@ impl config::Override for StartCmd { Ok(config) } } - -/// Check if there are old database folders and delete them from the filesystem. -/// -/// Iterate over the files and directories in the databases folder and delete if: -/// - The state directory exists. -/// - The entry is a directory. -/// - The directory name has a lenght of at least 2 characters. -/// - The directory name has a prefix `v`. -/// - The directory name without the prefix can be parsed as an unsigned number. -/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. -fn check_and_delete_old_databases(cache_dir: PathBuf) -> Result<(), Report> { - let cache_dir = cache_dir.join("state"); - if cache_dir.exists() { - for entry in (cache_dir.read_dir()?).flatten() { - if let Some(dir_name) = parse_dir_name(entry) { - if let Some(version_number) = parse_version_number(dir_name.clone()) { - if version_number < zebra_state::constants::DATABASE_FORMAT_VERSION { - let delete_path = cache_dir.join(dir_name); - info!("deleting outdated state directory {:?}", delete_path,); - if remove_dir_all(delete_path).is_err() { - continue; - } - } - } - } - } - } - - Ok(()) -} - -fn parse_dir_name(entry: std::fs::DirEntry) -> Option { - if let Ok(file_type) = entry.file_type() { - if file_type.is_dir() { - if let Ok(dir_name) = entry.file_name().into_string() { - return Some(dir_name); - } - } - } - None -} - -fn parse_version_number(dir_name: String) -> Option { - if dir_name.len() >= 2 && dir_name.starts_with('v') { - if let Some(potential_version_number) = dir_name.strip_prefix('v') { - return Some( - potential_version_number - .to_string() - .parse() - .unwrap_or(u32::MAX), - ); - } - } - None -} From 7a57981f1e31466e35088489babd5f367584253c Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 15 Jun 2022 18:57:17 -0300 Subject: [PATCH 06/12] spawn `check_and_delete_old_databases` --- zebra-state/src/config.rs | 24 ++++++++++++++---------- zebrad/src/commands/start.rs | 19 +++++++++++++++---- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index ec4e66045f4..62aeb23a12d 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -132,16 +132,20 @@ impl Default for Config { /// - The directory name has a prefix `v`. /// - The directory name without the prefix can be parsed as an unsigned number. /// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. -pub fn check_and_delete_old_databases(cache_dir: PathBuf) { - let cache_dir = cache_dir.join("state"); - if let Some(read_dir) = read_dir(cache_dir.clone()) { - for entry in read_dir.flatten() { - if let Some(dir_name) = parse_dir_name(entry) { - if let Some(version_number) = parse_version_number(dir_name.clone()) { - if version_number < crate::constants::DATABASE_FORMAT_VERSION { - let delete_path = cache_dir.join(dir_name); - if remove_dir_all(delete_path.clone()).is_ok() { - info!("deleted outdated state directory {:?}", delete_path); +pub async fn check_and_delete_old_databases(config: Config) { + if !config.ephemeral && config.delete_old_database { + info!("checking old database versions"); + + let cache_dir = config.cache_dir.join("state"); + if let Some(read_dir) = read_dir(cache_dir.clone()) { + for entry in read_dir.flatten() { + if let Some(dir_name) = parse_dir_name(entry) { + if let Some(version_number) = parse_version_number(dir_name.clone()) { + if version_number < crate::constants::DATABASE_FORMAT_VERSION { + let delete_path = cache_dir.join(dir_name); + if remove_dir_all(delete_path.clone()).is_ok() { + info!("deleted outdated state directory {:?}", delete_path); + } } } } diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index e87c9715695..f7471ccf4fc 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -105,10 +105,9 @@ impl StartCmd { let config = app_config().clone(); info!(?config); - if !config.state.ephemeral && config.state.delete_old_database { - info!("checking old database versions"); - zebra_state::check_and_delete_old_databases(config.state.cache_dir.clone()); - } + let mut old_databases_task_handle = tokio::spawn( + zebra_state::check_and_delete_old_databases(config.state.clone()).in_current_span(), + ); info!("initializing node state"); let (state_service, read_only_state_service, latest_chain_tip, chain_tip_change) = @@ -234,6 +233,8 @@ impl StartCmd { // startup tasks let groth16_download_handle_fused = (&mut groth16_download_handle).fuse(); pin!(groth16_download_handle_fused); + let old_databases_task_handle_fused = (&mut old_databases_task_handle).fuse(); + pin!(old_databases_task_handle_fused); // Wait for tasks to finish let exit_status = loop { @@ -296,6 +297,16 @@ impl StartCmd { exit_when_task_finishes = false; Ok(()) } + + // The same for the old databases task, we expect it to finish while Zebra is running. + old_databases_result = &mut old_databases_task_handle_fused => { + old_databases_result + .unwrap_or_else(|_| panic!( + "unexpected panic deleting old database directories")); + + exit_when_task_finishes = false; + Ok(()) + } }; // Stop Zebra if a task finished and returned an error, From 5c3595a590177a6d17467f4a2dbe842cb3c107ef Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 15 Jun 2022 19:21:29 -0300 Subject: [PATCH 07/12] simplity a bit --- zebra-state/src/config.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 62aeb23a12d..451771b6fd0 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -128,24 +128,25 @@ impl Default for Config { /// Iterate over the files and directories in the databases folder and delete if: /// - The state directory exists. /// - The entry is a directory. -/// - The directory name has a lenght of at least 2 characters. +/// - The directory name has a length of at least 2 characters. /// - The directory name has a prefix `v`. /// - The directory name without the prefix can be parsed as an unsigned number. /// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. pub async fn check_and_delete_old_databases(config: Config) { - if !config.ephemeral && config.delete_old_database { - info!("checking old database versions"); - - let cache_dir = config.cache_dir.join("state"); - if let Some(read_dir) = read_dir(cache_dir.clone()) { - for entry in read_dir.flatten() { - if let Some(dir_name) = parse_dir_name(entry) { - if let Some(version_number) = parse_version_number(dir_name.clone()) { - if version_number < crate::constants::DATABASE_FORMAT_VERSION { - let delete_path = cache_dir.join(dir_name); - if remove_dir_all(delete_path.clone()).is_ok() { - info!("deleted outdated state directory {:?}", delete_path); - } + if config.ephemeral || !config.delete_old_database { + return; + } + + info!("checking old database versions"); + let cache_dir = config.cache_dir.join("state"); + if let Some(read_dir) = read_dir(cache_dir.clone()) { + for entry in read_dir.flatten() { + if let Some(dir_name) = parse_dir_name(entry) { + if let Some(version_number) = parse_version_number(dir_name.clone()) { + if version_number < crate::constants::DATABASE_FORMAT_VERSION { + let delete_path = cache_dir.join(dir_name); + if remove_dir_all(delete_path.clone()).is_ok() { + info!("deleted outdated state directory {:?}", delete_path); } } } From 18916897995efe334ae1b00f95ef8083833eec80 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 16 Jun 2022 22:11:40 +1000 Subject: [PATCH 08/12] fix(state): only delete old database directories inside the cache directory (#4631) * Add function comments, tweak log * Simplify version parsing * Use spawn_blocking to launch the task on a separate thread, do the cleanup last * Abort the cleanup task when Zebra exits * Split directory deletion into its own function, handle ownership * Rename cache_dir to state_dir * If an outdated state directory is outside the cache directory, don't delete it * Minimise diffs --- zebra-state/src/config.rs | 124 +++++++++++++++++++++++++---------- zebrad/src/commands/start.rs | 10 +-- 2 files changed, 96 insertions(+), 38 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 451771b6fd0..555bd2a5c84 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -1,9 +1,13 @@ +//! Cached state configuration for Zebra. + use std::{ - fs::{remove_dir_all, DirEntry, ReadDir}, - path::PathBuf, + fs::{canonicalize, remove_dir_all, DirEntry, ReadDir}, + path::{Path, PathBuf}, }; use serde::{Deserialize, Serialize}; +use tokio::task::{spawn_blocking, JoinHandle}; +use tracing::Span; use zebra_chain::parameters::Network; @@ -123,48 +127,105 @@ impl Default for Config { } } -/// Check if there are old database folders and delete them from the filesystem. +// Cleaning up old database versions + +/// Spawns a task that checks if there are old database folders, +/// and deletes them from the filesystem. /// /// Iterate over the files and directories in the databases folder and delete if: /// - The state directory exists. /// - The entry is a directory. -/// - The directory name has a length of at least 2 characters. /// - The directory name has a prefix `v`. /// - The directory name without the prefix can be parsed as an unsigned number. /// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`. -pub async fn check_and_delete_old_databases(config: Config) { +pub fn check_and_delete_old_databases(config: Config) -> JoinHandle<()> { + let current_span = Span::current(); + + spawn_blocking(move || { + current_span.in_scope(|| { + delete_old_databases(config); + info!("finished old database version cleanup task"); + }) + }) +} + +/// Check if there are old database folders and delete them from the filesystem. +/// +/// See [`check_and_delete_old_databases`] for details. +fn delete_old_databases(config: Config) { if config.ephemeral || !config.delete_old_database { return; } - info!("checking old database versions"); - let cache_dir = config.cache_dir.join("state"); - if let Some(read_dir) = read_dir(cache_dir.clone()) { - for entry in read_dir.flatten() { - if let Some(dir_name) = parse_dir_name(entry) { - if let Some(version_number) = parse_version_number(dir_name.clone()) { - if version_number < crate::constants::DATABASE_FORMAT_VERSION { - let delete_path = cache_dir.join(dir_name); - if remove_dir_all(delete_path.clone()).is_ok() { - info!("deleted outdated state directory {:?}", delete_path); - } - } - } + info!("checking for old database versions"); + + let state_dir = config.cache_dir.join("state"); + if let Some(state_dir) = read_dir(&state_dir) { + for entry in state_dir.flatten() { + let deleted_state = check_and_delete_database(&config, &entry); + + if let Some(deleted_state) = deleted_state { + info!(?deleted_state, "deleted outdated state directory"); } } } } -fn read_dir(cache_dir: PathBuf) -> Option { - if cache_dir.exists() { - if let Ok(read_dir) = cache_dir.read_dir() { +/// Return a `ReadDir` for `dir`, after checking that `dir` exists and can be read. +/// +/// Returns `None` if any operation fails. +fn read_dir(dir: &Path) -> Option { + if dir.exists() { + if let Ok(read_dir) = dir.read_dir() { return Some(read_dir); } } None } -fn parse_dir_name(entry: DirEntry) -> Option { +/// Check if `entry` is an old database directory, and delete it from the filesystem. +/// See [`check_and_delete_old_databases`] for details. +/// +/// If the directory was deleted, returns its path. +fn check_and_delete_database(config: &Config, entry: &DirEntry) -> Option { + let dir_name = parse_dir_name(entry)?; + let version_number = parse_version_number(&dir_name)?; + + if version_number >= crate::constants::DATABASE_FORMAT_VERSION { + return None; + } + + let outdated_path = entry.path(); + + // # Correctness + // + // Check that the path we're about to delete is inside the cache directory. + // If the user has symlinked the outdated state directory to a non-cache directory, + // we don't want to delete it, because it might contain other files. + // + // We don't attempt to guard against malicious symlinks created by attackers + // (TOCTOU attacks). Zebra should not be run with elevated privileges. + let cache_path = canonicalize(&config.cache_dir).ok()?; + let outdated_path = canonicalize(outdated_path).ok()?; + + if !outdated_path.starts_with(&cache_path) { + info!( + skipped_path = ?outdated_path, + ?cache_path, + "skipped cleanup of outdated state directory: state is outside cache directory", + ); + + return None; + } + + remove_dir_all(&outdated_path).ok().map(|()| outdated_path) +} + +/// Check if `entry` is a directory with a valid UTF-8 name. +/// (State directory names are guaranteed to be UTF-8.) +/// +/// Returns `None` if any operation fails. +fn parse_dir_name(entry: &DirEntry) -> Option { if let Ok(file_type) = entry.file_type() { if file_type.is_dir() { if let Ok(dir_name) = entry.file_name().into_string() { @@ -175,16 +236,11 @@ fn parse_dir_name(entry: DirEntry) -> Option { None } -fn parse_version_number(dir_name: String) -> Option { - if dir_name.len() >= 2 && dir_name.starts_with('v') { - if let Some(potential_version_number) = dir_name.strip_prefix('v') { - return Some( - potential_version_number - .to_string() - .parse() - .unwrap_or(u32::MAX), - ); - } - } - None +/// Parse the state version number from `dir_name`. +/// +/// Returns `None` if parsing fails, or the directory name is not in the expected format. +fn parse_version_number(dir_name: &str) -> Option { + dir_name + .strip_prefix('v') + .and_then(|version| version.parse().ok()) } diff --git a/zebrad/src/commands/start.rs b/zebrad/src/commands/start.rs index f7471ccf4fc..ec2cf8b3ec7 100644 --- a/zebrad/src/commands/start.rs +++ b/zebrad/src/commands/start.rs @@ -37,6 +37,8 @@ //! * contextually verifies blocks //! * handles in-memory storage of multiple non-finalized chains //! * handles permanent storage of the best finalized chain +//! * Old State Version Cleanup Task +//! * deletes outdated state versions //! * Block Gossip Task //! * runs in the background and continuously queries the state for //! newly committed blocks to be gossiped to peers @@ -105,10 +107,6 @@ impl StartCmd { let config = app_config().clone(); info!(?config); - let mut old_databases_task_handle = tokio::spawn( - zebra_state::check_and_delete_old_databases(config.state.clone()).in_current_span(), - ); - info!("initializing node state"); let (state_service, read_only_state_service, latest_chain_tip, chain_tip_change) = zebra_state::init(config.state.clone(), config.network.network); @@ -216,6 +214,9 @@ impl StartCmd { .in_current_span(), ); + let mut old_databases_task_handle = + zebra_state::check_and_delete_old_databases(config.state.clone()); + info!("spawned initial Zebra tasks"); // TODO: put tasks into an ongoing FuturesUnordered and a startup FuturesUnordered? @@ -334,6 +335,7 @@ impl StartCmd { // startup tasks groth16_download_handle.abort(); + old_databases_task_handle.abort(); exit_status } From 5a707e1fad1028dc3de6236fb56970f307bb65c4 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Fri, 17 Jun 2022 13:11:59 -0300 Subject: [PATCH 09/12] add test --- zebrad/tests/acceptance.rs | 60 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 7c4e9022dd7..91e63067daf 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -1638,6 +1638,66 @@ async fn fully_synced_rpc_test() -> Result<()> { Ok(()) } +#[tokio::test] +async fn delete_old_databases() -> Result<()> { + use std::fs::create_dir; + + zebra_test::init(); + + let mut config = default_test_config()?; + let run_dir = testdir()?; + let cache_dir = run_dir.path().join("state"); + + // create cache dir + create_dir(cache_dir.clone())?; + + // create a v1 dir outside cache dir that should not be deleted + let outside_dir = run_dir.path().join("v1"); + create_dir(&outside_dir)?; + assert!(outside_dir.as_path().exists()); + + // create a `v1` dir insidecache dir that should be deleted + let inside_dir = cache_dir.join("v1"); + create_dir(&inside_dir)?; + assert!(inside_dir.as_path().exists()); + + // modify config with our cache dir and not ephimeral configuration + // (delete old databases function will not run when epehemeral = true) + config.state.cache_dir = cache_dir; + config.state.ephemeral = false; + + // run zebra with our config + let mut child = run_dir + .with_config(&mut config)? + .spawn_child(args!["start"])?; + + // delete checker running + child.expect_stdout_line_matches("checking for old database versions".to_string())?; + + // inside dir was deleted + child.expect_stdout_line_matches(format!( + "deleted outdated state directory deleted_state={:?}", + inside_dir + ))?; + assert!(!inside_dir.as_path().exists()); + + // outside dir was not deleted + assert!(outside_dir.as_path().exists()); + + // finish + child.kill()?; + + let output = child.wait_with_output()?; + let output = output.assert_failure()?; + + // [Note on port conflict](#Note on port conflict) + output + .assert_was_killed() + .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + + Ok(()) +} + /// Test sending transactions using a lightwalletd instance connected to a zebrad instance. /// /// See [`common::lightwalletd::send_transaction_test`] for more information. From 8a537b50caebd77e80a452e659e7fe4ea8086879 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Mon, 20 Jun 2022 10:25:14 -0300 Subject: [PATCH 10/12] fix typos Co-authored-by: teor --- zebrad/tests/acceptance.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index a0ce2902030..64e31d8bd78 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -1660,13 +1660,13 @@ async fn delete_old_databases() -> Result<()> { create_dir(&outside_dir)?; assert!(outside_dir.as_path().exists()); - // create a `v1` dir insidecache dir that should be deleted + // create a `v1` dir inside cache dir that should be deleted let inside_dir = cache_dir.join("v1"); create_dir(&inside_dir)?; assert!(inside_dir.as_path().exists()); - // modify config with our cache dir and not ephimeral configuration - // (delete old databases function will not run when epehemeral = true) + // modify config with our cache dir and not ephemeral configuration + // (delete old databases function will not run when ephemeral = true) config.state.cache_dir = cache_dir; config.state.ephemeral = false; From 6cd8f57b7b3cd64dcad22ee20eac821f50bffdbe Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Mon, 20 Jun 2022 14:38:46 -0300 Subject: [PATCH 11/12] add `canonicalize` to test regex --- zebrad/tests/acceptance.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 64e31d8bd78..fdcea8af669 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -1644,7 +1644,7 @@ async fn fully_synced_rpc_test() -> Result<()> { #[tokio::test] async fn delete_old_databases() -> Result<()> { - use std::fs::create_dir; + use std::fs::{canonicalize, create_dir}; zebra_test::init(); @@ -1663,6 +1663,7 @@ async fn delete_old_databases() -> Result<()> { // create a `v1` dir inside cache dir that should be deleted let inside_dir = cache_dir.join("v1"); create_dir(&inside_dir)?; + let canonicalized_inside_dir = canonicalize(inside_dir.clone()).ok().unwrap(); assert!(inside_dir.as_path().exists()); // modify config with our cache dir and not ephemeral configuration @@ -1681,7 +1682,7 @@ async fn delete_old_databases() -> Result<()> { // inside dir was deleted child.expect_stdout_line_matches(format!( "deleted outdated state directory deleted_state={:?}", - inside_dir + canonicalized_inside_dir ))?; assert!(!inside_dir.as_path().exists()); From c95f46891481ef7c49c2552fda3d11d93176bc1f Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Mon, 20 Jun 2022 14:55:22 -0300 Subject: [PATCH 12/12] add another match to test --- zebrad/tests/acceptance.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index fdcea8af669..f969febd87e 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -1686,6 +1686,9 @@ async fn delete_old_databases() -> Result<()> { ))?; assert!(!inside_dir.as_path().exists()); + // deleting old databases task ended + child.expect_stdout_line_matches("finished old database version cleanup task".to_string())?; + // outside dir was not deleted assert!(outside_dir.as_path().exists());