Skip to content

Commit

Permalink
Merge of #4586
Browse files Browse the repository at this point in the history
  • Loading branch information
mergify[bot] authored Jun 20, 2022
2 parents 961fcb6 + c95f468 commit 0ece36b
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 2 deletions.
135 changes: 134 additions & 1 deletion zebra-state/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
use std::path::PathBuf;
//! Cached state configuration for Zebra.
use std::{
fs::{canonicalize, remove_dir_all, DirEntry, ReadDir},
path::{Path, PathBuf},
};

use serde::{Deserialize, Serialize};
use tokio::task::{spawn_blocking, JoinHandle};
use tracing::Span;

use zebra_chain::parameters::Network;

Expand Down Expand Up @@ -57,6 +64,13 @@ pub struct Config {
///
/// Set to `None` by default: Zebra continues syncing indefinitely.
pub debug_stop_at_height: Option<u32>,

/// Whether to delete the old database directories when present.
///
/// Set to `true` by default. If this is set to `false`,
/// no check for old database versions will be made and nothing will be
/// deleted.
pub delete_old_database: bool,
}

fn gen_temp_path(prefix: &str) -> PathBuf {
Expand Down Expand Up @@ -108,6 +122,125 @@ impl Default for Config {
cache_dir,
ephemeral: false,
debug_stop_at_height: None,
delete_old_database: true,
}
}
}

// Cleaning up old database versions

/// Spawns a task that checks if there are old database folders,
/// and deletes them from the filesystem.
///
/// Iterate over the files and directories in the databases folder and delete if:
/// - The state directory exists.
/// - The entry is a directory.
/// - The directory name has a prefix `v`.
/// - The directory name without the prefix can be parsed as an unsigned number.
/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`.
pub fn check_and_delete_old_databases(config: Config) -> JoinHandle<()> {
let current_span = Span::current();

spawn_blocking(move || {
current_span.in_scope(|| {
delete_old_databases(config);
info!("finished old database version cleanup task");
})
})
}

/// Check if there are old database folders and delete them from the filesystem.
///
/// See [`check_and_delete_old_databases`] for details.
fn delete_old_databases(config: Config) {
if config.ephemeral || !config.delete_old_database {
return;
}

info!("checking for old database versions");

let state_dir = config.cache_dir.join("state");
if let Some(state_dir) = read_dir(&state_dir) {
for entry in state_dir.flatten() {
let deleted_state = check_and_delete_database(&config, &entry);

if let Some(deleted_state) = deleted_state {
info!(?deleted_state, "deleted outdated state directory");
}
}
}
}

/// Return a `ReadDir` for `dir`, after checking that `dir` exists and can be read.
///
/// Returns `None` if any operation fails.
fn read_dir(dir: &Path) -> Option<ReadDir> {
if dir.exists() {
if let Ok(read_dir) = dir.read_dir() {
return Some(read_dir);
}
}
None
}

/// Check if `entry` is an old database directory, and delete it from the filesystem.
/// See [`check_and_delete_old_databases`] for details.
///
/// If the directory was deleted, returns its path.
fn check_and_delete_database(config: &Config, entry: &DirEntry) -> Option<PathBuf> {
let dir_name = parse_dir_name(entry)?;
let version_number = parse_version_number(&dir_name)?;

if version_number >= crate::constants::DATABASE_FORMAT_VERSION {
return None;
}

let outdated_path = entry.path();

// # Correctness
//
// Check that the path we're about to delete is inside the cache directory.
// If the user has symlinked the outdated state directory to a non-cache directory,
// we don't want to delete it, because it might contain other files.
//
// We don't attempt to guard against malicious symlinks created by attackers
// (TOCTOU attacks). Zebra should not be run with elevated privileges.
let cache_path = canonicalize(&config.cache_dir).ok()?;
let outdated_path = canonicalize(outdated_path).ok()?;

if !outdated_path.starts_with(&cache_path) {
info!(
skipped_path = ?outdated_path,
?cache_path,
"skipped cleanup of outdated state directory: state is outside cache directory",
);

return None;
}

remove_dir_all(&outdated_path).ok().map(|()| outdated_path)
}

/// Check if `entry` is a directory with a valid UTF-8 name.
/// (State directory names are guaranteed to be UTF-8.)
///
/// Returns `None` if any operation fails.
fn parse_dir_name(entry: &DirEntry) -> Option<String> {
if let Ok(file_type) = entry.file_type() {
if file_type.is_dir() {
if let Ok(dir_name) = entry.file_name().into_string() {
return Some(dir_name);
}
}
}
None
}

/// Parse the state version number from `dir_name`.
///
/// Returns `None` if parsing fails, or the directory name is not in the expected format.
fn parse_version_number(dir_name: &str) -> Option<u32> {
dir_name
.strip_prefix('v')
.and_then(|version| version.parse().ok())
}
2 changes: 1 addition & 1 deletion zebra-state/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ mod util;
#[cfg(test)]
mod tests;

pub use config::Config;
pub use config::{check_and_delete_old_databases, Config};
pub use constants::MAX_BLOCK_REORG_HEIGHT;
pub use error::{BoxError, CloneError, CommitBlockError, ValidateContextError};
pub use request::{FinalizedBlock, HashOrHeight, PreparedBlock, ReadRequest, Request};
Expand Down
18 changes: 18 additions & 0 deletions zebrad/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
//! * contextually verifies blocks
//! * handles in-memory storage of multiple non-finalized chains
//! * handles permanent storage of the best finalized chain
//! * Old State Version Cleanup Task
//! * deletes outdated state versions
//! * Block Gossip Task
//! * runs in the background and continuously queries the state for
//! newly committed blocks to be gossiped to peers
Expand Down Expand Up @@ -218,6 +220,9 @@ impl StartCmd {
.in_current_span(),
);

let mut old_databases_task_handle =
zebra_state::check_and_delete_old_databases(config.state.clone());

info!("spawned initial Zebra tasks");

// TODO: put tasks into an ongoing FuturesUnordered and a startup FuturesUnordered?
Expand All @@ -235,6 +240,8 @@ impl StartCmd {
// startup tasks
let groth16_download_handle_fused = (&mut groth16_download_handle).fuse();
pin!(groth16_download_handle_fused);
let old_databases_task_handle_fused = (&mut old_databases_task_handle).fuse();
pin!(old_databases_task_handle_fused);

// Wait for tasks to finish
let exit_status = loop {
Expand Down Expand Up @@ -297,6 +304,16 @@ impl StartCmd {
exit_when_task_finishes = false;
Ok(())
}

// The same for the old databases task, we expect it to finish while Zebra is running.
old_databases_result = &mut old_databases_task_handle_fused => {
old_databases_result
.unwrap_or_else(|_| panic!(
"unexpected panic deleting old database directories"));

exit_when_task_finishes = false;
Ok(())
}
};

// Stop Zebra if a task finished and returned an error,
Expand Down Expand Up @@ -324,6 +341,7 @@ impl StartCmd {

// startup tasks
groth16_download_handle.abort();
old_databases_task_handle.abort();

exit_status
}
Expand Down
64 changes: 64 additions & 0 deletions zebrad/tests/acceptance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,70 @@ async fn fully_synced_rpc_test() -> Result<()> {
Ok(())
}

#[tokio::test]
async fn delete_old_databases() -> Result<()> {
use std::fs::{canonicalize, create_dir};

zebra_test::init();

let mut config = default_test_config()?;
let run_dir = testdir()?;
let cache_dir = run_dir.path().join("state");

// create cache dir
create_dir(cache_dir.clone())?;

// create a v1 dir outside cache dir that should not be deleted
let outside_dir = run_dir.path().join("v1");
create_dir(&outside_dir)?;
assert!(outside_dir.as_path().exists());

// create a `v1` dir inside cache dir that should be deleted
let inside_dir = cache_dir.join("v1");
create_dir(&inside_dir)?;
let canonicalized_inside_dir = canonicalize(inside_dir.clone()).ok().unwrap();
assert!(inside_dir.as_path().exists());

// modify config with our cache dir and not ephemeral configuration
// (delete old databases function will not run when ephemeral = true)
config.state.cache_dir = cache_dir;
config.state.ephemeral = false;

// run zebra with our config
let mut child = run_dir
.with_config(&mut config)?
.spawn_child(args!["start"])?;

// delete checker running
child.expect_stdout_line_matches("checking for old database versions".to_string())?;

// inside dir was deleted
child.expect_stdout_line_matches(format!(
"deleted outdated state directory deleted_state={:?}",
canonicalized_inside_dir
))?;
assert!(!inside_dir.as_path().exists());

// deleting old databases task ended
child.expect_stdout_line_matches("finished old database version cleanup task".to_string())?;

// outside dir was not deleted
assert!(outside_dir.as_path().exists());

// finish
child.kill()?;

let output = child.wait_with_output()?;
let output = output.assert_failure()?;

// [Note on port conflict](#Note on port conflict)
output
.assert_was_killed()
.wrap_err("Possible port conflict. Are there other acceptance tests running?")?;

Ok(())
}

/// Test sending transactions using a lightwalletd instance connected to a zebrad instance.
///
/// See [`common::lightwalletd::send_transaction_test`] for more information.
Expand Down

0 comments on commit 0ece36b

Please sign in to comment.