Skip to content

Commit

Permalink
separate BlockBasedLog::open() from ::claim() (openzfs#506)
Browse files Browse the repository at this point in the history
Introduce new claim() functions to claim space with the ExtentAllocator
when opening a cache, instead of doing it as a side effect of open().

Use the "builder" pattern to initialize the ExtentAllocator from a
consistent state, ensuring that we don't try to claim after we start
allocating.

Add a heuristic check for leaked ExtentAllocator space.

Code cleanup; no functional change.
  • Loading branch information
ahrens authored Oct 16, 2021
1 parent 020b661 commit f19d419
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 31 deletions.
7 changes: 6 additions & 1 deletion cmd/zfs_object_agent/zettacache/src/block_allocator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::base_types::*;
use crate::block_access::BlockAccess;
use crate::extent_allocator::ExtentAllocator;
use crate::extent_allocator::{ExtentAllocator, ExtentAllocatorBuilder};
use crate::space_map::{SpaceMap, SpaceMapEntry, SpaceMapPhys};
use crate::zettacache::DEFAULT_SLAB_SIZE;
use lazy_static::lazy_static;
Expand Down Expand Up @@ -1316,6 +1316,11 @@ impl BlockAllocatorPhys {
slab_buckets: DEFAULT_SLAB_BUCKETS.clone(),
}
}

/// Claim the extents backing both spacemap logs with the
/// ExtentAllocatorBuilder, removing them from the allocatable space
/// before the ExtentAllocator is opened.
pub fn claim(&self, builder: &mut ExtentAllocatorBuilder) {
    self.spacemap.claim(builder);
    self.spacemap_next.claim(builder);
}
}

pub async fn zcachedb_dump_spacemaps(
Expand Down
23 changes: 17 additions & 6 deletions cmd/zfs_object_agent/zettacache/src/block_based_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::base_types::*;
use crate::block_access::BlockAccess;
use crate::block_access::EncodeType;
use crate::extent_allocator::ExtentAllocator;
use crate::extent_allocator::ExtentAllocatorBuilder;
use anyhow::Context;
use async_stream::stream;
use futures::stream::FuturesUnordered;
Expand All @@ -26,7 +27,7 @@ use util::get_tunable;

lazy_static! {
// XXX maybe this is wasteful for the smaller logs?
static ref DEFAULT_EXTENT_SIZE: u64 = get_tunable("default_extent_size", 128 * 1024 * 1024);
pub static ref DEFAULT_EXTENT_SIZE: u64 = get_tunable("default_extent_size", 128 * 1024 * 1024);
static ref ENTRIES_PER_CHUNK: usize = get_tunable("entries_per_chunk", 200);
}

Expand Down Expand Up @@ -59,13 +60,19 @@ impl<T: BlockBasedLogEntry> Default for BlockBasedLogPhys<T> {
}

impl<T: BlockBasedLogEntry> BlockBasedLogPhys<T> {
pub fn clear(&mut self, extent_allocator: Arc<ExtentAllocator>) {
/// Return all of this log's extents to the ExtentAllocator and reset
/// this phys to its default (empty) state.
pub fn clear(&mut self, extent_allocator: &ExtentAllocator) {
    // Free every extent backing the log. NOTE(review): free() presumably
    // defers reuse until the checkpoint completes (via the allocator's
    // `freeing` tree) — confirm in extent_allocator.rs.
    for extent in self.extents.values() {
        extent_allocator.free(extent);
    }
    *self = Default::default();
}

/// Claim every extent backing this log with the builder, so the
/// ExtentAllocator won't allocate over the log's on-disk data.
pub fn claim(&self, builder: &mut ExtentAllocatorBuilder) {
    self.extents
        .values()
        .for_each(|extent| builder.claim(extent));
}

pub fn iter_chunks(
&self,
block_access: Arc<BlockAccess>,
Expand Down Expand Up @@ -147,6 +154,13 @@ impl<T: BlockBasedLogEntry> Default for BlockBasedLogWithSummaryPhys<T> {
}
}

impl<T: BlockBasedLogEntry> BlockBasedLogWithSummaryPhys<T> {
    /// Claim the extents of both the main log and its chunk-summary log.
    pub fn claim(&self, builder: &mut ExtentAllocatorBuilder) {
        self.this.claim(builder);
        self.chunk_summary.claim(builder);
    }
}

pub trait BlockBasedLogEntry: 'static + OnDisk + Copy + Clone + Unpin + Send + Sync {}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
Expand Down Expand Up @@ -186,9 +200,6 @@ impl<T: BlockBasedLogEntry> BlockBasedLog<T> {
extent_allocator: Arc<ExtentAllocator>,
phys: BlockBasedLogPhys<T>,
) -> BlockBasedLog<T> {
for (_offset, extent) in phys.extents.iter() {
extent_allocator.claim(extent);
}
BlockBasedLog {
block_access,
extent_allocator,
Expand Down Expand Up @@ -288,7 +299,7 @@ impl<T: BlockBasedLogEntry> BlockBasedLog<T> {

pub fn clear(&mut self) {
self.pending_entries.clear();
self.phys.clear(self.extent_allocator.clone());
self.phys.clear(&self.extent_allocator);
}

fn next_write_location(&self) -> Extent {
Expand Down
51 changes: 34 additions & 17 deletions cmd/zfs_object_agent/zettacache/src/extent_allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ pub struct ExtentAllocatorPhys {
}

pub struct ExtentAllocator {
// XXX there's no real point of having a mutex here since this is only
// accessed under the big zettacache lock
state: std::sync::Mutex<ExtentAllocatorState>,
}

Expand All @@ -24,19 +22,42 @@ struct ExtentAllocatorState {
freeing: RangeTree, // not yet available for reallocation until this checkpoint completes
}

/// Note: no on-disk representation. Allocated extents must be .claim()ed
/// before .allocate()ing additional extents.
impl ExtentAllocator {
pub fn open(phys: &ExtentAllocatorPhys) -> ExtentAllocator {
/// Accumulates claims on in-use metadata extents while opening a cache,
/// before the ExtentAllocator is constructed. Using a separate builder
/// ensures all claim()s happen before any allocate() can occur.
pub struct ExtentAllocatorBuilder {
    phys: ExtentAllocatorPhys,
    // Ranges still available for metadata allocation; claim() removes from this.
    allocatable: RangeTree,
}

impl ExtentAllocatorBuilder {
    /// Start with the entire [first_valid_offset, last_valid_offset) range
    /// allocatable; callers then claim() the extents already in use.
    pub fn new(phys: ExtentAllocatorPhys) -> ExtentAllocatorBuilder {
        let capacity = phys.last_valid_offset - phys.first_valid_offset;
        let mut allocatable = RangeTree::new();
        allocatable.add(phys.first_valid_offset, capacity);
        ExtentAllocatorBuilder { phys, allocatable }
    }

    /// Mark `extent` as in use, removing it from the allocatable space.
    pub fn claim(&mut self, extent: &Extent) {
        self.allocatable
            .remove(extent.location.offset, extent.size);
    }

    /// Bytes that remain unclaimed (available for future allocation).
    pub fn allocatable_bytes(&self) -> u64 {
        self.allocatable.space()
    }
}

impl ExtentAllocator {
/// Since the on-disk representation doesn't indicate which extents are
/// allocated, they must all be .claim()ed first, via the
/// ExtentAllocatorBuilder.
pub fn open(builder: ExtentAllocatorBuilder) -> ExtentAllocator {
ExtentAllocator {
state: std::sync::Mutex::new(ExtentAllocatorState {
phys: *phys,
allocatable: metadata_allocatable,
phys: builder.phys,
allocatable: builder.allocatable,
freeing: RangeTree::new(),
}),
}
Expand All @@ -46,6 +67,10 @@ impl ExtentAllocator {
self.state.lock().unwrap().phys
}

/// Bytes currently available for metadata allocation. Extents on the
/// `freeing` tree (awaiting checkpoint completion) are not counted.
pub fn allocatable_bytes(&self) -> u64 {
    self.state.lock().unwrap().allocatable.space()
}

pub fn checkpoint_done(&self) {
let mut state = self.state.lock().unwrap();

Expand All @@ -55,14 +80,6 @@ impl ExtentAllocator {
}
}

pub fn claim(&self, extent: &Extent) {
self.state
.lock()
.unwrap()
.allocatable
.remove(extent.location.offset, extent.size);
}

pub fn allocate(&self, min_size: u64, max_size: u64) -> Extent {
let mut state = self.state.lock().unwrap();

Expand Down Expand Up @@ -91,7 +108,7 @@ impl ExtentAllocator {
// XXX the block allocator will keep using this, and overwriting our
// metadata, until we notify it.
panic!(
"no extents of at least {} bytes available; overwriting {} bytes of data blocks at offset {}",
"no extents of at least {} bytes available; need to overwrite {} bytes of data blocks at offset {}",
min_size, max_size, state.phys.last_valid_offset
);
} else {
Expand Down
5 changes: 5 additions & 0 deletions cmd/zfs_object_agent/zettacache/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::base_types::*;
use crate::block_access::*;
use crate::block_based_log::*;
use crate::extent_allocator::ExtentAllocator;
use crate::extent_allocator::ExtentAllocatorBuilder;
use crate::zettacache::AtimeHistogramPhys;
use more_asserts::*;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -56,6 +57,10 @@ impl ZettaCacheIndexPhys {
log: Default::default(),
}
}

/// Claim the extents backing the index's log with the builder.
pub fn claim(&self, builder: &mut ExtentAllocatorBuilder) {
    self.log.claim(builder);
}
}

pub struct ZettaCacheIndex {
Expand Down
5 changes: 5 additions & 0 deletions cmd/zfs_object_agent/zettacache/src/space_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::block_access::*;
use crate::block_allocator::SlabId;
use crate::block_based_log::*;
use crate::extent_allocator::ExtentAllocator;
use crate::extent_allocator::ExtentAllocatorBuilder;
use crate::{
base_types::OnDisk,
block_based_log::{BlockBasedLog, BlockBasedLogEntry},
Expand Down Expand Up @@ -59,6 +60,10 @@ impl SpaceMapPhys {
alloc_entries: 0,
}
}

/// Claim the extents backing the spacemap's log with the builder.
pub fn claim(&self, builder: &mut ExtentAllocatorBuilder) {
    self.log.claim(builder);
}
}

impl SpaceMap {
Expand Down
46 changes: 39 additions & 7 deletions cmd/zfs_object_agent/zettacache/src/zettacache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ use crate::block_access::*;
use crate::block_allocator::zcachedb_dump_spacemaps;
use crate::block_allocator::BlockAllocator;
use crate::block_allocator::BlockAllocatorPhys;
use crate::block_based_log;
use crate::block_based_log::*;
use crate::extent_allocator::ExtentAllocator;
use crate::extent_allocator::ExtentAllocatorBuilder;
use crate::extent_allocator::ExtentAllocatorPhys;
use crate::index::*;
use crate::DumpStructuresOptions;
Expand Down Expand Up @@ -108,11 +110,15 @@ impl ZettaCheckpointPhys {
this
}

/*
fn all_logs(&self) -> Vec<&BlockBasedLogPhys> {
vec![&self.index, &self.chunk_summary, &self.operation_log]
/// Claim every metadata extent referenced by this checkpoint, so the
/// ExtentAllocator (once opened) won't allocate over live metadata.
fn claim(&self, builder: &mut ExtentAllocatorBuilder) {
    self.block_allocator.claim(builder);
    self.index.claim(builder);
    self.operation_log.claim(builder);
    // An in-progress merge's operation log and new index occupy
    // metadata extents too; claim them as well.
    if let Some((merge_operation_log, merge_index)) = &self.merge_progress {
        merge_operation_log.claim(builder);
        merge_index.claim(builder);
    }
}
*/
}

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -599,7 +605,12 @@ impl ZettaCache {
println!("{:#?}", checkpoint);
}

let extent_allocator = Arc::new(ExtentAllocator::open(&checkpoint.extent_allocator));
let mut builder = ExtentAllocatorBuilder::new(checkpoint.extent_allocator);
// We should be able to get away without claiming the metadata space,
// since we aren't allocating anything, but we may also want to do this
// for verification (e.g. that there aren't overlapping Extents).
checkpoint.claim(&mut builder);
let extent_allocator = Arc::new(ExtentAllocator::open(builder));

if opts.dump_spacemaps {
zcachedb_dump_spacemaps(
Expand Down Expand Up @@ -646,7 +657,10 @@ impl ZettaCache {
metadata_start,
checkpoint.extent_allocator.first_valid_offset
);
let extent_allocator = Arc::new(ExtentAllocator::open(&checkpoint.extent_allocator));

let mut builder = ExtentAllocatorBuilder::new(checkpoint.extent_allocator);
checkpoint.claim(&mut builder);
let extent_allocator = Arc::new(ExtentAllocator::open(builder));

let operation_log = BlockBasedLog::open(
block_access.clone(),
Expand Down Expand Up @@ -1512,6 +1526,24 @@ impl ZettaCacheState {
merge_progress,
};

// There may be some metadata space allocated but not yet part of the
// checkpoint, because the merge task runs concurrently with this and
// may have allocated an extent which it hasn't yet told the checkpoint
// about. However, the amount of this space should be limited. If
// there's a large amount of space that's not accounted for in the
// checkpoint, it likely indicates a leak, e.g. a BlockBasedLog is no
// longer in the Checkpoint but wasn't .clear()'ed.
{
let mut checkpoint_extents = ExtentAllocatorBuilder::new(checkpoint.extent_allocator);
checkpoint.claim(&mut checkpoint_extents);
let checkpoint_bytes = checkpoint_extents.allocatable_bytes();
let allocator_bytes = self.extent_allocator.allocatable_bytes();
if allocator_bytes + *block_based_log::DEFAULT_EXTENT_SIZE * 4 < checkpoint_bytes {
warn!("possible leak of metadata space: {}MB available according to checkpoint but not in memory",
(checkpoint_bytes - allocator_bytes) / 1024 / 1024);
}
}

let mut checkpoint_location = self.super_phys.last_checkpoint_extent.location
+ self.super_phys.last_checkpoint_extent.size;

Expand Down Expand Up @@ -1678,7 +1710,7 @@ impl ZettaCacheState {
merging_state
.old_operation_log_phys
.clone()
.clear(self.extent_allocator.clone());
.clear(&self.extent_allocator);

// Move the "start" of the zettacache state histogram to reflect the new index
self.atime_histogram.reset_first(next_index.first_atime());
Expand Down

0 comments on commit f19d419

Please sign in to comment.