From 50830f9dd169cc508366c2910faeb1c12c6f20f6 Mon Sep 17 00:00:00 2001 From: Dimitri Bouniol Date: Mon, 2 Sep 2024 14:59:09 -0700 Subject: [PATCH 1/5] Added retention policy type for snapshots --- .../Snapshot/SnapshotRetentionPolicy.swift | 294 ++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotRetentionPolicy.swift diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotRetentionPolicy.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotRetentionPolicy.swift new file mode 100644 index 0000000..60cbd71 --- /dev/null +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotRetentionPolicy.swift @@ -0,0 +1,294 @@ +import Foundation + +/// A retention policy describing which snapshot iterations should be kept around on disk. +/// +/// Every write is made as a part of a top-level transaction that gets recorded atomically to disk as a snapshot iteration. These iterations can contain edits to one or more datastores, and represent a complete view of all data at any one moment in time. Keeping iterations around allows you to rewind the datastores in a consistent and non-breaking way, though they take up disk space for all pages that are no longer current, i.e. those containing deletions or older versions of records persisted to disk. +/// +/// A retention policy allows the disk persistence to automatically clean up these older iterations according to the policy you need for your app. The retention policy is only enforced when a write transaction completes, though the persistence may defer cleanup until later if write volumes are high. +public struct SnapshotRetentionPolicy: Sendable { + /// Internal predicate that tests if an iteration should be pruned. + /// + /// - Parameter iteration: The iteration to check. + /// - Parameter distance: How far the iteration is from the current root. The current root is `0` away from itself, while the next oldest iteration has a distance of `1`. + /// - Returns: `true` if the iteration, all its ancestors, and all its other descendants should be pruned, `false` if the next iteration should be checked. + typealias PrunePredicate = @Sendable (_ iteration: SnapshotIteration, _ distance: Int) -> Bool + + /// Internal marker indicating if the retention policy refers to the ``none`` policy. + let isNone: Bool + + /// Internal marker indicating if the retention policy refers to the ``indefinite`` policy. + let isIndefinite: Bool + + /// Internal predicate that tests if an iteration should be pruned. + /// + /// - Parameter iteration: The iteration to check. + /// - Parameter distance: How far the iteration is from the current root. The current root is `0` away from itself, while the next oldest iteration has a distance of `1`. + /// - Returns: `true` if the iteration, all its ancestors, and all its other descendants should be pruned, `false` if the next iteration should be checked. + let shouldPrune: PrunePredicate + + /// Internal initializer for creating a retention policy from flags and a predicate. + /// - Parameters: + /// - isNone: Whether this represents a ``none`` policy. + /// - isIndefinite: Whether this represents an ``indefinite`` policy. + /// - shouldPrune: The predicate to use when testing retention.
+ init( + isNone: Bool = false, + isIndefinite: Bool = false, + shouldPrune: @escaping PrunePredicate + ) { + self.isNone = isNone + self.isIndefinite = isIndefinite + self.shouldPrune = shouldPrune + } + + /// A retention policy specifying that only the most recent iteration should be kept around on disk, and all other iterations should be discarded. + /// + /// - Note: It will not be possible to rewind the datastore to a previous state using this policy, and other processes won't be able to read from a read-only datastore while the main one is writing to it. + public static let none = SnapshotRetentionPolicy(isNone: true) { _, _ in true } + + /// A retention policy that includes all iterations. + /// + /// - Note: This policy may incur a large amount of disk usage, especially on datastores with many writes. + public static let indefinite = SnapshotRetentionPolicy(isIndefinite: true) { _, _ in false } + + /// A retention policy that retains the specified number of past transactions in addition to the most recent one. + /// + /// To retain only the most recent transaction, specify a count of `0`. To retain the last 10 transactions, in addition to the current one (leaving up to 11 on disk at once), specify a count of `10`. Specifying a negative number will assert at runtime if assertions are enabled. + /// + /// This is a useful way to ensure a minimum number of transactions will always be accessible on disk at once for other processes to read, though the exact number an app will need will depend on how often write transactions occur, and how much disk space each write transaction occupies. + /// + /// - Parameter count: The number of additional transactions to retain. + /// - Returns: A policy retaining at most `count` additional transactions. + public static func transactionCount(_ count: Int) -> Self { + assert(count >= 0, "Transaction count must be greater than or equal to 0") + return SnapshotRetentionPolicy { _, distance in distance > count } + } + + /// A retention policy that retains transactions younger than a specified duration. + /// + /// A retention cutoff is calculated right at the moment the last write transaction takes place, subtracting the specified `timeInterval` from this moment in time. Note that this policy is sensitive to time changes on the host, as previous transactions record their creation date in a runtime agnostic way that relies on an absolute date and time. + /// + /// - Note: This policy may be more stable than ``transactionCount(_:)``, but may incur a non-constant amount of additional disk space depending on write volume. + /// - Parameter timeInterval: The time interval in seconds to indicate an acceptable retention window. + /// - Returns: A policy retaining transactions no older than the specified `timeInterval`. + public static func duration(_ timeInterval: TimeInterval) -> Self { + SnapshotRetentionPolicy { iteration, _ in iteration.creationDate < Date(timeIntervalSinceNow: -timeInterval) } + } + + /// A retention policy that retains transactions younger than a specified duration. + /// + /// A retention cutoff is calculated right at the moment the last write transaction takes place, subtracting the specified `duration` from this moment in time. Note that this policy is sensitive to time changes on the host, as previous transactions record their creation date in a runtime agnostic way that relies on an absolute date and time.
+ /// + /// - Note: This policy may be more stable than ``transactionCount(_:)``, but may incur a non-constant amount of additional disk space depending on write volume. + /// - Parameter duration: The duration to indicate an acceptable retention window. + /// - Returns: A policy retaining transactions no older than the specified `duration`. + @_disfavoredOverload + @available(macOS 13.0, *) + public static func duration(_ duration: Duration) -> Self { + .duration(TimeInterval(duration.components.seconds)) + } + + /// A retention policy that retains transactions younger than a specified duration. + /// + /// A retention cutoff is calculated right at the moment the last write transaction takes place, subtracting the specified `duration` from this moment in time. Note that this policy is sensitive to time changes on the host, as previous transactions record their creation date in a runtime agnostic way that relies on an absolute date and time. + /// + /// - Note: This policy may be more stable than ``transactionCount(_:)``, but may incur a non-constant amount of additional disk space depending on write volume. + /// - Parameter duration: The duration to indicate an acceptable retention window. + /// - Returns: A policy retaining transactions no older than the specified `duration`. + public static func duration(_ duration: RetentionDuration) -> Self { + .duration(TimeInterval(duration.timeInterval)) + } + + /// A retention policy that only prunes a snapshot iteration when both specified policies allow it to be pruned. + /// + /// This policy is useful to indicate that at least the specified number of transactions should be kept around, for at least a specified amount of time: + /// + /// persistence.retentionPolicy = .both(.transactionCount(10), and: .duration(.days(2))) + /// + /// As a result, this policy errs on the side of keeping transactions around when compared with ``either(_:or:)``. + /// + /// - Parameters: + /// - lhs: A policy to evaluate. + /// - rhs: Another policy to evaluate. + /// - Returns: A policy that ensures both `lhs` and `rhs` allow a transaction to be pruned before actually pruning it. + public static func both(_ lhs: SnapshotRetentionPolicy, and rhs: SnapshotRetentionPolicy) -> Self { + guard !lhs.isIndefinite, !rhs.isIndefinite else { return .indefinite } + if lhs.isNone { return rhs } + if rhs.isNone { return lhs } + return SnapshotRetentionPolicy { lhs.shouldIterationBePruned(iteration: $0, distance: $1) && rhs.shouldIterationBePruned(iteration: $0, distance: $1) } + } + + /// A retention policy that prunes a snapshot iteration when either specified policy allows it to be pruned. + /// + /// This policy is useful to indicate that at most the specified number of transactions should be kept around, for at most a specified amount of time: + /// + /// persistence.retentionPolicy = .either(.transactionCount(10), or: .duration(.days(2))) + /// + /// As a result, this policy errs on the side of removing transactions when compared with ``both(_:and:)``. + /// + /// - Parameters: + /// - lhs: A policy to evaluate. + /// - rhs: Another policy to evaluate. + /// - Returns: A policy that ensures either `lhs` or `rhs` allows a transaction to be pruned before actually pruning it.
+ public static func either(_ lhs: SnapshotRetentionPolicy, or rhs: SnapshotRetentionPolicy) -> Self { + guard !lhs.isNone, !rhs.isNone else { return .none } + if lhs.isIndefinite { return rhs } + if rhs.isIndefinite { return lhs } + return SnapshotRetentionPolicy { lhs.shouldIterationBePruned(iteration: $0, distance: $1) || rhs.shouldIterationBePruned(iteration: $0, distance: $1) } + } + + /// Internal method to check if an iteration should be pruned and removed from disk. + /// + /// - Parameter iteration: The iteration to check. + /// - Parameter distance: How far the iteration is from the current root. The current root is `0` away from itself, while the next oldest iteration has a distance of `1`. + /// - Returns: `true` if the iteration, all its ancestors, and all its other descendants should be pruned, `false` if the next iteration should be checked. + func shouldIterationBePruned(iteration: SnapshotIteration, distance: Int) -> Bool { + shouldPrune(iteration, distance) + } +} + +/// The duration of time for which snapshot iterations should be retained. +public struct RetentionDuration: Hashable, Sendable { + /// Internal representation of a retention duration. + @usableFromInline + var timeInterval: TimeInterval + + /// Internal initializer for creating a retention duration from a time interval. + @usableFromInline + init(timeInterval: TimeInterval) { + self.timeInterval = timeInterval + } + + /// A retention duration in seconds. + @inlinable + public static func seconds(_ seconds: Int) -> Self { + RetentionDuration(timeInterval: TimeInterval(seconds)) + } + + /// A retention duration in seconds. + @inlinable + public static func seconds(_ seconds: Float) -> Self { + RetentionDuration(timeInterval: TimeInterval(seconds)) + } + + /// A retention duration in minutes. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with minutes when leap seconds are applied. + @inlinable + public static func minutes(_ minutes: Int) -> Self { + RetentionDuration(timeInterval: TimeInterval(minutes)*60) + } + + /// A retention duration in minutes. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with minutes when leap seconds are applied. + @inlinable + public static func minutes(_ minutes: Float) -> Self { + RetentionDuration(timeInterval: TimeInterval(minutes)*60) + } + + /// A retention duration in hours. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with hours on a calendar across events like seasonal time changes dependent on timezone. + @inlinable + public static func hours(_ hours: Int) -> Self { + RetentionDuration(timeInterval: TimeInterval(hours)*60*60) + } + + /// A retention duration in hours. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with hours on a calendar across events like seasonal time changes dependent on timezone. + @inlinable + public static func hours(_ hours: Float) -> Self { + RetentionDuration(timeInterval: TimeInterval(hours)*60*60) + } + + /// A retention duration in 24 hour days.
+ /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with days on a calendar across events like seasonal time changes dependent on timezone. + @inlinable + public static func days(_ days: Int) -> Self { + RetentionDuration(timeInterval: TimeInterval(days)*60*60*24) + } + + /// A retention duration in 24 hour days. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with days on a calendar across events like seasonal time changes dependent on timezone. + @inlinable + public static func days(_ days: Float) -> Self { + RetentionDuration(timeInterval: TimeInterval(days)*60*60*24) + } + + /// A retention duration in weeks, defined as seven 24 hour days. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with days on a calendar across events like seasonal time changes dependent on timezone. + @inlinable + public static func weeks(_ weeks: Int) -> Self { + RetentionDuration(timeInterval: TimeInterval(weeks)*60*60*24*7) + } + + /// A retention duration in weeks, defined as seven 24 hour days. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with days on a calendar across events like seasonal time changes dependent on timezone. + @inlinable + public static func weeks(_ weeks: Float) -> Self { + RetentionDuration(timeInterval: TimeInterval(weeks)*60*60*24*7) + } + + /// A retention duration in months, defined as thirty 24 hour days. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with days or even months on a calendar across events like seasonal time changes dependent on timezone, different length months, or leap days. + @inlinable + public static func months(_ months: Int) -> Self { + RetentionDuration(timeInterval: TimeInterval(months)*60*60*24*30) + } + + /// A retention duration in months, defined as thirty 24 hour days. + /// + /// - Warning: This duration does not take into account timezones or calendar dates, and strictly represents a duration of time. It therefore makes no guarantees to line up with days or even months on a calendar across events like seasonal time changes dependent on timezone, different length months, or leap days. 
+ @inlinable + public static func months(_ months: Float) -> Self { + RetentionDuration(timeInterval: TimeInterval(months)*60*60*24*30) + } +} + +extension RetentionDuration: Comparable { + @inlinable + public static func < (lhs: Self, rhs: Self) -> Bool { + lhs.timeInterval < rhs.timeInterval + } +} + +extension RetentionDuration: AdditiveArithmetic { + public static let zero = RetentionDuration(timeInterval: 0) + + @inlinable + public prefix static func + (rhs: Self) -> Self { + rhs + } + + @inlinable + public prefix static func - (rhs: Self) -> Self { + RetentionDuration(timeInterval: -rhs.timeInterval) + } + + @inlinable + public static func + (lhs: Self, rhs: Self) -> Self { + RetentionDuration(timeInterval: lhs.timeInterval + rhs.timeInterval) + } + + @inlinable + public static func += (lhs: inout Self, rhs: Self) { + lhs.timeInterval += rhs.timeInterval + } + + @inlinable + public static func - (lhs: Self, rhs: Self) -> Self { + RetentionDuration(timeInterval: lhs.timeInterval - rhs.timeInterval) + } + + @inlinable + public static func -= (lhs: inout Self, rhs: Self) { + lhs.timeInterval -= rhs.timeInterval + } +} From f1a6a91b320460878fdfb65a5c14afbf37cec765 Mon Sep 17 00:00:00 2001 From: Dimitri Bouniol Date: Wed, 9 Oct 2024 06:52:47 -0700 Subject: [PATCH 2/5] Improved datastore root and snapshot iteration caching --- .../Datastore/PersistenceDatastore.swift | 6 +- .../Disk Persistence/DiskPersistence.swift | 2 +- .../Disk Persistence/Snapshot/Snapshot.swift | 56 ++++++++++--------- .../Snapshot/SnapshotIteration.swift | 6 ++ 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift index f1ac416..7d754ed 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift @@ -217,14 +217,16 @@ extension DiskPersistence.Datastore { extension DiskPersistence.Datastore { /// Load the root object from disk for the given identifier. 
- func loadRootObject(for rootIdentifier: DatastoreRootIdentifier) throws -> DatastoreRootManifest { + func loadRootObject(for rootIdentifier: DatastoreRootIdentifier, shouldCache: Bool = true) throws -> DatastoreRootManifest { let rootObjectURL = rootURL(for: rootIdentifier) let data = try Data(contentsOf: rootObjectURL) let root = try JSONDecoder.shared.decode(DatastoreRootManifest.self, from: data) - cachedRootObject = root + if shouldCache { + cachedRootObject = root + } return root } diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift index ba11304..c789d82 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift @@ -291,7 +291,7 @@ extension DiskPersistence { return snapshot } - let snapshot = Snapshot(id: snapshotID, persistence: self) + let snapshot = Snapshot(id: snapshotID, persistence: self, isExtendedIterationCacheEnabled: !_transactionRetentionPolicy.isIndefinite) snapshots[snapshotID] = snapshot return snapshot diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift index 15e15dd..0b9ab92 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift @@ -30,8 +30,9 @@ actor Snapshot { /// A cached instance of the manifest as last loaded from disk. var cachedManifest: SnapshotManifest? - /// A cached instance of the current iteration as last loaded from disk. - var cachedIteration: SnapshotIteration? + /// Cache for the loaded iterations as last loaded from disk. ``isExtendedIterationCacheEnabled`` controls if multiple iterations are cached or not. + var cachedIterations: [SnapshotIterationIdentifier : SnapshotIteration] = [:] + var isExtendedIterationCacheEnabled: Bool /// A pointer to the last manifest updater, so updates can be serialized after the last request. var lastUpdateManifestTask: Task? @@ -42,11 +43,13 @@ actor Snapshot { init( id: SnapshotIdentifier, persistence: DiskPersistence, - isBackup: Bool = false + isBackup: Bool = false, + isExtendedIterationCacheEnabled: Bool = false ) { self.id = id self.persistence = persistence self.isBackup = isBackup + self.isExtendedIterationCacheEnabled = isExtendedIterationCacheEnabled } } @@ -124,14 +127,25 @@ extension Snapshot { } } + func setExtendedIterationCacheEnabled(_ isEnabled: Bool) { + isExtendedIterationCacheEnabled = isEnabled + } + /// Load an iteration from disk, or create a suitable starting value if such a file does not exist. - private func loadIteration(for iterationID: SnapshotIterationIdentifier) throws -> SnapshotIteration { + func loadIteration(for iterationID: SnapshotIterationIdentifier?) async throws -> SnapshotIteration? 
{ + guard let iterationID else { return nil } + if let iteration = cachedIterations[iterationID] { + return iteration + } do { let data = try Data(contentsOf: iterationURL(for: iterationID)) let iteration = try JSONDecoder.shared.decode(SnapshotIteration.self, from: data) - cachedIteration = iteration + if !isExtendedIterationCacheEnabled { + cachedIterations.removeAll() + } + cachedIterations[iteration.id] = iteration return iteration } catch { throw error @@ -155,7 +169,7 @@ extension Snapshot { cachedManifest = manifest } - /// Write the specified iteration to the store, and cache the results in ``Snapshot/cachedIteration``. + /// Write the specified iteration to the store, and cache the results in ``Snapshot/cachedIterations``. private func write(iteration: SnapshotIteration) throws where AccessMode == ReadWrite { let iterationURL = iterationURL(for: iteration.id) /// Make sure the directories exists first. @@ -166,7 +180,10 @@ extension Snapshot { try data.write(to: iterationURL, options: .atomic) /// Update the cache since we know what it should be. - cachedIteration = iteration + if !isExtendedIterationCacheEnabled { + cachedIterations.removeAll() + } + cachedIterations[iteration.id] = iteration } /// Load and update the manifest in an updater, returning the task for the updater. @@ -200,15 +217,8 @@ extension Snapshot { /// Load the manifest so we have a fresh copy, unless we have a cached copy already. var manifest = try cachedManifest ?? self.loadManifest() - var iteration: SnapshotIteration - if let cachedIteration, cachedIteration.id == manifest.currentIteration { - iteration = cachedIteration - } else if let iterationID = manifest.currentIteration { - iteration = try self.loadIteration(for: iterationID) - } else { - let date = Date() - iteration = SnapshotIteration(id: SnapshotIterationIdentifier(date: date), creationDate: date) - } + let precedingIteration = try await self.loadIteration(for: manifest.currentIteration) + var iteration = precedingIteration ?? SnapshotIteration() /// Let the updater do something with the manifest, storing the variable on the Task Local stack. let returnValue = try await SnapshotTaskLocals.$manifest.withValue((manifest, iteration)) { @@ -216,10 +226,10 @@ extension Snapshot { } /// Only write to the store if we changed the manifest for any reason - if iteration.isMeaningfullyChanged(from: cachedIteration) { + if iteration.isMeaningfullyChanged(from: precedingIteration) { iteration.creationDate = Date() iteration.id = SnapshotIterationIdentifier(date: iteration.creationDate) - iteration.precedingIteration = cachedIteration?.id + iteration.precedingIteration = precedingIteration?.id try write(iteration: iteration) } @@ -260,15 +270,7 @@ extension Snapshot { /// Load the manifest so we have a fresh copy, unless we have a cached copy already. let manifest = try cachedManifest ?? self.loadManifest() - var iteration: SnapshotIteration - if let cachedIteration, cachedIteration.id == manifest.currentIteration { - iteration = cachedIteration - } else if let iterationID = manifest.currentIteration { - iteration = try self.loadIteration(for: iterationID) - } else { - let date = Date() - iteration = SnapshotIteration(id: SnapshotIterationIdentifier(date: date), creationDate: date) - } + let iteration = try await self.loadIteration(for: manifest.currentIteration) ?? SnapshotIteration() /// Let the accessor do something with the manifest, storing the variable on the Task Local stack. 
return try await SnapshotTaskLocals.$manifest.withValue((manifest, iteration)) { diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift index 4feb622..a3f8b16 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift @@ -68,6 +68,12 @@ extension SnapshotIteration { } extension SnapshotIteration { + /// Initialize a snapshot iteration with a date + /// - Parameter date: The date to base the identifier and creation date off of. + init(date: Date = Date()) { + self.init(id: SnapshotIterationIdentifier(date: date), creationDate: date) + } + /// Internal method to check if an instance should be persisted based on iff it changed significantly from a previous iteration /// - Parameter existingInstance: The previous iteration to check /// - Returns: `true` if the iteration should be persisted, `false` if it represents the same data from `existingInstance`. From 2c259ec69a28b116de941222f3e7d795c64f3ed0 Mon Sep 17 00:00:00 2001 From: Dimitri Bouniol Date: Wed, 9 Oct 2024 06:52:50 -0700 Subject: [PATCH 3/5] Added the ability for a persistence to auto-prune snapshot iterations and assets according to the specified retention policy --- README.md | 5 +- .../Datastore/DatastoreIndexManifest.swift | 9 + .../Datastore/DatastoreRootManifest.swift | 22 ++ .../Datastore/PersistenceDatastore.swift | 103 ++++++++ .../Disk Persistence/DiskPersistence.swift | 162 +++++++++++- .../FileManager+Helpers.swift | 30 +++ .../Disk Persistence/Snapshot/Snapshot.swift | 67 +++++ .../Snapshot/SnapshotIteration.swift | 20 ++ ...skPersistenceDatastoreRetentionTests.swift | 245 ++++++++++++++++++ .../FileManagerTests.swift | 127 +++++++++ 10 files changed, 786 insertions(+), 4 deletions(-) create mode 100644 Sources/CodableDatastore/Persistence/Disk Persistence/FileManager+Helpers.swift create mode 100644 Tests/CodableDatastoreTests/DiskPersistenceDatastoreRetentionTests.swift create mode 100644 Tests/CodableDatastoreTests/FileManagerTests.swift diff --git a/README.md b/README.md index 30e6b93..c2868db 100644 --- a/README.md +++ b/README.md @@ -261,8 +261,7 @@ Note that in the example above, even though the author is persisted first, if an As this project matures towards release, the project will focus on the functionality and work listed below: - Force migration methods -- Composite indexes (via macros?) -- Cleaning up old resources on disk +- Composite indexes - Ranged deletes - Controls for the edit history - Helper types to use with SwiftUI/Observability/Combine that can make data available on the main actor and filter and stay up to date @@ -271,7 +270,7 @@ As this project matures towards release, the project will focus on the functiona - An example app - A memory persistence useful for testing apps with - A pre-configured data store tuned to storing pure Data, useful for types like Images -- Cleaning up memory leaks +- Cleaning up memory and file descriptor leaks The above list will be kept up to date during development and will likely see additions during that process. 
diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreIndexManifest.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreIndexManifest.swift index 4d45933..afd2ef3 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreIndexManifest.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreIndexManifest.swift @@ -86,6 +86,15 @@ extension DatastoreIndexManifest { } } +extension DatastoreIndexManifest { + func pagesToPrune(for mode: SnapshotPruneMode) -> Set { + switch mode { + case .pruneRemoved: Set(removedPageIDs) + case .pruneAdded: Set(addedPageIDs) + } + } +} + // MARK: - Decoding extension DatastoreIndexManifest { diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRootManifest.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRootManifest.swift index 4a0a799..dc78a8f 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRootManifest.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRootManifest.swift @@ -101,3 +101,25 @@ extension DatastoreRootManifest { } } } + +extension DatastoreRootManifest { + func indexesToPrune(for mode: SnapshotPruneMode) -> Set { + switch mode { + case .pruneRemoved: removedIndexes + case .pruneAdded: addedIndexes + } + } + + func indexManifestsToPrune( + for mode: SnapshotPruneMode, + options: SnapshotPruneOptions + ) -> Set { + switch (mode, options) { + case (.pruneRemoved, .pruneAndDelete): removedIndexManifests + case (.pruneAdded, .pruneAndDelete): addedIndexManifests + /// Flip the results when we aren't deleting, but only when removing from the bottom end. + case (.pruneRemoved, .pruneOnly): addedIndexManifests + case (.pruneAdded, .pruneOnly): [] + } + } +} diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift index 7d754ed..cccc883 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/PersistenceDatastore.swift @@ -156,6 +156,109 @@ extension DiskPersistence.Datastore { } } + func pruneRootObject(with identifier: RootObject.ID, mode: SnapshotPruneMode, shouldDelete: Bool) async throws { + let fileManager = FileManager() + let rootObject = try loadRootObject(for: identifier, shouldCache: false) + + /// Collect the indexes and related manifests we'll be deleting. + /// - For indexes, only collect the ones we'll be deleting since the ones we are keeping won't be making references to other deletable assets. + /// - For the manifests, we'll be deleting the entries that are being removed (relative to the direction we are removing from, so the removed ones from the oldest edge, and the added ones from the newest edge, as determined by the caller), while we'll be checking for pages to remove from entries that have just been added, but only when removing from the oldest edge. We only do this for the oldest edge because pages that have been "removed" from the newest edge are actually being _restored_ and not replaced, which maintains symmetry in a non-obvious way. 
+ let indexesToPruneAndDelete = rootObject.indexesToPrune(for: mode) + let indexManifestsToPruneAndDelete = rootObject.indexManifestsToPrune(for: mode, options: .pruneAndDelete) + let indexManifestsToPrune = rootObject.indexManifestsToPrune(for: mode, options: .pruneOnly) + + /// Delete the index manifests and pages we know to be removed. + for indexManifestID in indexManifestsToPruneAndDelete { + let indexID = Index.ID(indexManifestID) + defer { + trackedIndexes.removeValue(forKey: indexID) + loadedIndexes.remove(indexID) + } + /// Skip any manifests for indexes being deleted, since we'll just unlink the whole directory in that case. + guard !indexesToPruneAndDelete.contains(indexID.indexID) else { continue } + + let manifestURL = manifestURL(for: indexID) + let manifest: DatastoreIndexManifest? + do { + manifest = try await DatastoreIndexManifest(contentsOf: manifestURL, id: indexID.manifestID) + } catch URLError.fileDoesNotExist, CocoaError.fileReadNoSuchFile, CocoaError.fileNoSuchFile, POSIXError.ENOENT { + manifest = nil + } catch { + print("Uncaught Manifest Error: \(error)") + throw error + } + + guard let manifest else { continue } + + /// Only delete the pages we know to be removed + let pagesToPruneAndDelete = manifest.pagesToPrune(for: mode) + for pageID in pagesToPruneAndDelete { + let indexedPageID = Page.ID(index: indexID, page: pageID) + defer { + trackedPages.removeValue(forKey: indexedPageID.withoutManifest) + loadedPages.remove(indexedPageID.withoutManifest) + } + + let pageURL = pageURL(for: indexedPageID) + + try? fileManager.removeItem(at: pageURL) + try? fileManager.removeDirectoryIfEmpty(url: pageURL.deletingLastPathComponent(), recursivelyRemoveParents: true) + } + + try? fileManager.removeItem(at: manifestURL) + } + + /// Prune the index manifests that were just added, as they themselves refer to other deleted pages. + for indexManifestID in indexManifestsToPrune { + let indexID = Index.ID(indexManifestID) + /// Skip any manifests for indexes being deleted, since we'll just unlink the whole directory in that case. + guard !indexesToPruneAndDelete.contains(indexID.indexID) else { continue } + + let manifestURL = manifestURL(for: indexID) + let manifest: DatastoreIndexManifest? + do { + manifest = try await DatastoreIndexManifest(contentsOf: manifestURL, id: indexID.manifestID) + } catch URLError.fileDoesNotExist, CocoaError.fileReadNoSuchFile, CocoaError.fileNoSuchFile, POSIXError.ENOENT { + manifest = nil + } catch { + print("Uncaught Manifest Error: \(error)") + throw error + } + + guard let manifest else { continue } + + /// Only delete the pages we know to be removed + let pagesToPruneAndDelete = manifest.pagesToPrune(for: mode) + for pageID in pagesToPruneAndDelete { + let indexedPageID = Page.ID(index: indexID, page: pageID) + defer { + trackedPages.removeValue(forKey: indexedPageID.withoutManifest) + loadedPages.remove(indexedPageID.withoutManifest) + } + + let pageURL = pageURL(for: indexedPageID) + + try? fileManager.removeItem(at: pageURL) + try? fileManager.removeDirectoryIfEmpty(url: pageURL.deletingLastPathComponent(), recursivelyRemoveParents: true) + } + } + + /// Delete any indexes in their entirety. + for indexID in indexesToPruneAndDelete { + try? fileManager.removeItem(at: indexURL(for: indexID)) + } + + /// If we are deleting the root object itself, do so at the very end as everything else would have been cleaned up. 
+ if shouldDelete { + trackedRootObjects.removeValue(forKey: identifier) + loadedRootObjects.remove(identifier) + + let rootURL = rootURL(for: rootObject.id) + try? fileManager.removeItem(at: rootURL) + try? fileManager.removeDirectoryIfEmpty(url: rootURL.deletingLastPathComponent(), recursivelyRemoveParents: true) + } + } + func index(for identifier: Index.ID) -> Index { if let index = trackedIndexes[identifier]?.value { return index diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift index c789d82..cc36554 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift @@ -29,6 +29,11 @@ public actor DiskPersistence: Persistence { var lastTransaction: Transaction? + var _transactionRetentionPolicy: SnapshotRetentionPolicy = .indefinite + + var nextSnapshotIterationCandidateToEnforce: (snapshot: Snapshot, iteration: SnapshotIteration)? + var snapshotIterationPruningTask: Task? + /// Shared caches across all snapshots and datastores. var rollingRootObjectCacheIndex = 0 var rollingRootObjectCache: [Datastore.RootObject] = [] @@ -59,6 +64,10 @@ public actor DiskPersistence: Persistence { storeURL = readOnlyURL } + deinit { + snapshotIterationPruningTask?.cancel() + } + /// The default URL to use for disk persistences. static var defaultURL: URL { // TODO: Make non-throwing: https://github.com/mochidev/CodableDatastore/issues/15 @@ -517,7 +526,7 @@ extension DiskPersistence { else { throw DiskPersistenceError.cannotWrite } /// If we are read-write, apply the updated root objects to the snapshot. - try await self.updatingCurrentSnapshot { snapshot in + let (currentSnapshot, persistedIteration) = try await self.updatingCurrentSnapshot { snapshot in try await snapshot.updatingManifest { manifest, iteration in iteration.actionName = actionName iteration.addedDatastoreRoots = addedDatastoreRoots @@ -529,8 +538,159 @@ extension DiskPersistence { root: root.id ) } + return (snapshot, iteration) + } + } + + enforceRetentionPolicy(snapshot: currentSnapshot, fromIteration: persistedIteration) + } +} + +// MARK: - Retention Policy + +extension DiskPersistence where AccessMode == ReadWrite { + /// The current transaction retention policy for snapshot iterations written to disk. + public var transactionRetentionPolicy: SnapshotRetentionPolicy { + get async { + _transactionRetentionPolicy + } + } + + /// Update the transaction retention policy for snapshot iterations written to disk. + /// + /// - Parameter policy: The new policy to enforce on write. + /// + /// - SeeAlso: ``SnapshotRetentionPolicy``. + public func setTransactionRetentionPolicy(_ policy: SnapshotRetentionPolicy) async { + _transactionRetentionPolicy = policy + for (_, snapshot) in snapshots { + await snapshot.setExtendedIterationCacheEnabled(!_transactionRetentionPolicy.isIndefinite) + } + } + + /// Enforce the retention policy on the persistence immediately. + /// + /// - Note: Transaction retention policies are enforced after every write transaction, so calling this method directly is often unnecessary. However, it can be useful if the user requires disk resources immediately. + public func enforceRetentionPolicy() async { + // TODO: Don't create any snapshots if they don't exist yet + let info = try?
await self.readingCurrentSnapshot { snapshot in + try await snapshot.readingManifest { manifest, iteration in + (snapshot: snapshot, iteration: iteration) + } + } + + if let (snapshot, iteration) = info { + enforceRetentionPolicy(snapshot: snapshot, fromIteration: iteration) + } + + await finishTransactionCleanup() + } +} + +extension DiskPersistence { + /// Internal method to enforce the retention policy after a transaction is written. + private func enforceRetentionPolicy(snapshot: Snapshot, fromIteration iteration: SnapshotIteration) { + nextSnapshotIterationCandidateToEnforce = (snapshot, iteration) + + if let snapshotIterationPruningTask { + /// Update the next snapshot iteration we should be checking, and cancel the existing task so we can move on to checking this iteration. + snapshotIterationPruningTask.cancel() + return + } + + /// Update the next snapshot iteration we should be checking, and enqueue a task since we know one isn't currently running. + checkNextSnapshotIterationCandidateForPruning() + } + + /// Private method to check the next candidate for pruning. + /// + /// First, this method walks down the linked list defining the iteration chain, from newest to oldest, and collects the iterations that should be pruned. Then, it iterates that list in reverse (from oldest to newest) actually removing the iterations as they are encountered. + /// - Note: This method should only ever be called when it is known that no `snapshotIterationPruningTask` is ongoing (it is nil), or when one just finishes. + @discardableResult + private func checkNextSnapshotIterationCandidateForPruning() -> Task? { + let transactionRetentionPolicy = _transactionRetentionPolicy + let iterationCandidate = nextSnapshotIterationCandidateToEnforce + + snapshotIterationPruningTask = nil + nextSnapshotIterationCandidateToEnforce = nil + + guard let (snapshot, iteration) = iterationCandidate, !transactionRetentionPolicy.isIndefinite + else { return nil } + + snapshotIterationPruningTask = Task.detached(priority: .background) { + await snapshot.setExtendedIterationCacheEnabled(true) + do { + var iterations: [SnapshotIteration] = [] + var distance = 1 + var mainlineSuccessorIteration = iteration + var currentIteration = iteration + + /// First, walk the preceding iteration chain to the oldest iteration we can open, collecting the ones that should be pruned. + while let precedingIterationID = currentIteration.precedingIteration, let precedingIteration = try? await snapshot.loadIteration(for: precedingIterationID) { + try Task.checkCancellation() + + if !iterations.isEmpty || transactionRetentionPolicy.shouldIterationBePruned(iteration: precedingIteration, distance: distance) { + iterations.append(precedingIteration) + } else { + mainlineSuccessorIteration = precedingIteration + } + currentIteration = precedingIteration + + distance += 1 + await Task.yield() + } + + /// Prune iterations from oldest to newest. + for (index, iteration) in iterations.enumerated().reversed() { + let mainlineSuccessorIteration = index > 0 ? iterations[index-1] : mainlineSuccessorIteration + + var iterationsToPrune: [SnapshotIteration] = [] + var successorCandidatesToCheck = iteration.successiveIterations + successorCandidatesToCheck.removeAll { $0 == mainlineSuccessorIteration.id } + + /// Walk the successor candidates all the way back up so newer iterations are pruned before the ones that reference them. We pull items off from the end, and add new ones to the beginning to make sure they stay in graph order.
+ while let successorCandidateID = successorCandidatesToCheck.popLast() { + try Task.checkCancellation() + guard let successorIteration = try? await snapshot.loadIteration(for: successorCandidateID) + else { continue } + + iterationsToPrune.append(successorIteration) + successorCandidatesToCheck.insert(contentsOf: successorIteration.successiveIterations, at: 0) + await Task.yield() + } + + /// First, remove the branch of iterations based on the one we are removing, but representing a history that was previously reverted. + /// Prune the iterations in atomic tasks so they don't get cancelled mid-way, and instead check for cancellation in between iterations. + for iteration in iterationsToPrune.reversed() { + try Task.checkCancellation() + try await Task { try await snapshot.pruneIteration(iteration, mode: .pruneAdded, shouldDelete: true) }.value + await Task.yield() + } + + /// Finally, prune the iteration itself. + try Task.checkCancellation() + try await Task { try await snapshot.pruneIteration(iteration, mode: .pruneRemoved, shouldDelete: true) }.value + await Task.yield() + } + + try Task.checkCancellation() + try await Task { try await snapshot.pruneIteration(mainlineSuccessorIteration, mode: .pruneRemoved, shouldDelete: false) }.value + await Task.yield() + } catch { + print("Pruning stopped: \(error)") } + + await self.checkNextSnapshotIterationCandidateForPruning()?.value } + + return snapshotIterationPruningTask + } + + /// Await any cleanup since the last complete write transaction to the persistence. + /// + /// - Note: An application is not required to await cleanup, as it'll be eventually completed on future runs. It is however useful in cases when disk resources must be cleared before progressing to another step. + public func finishTransactionCleanup() async { + await snapshotIterationPruningTask?.value } } diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/FileManager+Helpers.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/FileManager+Helpers.swift new file mode 100644 index 0000000..6906221 --- /dev/null +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/FileManager+Helpers.swift @@ -0,0 +1,30 @@ +// +// FileManager+Helpers.swift +// CodableDatastore +// +// Created by Dimitri Bouniol on 2024-09-08. +// Copyright © 2023-24 Mochi Development, Inc. All rights reserved. 
+// + +import Foundation + +enum DirectoryRemovalError: Error { + case missingEnumerator +} + +extension FileManager { + func removeDirectoryIfEmpty(url: URL, recursivelyRemoveParents: Bool) throws { + guard let enumerator = self.enumerator(at: url, includingPropertiesForKeys: [], options: [.skipsHiddenFiles, .skipsSubdirectoryDescendants, .skipsPackageDescendants, .includesDirectoriesPostOrder]) + else { throw DirectoryRemovalError.missingEnumerator } + + for case _ as URL in enumerator { + /// If this is called a single time, then we don't have an empty directory, and can stop + return + } + + try self.removeItem(at: url) + + guard recursivelyRemoveParents else { return } + try self.removeDirectoryIfEmpty(url: url.deletingLastPathComponent(), recursivelyRemoveParents: recursivelyRemoveParents) + } +} diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift index 0b9ab92..ccd3aa6 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift @@ -152,6 +152,63 @@ extension Snapshot { } } + func pruneIteration(_ iteration: SnapshotIteration, mode: SnapshotPruneMode, shouldDelete: Bool) async throws { + /// Collect the datastores and related roots we'll be deleting. + /// - For datastores, only collect the ones we'll be deleting since the ones we are keeping won't be making references to other deletable assets. + /// - For the datastore roots, we'll be deleting the entries that are being removed (relative to the direction we are removing from, so the removed ones from the oldest edge, and the added ones from the newest edge, as determined by the caller), while we'll be checking for more assets to remove from entries that have just been added, but only when removing from the oldest edge. We only do this for the oldest edge because entries that have been "removed" from the newest edge are actually being _restored_ and not replaced, which maintains symmetry in a non-obvious way. + let datastoresToPruneAndDelete = iteration.datastoresToPrune(for: mode) + var datastoreRootsToPruneAndDelete = iteration.datastoreRootsToPrune(for: mode, options: .pruneAndDelete) + var datastoreRootsToPrune = iteration.datastoreRootsToPrune(for: mode, options: .pruneOnly) + + let fileManager = FileManager() + + /// Start by deleting and pruning roots as needed. + if !datastoreRootsToPruneAndDelete.isEmpty || !datastoreRootsToPrune.isEmpty { + for (_, datastoreInfo) in iteration.dataStores { + /// Skip any roots for datastores being deleted, since we'll just unlink the whole directory in that case. + guard !datastoresToPruneAndDelete.contains(datastoreInfo.id) else { continue } + + let datastore = datastores[datastoreInfo.id] ?? DiskPersistence.Datastore(id: datastoreInfo.id, snapshot: self) + + /// Delete the root entries we know to be removed. + for datastoreRoot in datastoreRootsToPruneAndDelete { + // TODO: Clean this up by also storing the datastore ID in with the root ID… + do { + try await datastore.pruneRootObject(with: datastoreRoot, mode: mode, shouldDelete: true) + datastoreRootsToPruneAndDelete.remove(datastoreRoot) + } catch { + /// This datastore did not contain the specified root, skip it for now. + } + } + + /// Prune the root entries that were just added, as they themselves refer to other deleted assets. 
+ for datastoreRoot in datastoreRootsToPrune { + // TODO: Clean this up by also storing the datastore ID in with the root ID… + do { + try await datastore.pruneRootObject(with: datastoreRoot, mode: mode, shouldDelete: false) + datastoreRootsToPrune.remove(datastoreRoot) + } catch { + /// This datastore did not contain the specified root, skip it for now. + } + } + } + } + + /// Delete any datastores in their entirety. + for datastoreID in datastoresToPruneAndDelete { + try? fileManager.removeItem(at: datastoreURL(for: datastoreID)) + } + + /// If we are deleting the instance itself, do so at the very end as everything else would have been cleaned up. + if shouldDelete { + cachedIterations.removeValue(forKey: iteration.id) + + let iterationURL = iterationURL(for: iteration.id) + try? fileManager.removeItem(at: iterationURL) + try? fileManager.removeDirectoryIfEmpty(url: iterationURL.deletingLastPathComponent(), recursivelyRemoveParents: true) + } + } + /// Write the specified manifest to the store, and cache the results in ``Snapshot/cachedManifest``. private func write(manifest: SnapshotManifest) throws where AccessMode == ReadWrite { /// Make sure the directories exists first. @@ -319,6 +376,16 @@ private enum SnapshotTaskLocals { static var manifest: (SnapshotManifest, SnapshotIteration)? } +enum SnapshotPruneMode { + case pruneRemoved + case pruneAdded +} + +enum SnapshotPruneOptions { + case pruneAndDelete + case pruneOnly +} + // MARK: - Datastore Management extension Snapshot { /// Load the datastore for the given key. diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift index a3f8b16..39df7a3 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift @@ -83,4 +83,24 @@ extension SnapshotIteration { else { return true } return false } + + func datastoresToPrune(for mode: SnapshotPruneMode) -> Set { + switch mode { + case .pruneRemoved: removedDatastores + case .pruneAdded: addedDatastores + } + } + + func datastoreRootsToPrune( + for mode: SnapshotPruneMode, + options: SnapshotPruneOptions + ) -> Set { + switch (mode, options) { + case (.pruneRemoved, .pruneAndDelete): removedDatastoreRoots + case (.pruneAdded, .pruneAndDelete): addedDatastoreRoots + /// Flip the results when we aren't deleting, but only when removing from the bottom end. + case (.pruneRemoved, .pruneOnly): addedDatastoreRoots + case (.pruneAdded, .pruneOnly): [] + } + } } diff --git a/Tests/CodableDatastoreTests/DiskPersistenceDatastoreRetentionTests.swift b/Tests/CodableDatastoreTests/DiskPersistenceDatastoreRetentionTests.swift new file mode 100644 index 0000000..5572d8c --- /dev/null +++ b/Tests/CodableDatastoreTests/DiskPersistenceDatastoreRetentionTests.swift @@ -0,0 +1,245 @@ +// +// DiskPersistenceDatastoreRetentionTests.swift +// CodableDatastore +// +// Created by Dimitri Bouniol on 2024-09-09. +// Copyright © 2023-24 Mochi Development, Inc. All rights reserved. 
+// + +#if !canImport(Darwin) +@preconcurrency import Foundation +#endif +import XCTest +@testable import CodableDatastore + +final class DiskPersistenceDatastoreRetentionTests: XCTestCase, @unchecked Sendable { + var temporaryStoreURL: URL = FileManager.default.temporaryDirectory + + override func setUp() async throws { + temporaryStoreURL = FileManager.default.temporaryDirectory.appendingPathComponent(ProcessInfo.processInfo.globallyUniqueString, isDirectory: true); + } + + override func tearDown() async throws { + try? FileManager.default.removeItem(at: temporaryStoreURL) + } + + func testTransactionCountPrunedDatastoreStillReadable() async throws { + struct TestFormat: DatastoreFormat { + enum Version: Int, CaseIterable { + case zero + } + + struct Instance: Codable, Identifiable { + var id: String + var value: String + var index: Int + var bucket: Int + } + + static let defaultKey: DatastoreKey = "test" + static let currentVersion = Version.zero + + let index = OneToOneIndex(\.index) + @Direct var bucket = Index(\.bucket) + } + + let max = 1000 + + do { + let persistence = try DiskPersistence(readWriteURL: temporaryStoreURL) + + let datastore = Datastore.JSONStore( + persistence: persistence, + format: TestFormat.self, + migrations: [ + .zero: { data, decoder in + try decoder.decode(TestFormat.Instance.self, from: data) + } + ] + ) + + await persistence.setTransactionRetentionPolicy(.transactionCount(0)) + try await persistence.createPersistenceIfNecessary() + + for index in 0.. Date: Fri, 11 Oct 2024 06:27:24 -0700 Subject: [PATCH 4/5] Updated edit history to allow for faster retention pruning Closes #230 --- .../Datastore/DatastoreRoot.swift | 26 ++++++++++ .../Disk Persistence/DiskPersistence.swift | 4 +- .../Disk Persistence/Snapshot/Snapshot.swift | 47 ++++++++++++++++--- .../Snapshot/SnapshotIteration.swift | 6 +-- .../Transaction/Transaction.swift | 4 +- 5 files changed, 73 insertions(+), 14 deletions(-) diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRoot.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRoot.swift index 76f0cc8..46b4505 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRoot.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Datastore/DatastoreRoot.swift @@ -10,12 +10,38 @@ import Foundation typealias DatastoreRootIdentifier = DatedIdentifier.Datastore.RootObject> +struct DatastoreRootReference: Codable, Hashable { + var datastoreID: DatastoreIdentifier? 
+ var datastoreRootID: DatastoreRootIdentifier + + init(datastoreID: DatastoreIdentifier, datastoreRootID: DatastoreRootIdentifier) { + self.datastoreID = datastoreID + self.datastoreRootID = datastoreRootID + } + + init(from decoder: any Decoder) throws { + /// Attempt to decode a full object, otherwise fall back to a single value as it was prior to version 0.4 (2024-10-11) + do { + let container: KeyedDecodingContainer = try decoder.container(keyedBy: CodingKeys.self) + self.datastoreID = try container.decodeIfPresent(DatastoreIdentifier.self, forKey: .datastoreID) + self.datastoreRootID = try container.decode(DatastoreRootIdentifier.self, forKey: .datastoreRootID) + } catch { + self.datastoreID = nil + self.datastoreRootID = try decoder.singleValueContainer().decode(DatastoreRootIdentifier.self) + } + } +} + extension DiskPersistence.Datastore { actor RootObject: Identifiable { let datastore: DiskPersistence.Datastore let id: DatastoreRootIdentifier + nonisolated var referenceID: DatastoreRootReference { + DatastoreRootReference(datastoreID: datastore.id, datastoreRootID: id) + } + var _rootObject: DatastoreRootManifest? var isPersisted: Bool diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift index cc36554..b3cf4b3 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/DiskPersistence.swift @@ -505,8 +505,8 @@ extension DiskPersistence { func persist( actionName: String?, roots: [DatastoreKey : Datastore.RootObject], - addedDatastoreRoots: Set, - removedDatastoreRoots: Set + addedDatastoreRoots: Set, + removedDatastoreRoots: Set ) async throws { let containsEdits = try await readingCurrentSnapshot { snapshot in try await snapshot.readingManifest { manifest, iteration in diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift index ccd3aa6..d95e14b 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/Snapshot.swift @@ -162,7 +162,36 @@ extension Snapshot { let fileManager = FileManager() - /// Start by deleting and pruning roots as needed. + /// Start by deleting and pruning roots as needed. We attempt to do this twice, as older versions of the persistence (prior to 0.4) didn't record the datastore ID along with the root ID, which would therefore require extra work. + /// First, delete the root entries we know to be removed. + for datastoreRoot in datastoreRootsToPruneAndDelete { + guard let datastoreID = datastoreRoot.datastoreID else { continue } + let datastore = datastores[datastoreID] ?? DiskPersistence.Datastore(id: datastoreID, snapshot: self) + do { + try await datastore.pruneRootObject(with: datastoreRoot.datastoreRootID, mode: mode, shouldDelete: true) + } catch URLError.fileDoesNotExist, CocoaError.fileReadNoSuchFile, CocoaError.fileNoSuchFile, POSIXError.ENOENT { + /// This datastore root is already gone. + } catch { + print("Could not delete datastore root \(datastoreRoot): \(error)") + throw error + } + datastoreRootsToPruneAndDelete.remove(datastoreRoot) + } + /// Prune the root entries that were just added, as they themselves refer to other deleted assets.
+ for datastoreRoot in datastoreRootsToPrune { + guard let datastoreID = datastoreRoot.datastoreID else { continue } + let datastore = datastores[datastoreID] ?? DiskPersistence.Datastore(id: datastoreID, snapshot: self) + do { + try await datastore.pruneRootObject(with: datastoreRoot.datastoreRootID, mode: mode, shouldDelete: false) + } catch URLError.fileDoesNotExist, CocoaError.fileReadNoSuchFile, CocoaError.fileNoSuchFile, POSIXError.ENOENT { + /// This datastore root is already gone. + } catch { + print("Could not prune datastore root \(datastoreRoot): \(error)") + throw error + } + datastoreRootsToPrune.remove(datastoreRoot) + } + /// If any references remain, funnel into this code path for very old persistences. if !datastoreRootsToPruneAndDelete.isEmpty || !datastoreRootsToPrune.isEmpty { for (_, datastoreInfo) in iteration.dataStores { /// Skip any roots for datastores being deleted, since we'll just unlink the whole directory in that case. @@ -172,23 +201,27 @@ extension Snapshot { /// Delete the root entries we know to be removed. for datastoreRoot in datastoreRootsToPruneAndDelete { - // TODO: Clean this up by also storing the datastore ID in with the root ID… do { - try await datastore.pruneRootObject(with: datastoreRoot, mode: mode, shouldDelete: true) + try await datastore.pruneRootObject(with: datastoreRoot.datastoreRootID, mode: mode, shouldDelete: true) datastoreRootsToPruneAndDelete.remove(datastoreRoot) - } catch { + } catch URLError.fileDoesNotExist, CocoaError.fileReadNoSuchFile, CocoaError.fileNoSuchFile, POSIXError.ENOENT { /// This datastore did not contain the specified root, skip it for now. + } catch { + print("Could not delete datastore root \(datastoreRoot): \(error).") + throw error } } /// Prune the root entries that were just added, as they themselves refer to other deleted assets. for datastoreRoot in datastoreRootsToPrune { - // TODO: Clean this up by also storing the datastore ID in with the root ID… do { - try await datastore.pruneRootObject(with: datastoreRoot, mode: mode, shouldDelete: false) + try await datastore.pruneRootObject(with: datastoreRoot.datastoreRootID, mode: mode, shouldDelete: false) datastoreRootsToPrune.remove(datastoreRoot) - } catch { + } catch URLError.fileDoesNotExist, CocoaError.fileReadNoSuchFile, CocoaError.fileNoSuchFile, POSIXError.ENOENT { /// This datastore did not contain the specified root, skip it for now. + } catch { + print("Could not prune datastore root \(datastoreRoot): \(error).") + throw error } } } diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift index 39df7a3..1b3087f 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Snapshot/SnapshotIteration.swift @@ -48,10 +48,10 @@ struct SnapshotIteration: Codable, Equatable, Identifiable { var removedDatastores: Set = [] /// The datastore roots that have been added in this iteration of the snapshot. - var addedDatastoreRoots: Set = [] + var addedDatastoreRoots: Set = [] /// The datastore roots that have been replaced in this iteration of the snapshot.
- var removedDatastoreRoots: Set = [] + var removedDatastoreRoots: Set = [] } extension SnapshotIteration { @@ -94,7 +94,7 @@ extension SnapshotIteration { func datastoreRootsToPrune( for mode: SnapshotPruneMode, options: SnapshotPruneOptions - ) -> Set { + ) -> Set { switch (mode, options) { case (.pruneRemoved, .pruneAndDelete): removedDatastoreRoots case (.pruneAdded, .pruneAndDelete): addedDatastoreRoots diff --git a/Sources/CodableDatastore/Persistence/Disk Persistence/Transaction/Transaction.swift b/Sources/CodableDatastore/Persistence/Disk Persistence/Transaction/Transaction.swift index 240cfc5..ec64246 100644 --- a/Sources/CodableDatastore/Persistence/Disk Persistence/Transaction/Transaction.swift +++ b/Sources/CodableDatastore/Persistence/Disk Persistence/Transaction/Transaction.swift @@ -172,8 +172,8 @@ extension DiskPersistence { try await root.persistIfNeeded() } - let addedDatastoreRoots = Set(createdRootObjects.map(\.id)) - let removedDatastoreRoots = Set(deletedRootObjects.map(\.id)) + let addedDatastoreRoots = Set(createdRootObjects.map(\.referenceID)) + let removedDatastoreRoots = Set(deletedRootObjects.map(\.referenceID)) try await persistence.persist( actionName: actionName, From ef1d991687eec9e29799022b21ea7fec1adc493d Mon Sep 17 00:00:00 2001 From: Dimitri Bouniol Date: Fri, 11 Oct 2024 06:28:35 -0700 Subject: [PATCH 5/5] Updated readme to suggest 0.4.0 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c2868db..5c011b0 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Please check the [releases](https://github.com/mochidev/CodableDatastore/release dependencies: [ .package( url: "https://github.com/mochidev/CodableDatastore.git", - .upToNextMinor(from: "0.3.4") + .upToNextMinor(from: "0.4.0") ), ], ...
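Taken together, these patches let a read-write persistence prune old snapshot iterations automatically. The sketch below shows how the API introduced above might be configured from an app; it is illustrative only, so the store location, the `configureRetention` wrapper, and the chosen policy values are assumptions for the example rather than part of the patches.

```swift
import Foundation
import CodableDatastore

/// A minimal sketch of configuring snapshot retention with the API added in
/// this patch series. The store URL and policy values are assumptions.
func configureRetention() async throws {
    let storeURL = FileManager.default.temporaryDirectory
        .appendingPathComponent("ExampleStore", isDirectory: true)
    let persistence = try DiskPersistence(readWriteURL: storeURL)
    try await persistence.createPersistenceIfNecessary()

    // Keep the last 10 transactions, and anything newer than 2 days (PATCH 1/5).
    await persistence.setTransactionRetentionPolicy(
        .both(.transactionCount(10), and: .duration(.days(2)))
    )

    // Policies are enforced after every write transaction, but they can also be
    // applied on demand when disk space is needed immediately (PATCH 3/5).
    await persistence.enforceRetentionPolicy()
    await persistence.finishTransactionCleanup()
}
```

Because pruning runs on a detached background task, awaiting `finishTransactionCleanup()` is only needed when the application must wait for disk space to actually be reclaimed before continuing.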