Skip to content

Commit

Permalink
TOSQUASH use NEMap for NonEmptyRunData
Browse files Browse the repository at this point in the history
  • Loading branch information
mheinzel committed Mar 6, 2025
1 parent 1995a20 commit 442e8a8
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 24 deletions.
1 change: 1 addition & 0 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ library extras
, lsm-tree:control
, lsm-tree:kmerge
, lsm-tree:prototypes
, nonempty-containers
, nothunks
, primitive
, QuickCheck
Expand Down
9 changes: 3 additions & 6 deletions src-extras/Database/LSMTree/Extras/MergingRunData.hs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ unsafeCreateMergingRun hfs hbio resolve indexType path counter = \case
MR.newCompleted numRuns totalDebt run

OngoingMergeData mergeType rds -> do
withRuns hfs hbio indexType path counter (unNonEmptyRunData <$> rds)
withRuns hfs hbio indexType path counter (toRunData <$> rds)
$ \runs -> do
n <- incrUniqCounter counter
let fsPaths = RunFsPaths path (RunNumber (uniqueToInt n))
Expand Down Expand Up @@ -121,8 +121,6 @@ mergingRunDataInvariant = \case
assertI "completed merges are non-trivial (at least two inputs)" $
n >= 2
OngoingMergeData _ rds -> do
assertI "inputs to ongoing merges aren't empty" $
all nonEmptyRunDataInvariant rds
assertI "ongoing merges are non-trivial (at least two inputs)" $
length rds >= 2
where
Expand All @@ -137,8 +135,7 @@ mapMergingRunData f g h = \case
CompletedMergeData t n r ->
CompletedMergeData t n $ mapRunData f g h r
OngoingMergeData t rs ->
OngoingMergeData t $
map (NonEmptyRunData . mapRunData f g h . unNonEmptyRunData) rs
OngoingMergeData t $ map (mapNonEmptyRunData f g h) rs

type SerialisedMergingRunData t =
MergingRunData t SerialisedKey SerialisedValue SerialisedBlob
Expand All @@ -163,7 +160,7 @@ labelMergingRunData (OngoingMergeData mt rds) =
tabulate "merging run state" ["OngoingMerge"]
. tabulate "merge type" [show mt]
. tabulate "merging run inputs" [showPowersOf 2 (length rds)]
. foldr ((.) . labelRunData . unNonEmptyRunData) id rds
. foldr ((.) . labelNonEmptyRunData) id rds

instance ( Arbitrary t, Ord k, Arbitrary k, Arbitrary v, Arbitrary b
) => Arbitrary (MergingRunData t k v b) where
Expand Down
43 changes: 26 additions & 17 deletions src-extras/Database/LSMTree/Extras/RunData.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ module Database.LSMTree.Extras.RunData (
, serialiseRunData
-- * NonEmptyRunData
, NonEmptyRunData (..)
, nonEmptyRunDataInvariant
, nonEmptyRunData
, toRunData
, mapNonEmptyRunData
, SerialisedNonEmptyRunData
-- * QuickCheck
, labelRunData
Expand All @@ -38,10 +40,10 @@ import Control.RefCount
import Data.Bifoldable (Bifoldable (bifoldMap))
import Data.Bifunctor
import Data.Foldable (for_)
import qualified Data.Map as M
import Data.Map.NonEmpty (NEMap)
import qualified Data.Map.NonEmpty as NEMap
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe (mapMaybe)
import qualified Data.Vector as V
import Database.LSMTree.Extras (showPowersOf10)
import Database.LSMTree.Extras.Generators ()
Expand Down Expand Up @@ -176,7 +178,7 @@ withRunDataAsWriteBuffer ::
-> (WB.WriteBuffer -> Ref (WBB.WriteBufferBlobs IO h) -> IO a)
-> IO a
withRunDataAsWriteBuffer hfs f fsPaths rd action = do
let es = V.fromList . M.toList $ unRunData rd
let es = V.fromList . Map.toList $ unRunData rd
let maxn = NumEntries $ V.length es
let wbbPath = Paths.writeBufferBlobPath fsPaths
bracket (WBB.new hfs wbbPath) releaseRef $ \wbb -> do
Expand Down Expand Up @@ -234,19 +236,24 @@ serialiseRunData = mapRunData serialiseKey serialiseValue serialiseBlob
--
-- Note: 'b ~ Void' should rule out blobs.
newtype NonEmptyRunData k v b =
NonEmptyRunData { unNonEmptyRunData :: RunData k v b }
NonEmptyRunData { unNonEmptyRunData :: NEMap k (Entry v b) }
deriving stock (Eq, Show)

type SerialisedNonEmptyRunData =
NonEmptyRunData SerialisedKey SerialisedValue SerialisedBlob

nonEmptyRunData :: RunData k v b -> Maybe (NonEmptyRunData k v b)
nonEmptyRunData rd
| null (unRunData rd) = Nothing
| otherwise = Just (NonEmptyRunData rd)
nonEmptyRunData (RunData m) = NonEmptyRunData <$> NEMap.nonEmptyMap m

nonEmptyRunDataInvariant :: NonEmptyRunData k v b -> Bool
nonEmptyRunDataInvariant (NonEmptyRunData rd) = not (null (unRunData rd))
toRunData :: NonEmptyRunData k v b -> RunData k v b
toRunData (NonEmptyRunData m) = RunData (NEMap.toMap m)

mapNonEmptyRunData ::
Ord k'
=> (k -> k') -> (v -> v') -> (b -> b')
-> NonEmptyRunData k v b -> NonEmptyRunData k' v' b'
mapNonEmptyRunData f g h =
NonEmptyRunData . NEMap.mapKeys f . NEMap.map (bimap g h) . unNonEmptyRunData

type SerialisedNonEmptyRunData =
NonEmptyRunData SerialisedKey SerialisedValue SerialisedBlob

{-------------------------------------------------------------------------------
QuickCheck
Expand All @@ -271,7 +278,7 @@ labelRunData (RunData m) =
| otherwise = "no large k/op"

labelNonEmptyRunData :: SerialisedNonEmptyRunData -> Property -> Property
labelNonEmptyRunData (NonEmptyRunData rd) = labelRunData rd
labelNonEmptyRunData = labelRunData . toRunData

instance ( Ord k, Arbitrary k, Arbitrary v, Arbitrary b
) => Arbitrary (RunData k v b) where
Expand Down Expand Up @@ -311,7 +318,8 @@ genNonEmptyRunData ::
-> Gen b
-> Gen (NonEmptyRunData k v b)
genNonEmptyRunData genKey genVal genBlob =
genRunData genKey genVal genBlob `suchThatMap` nonEmptyRunData
NonEmptyRunData . NEMap.fromList <$>
liftArbitrary (liftArbitrary2 genKey (liftArbitrary2 genVal genBlob))

shrinkNonEmptyRunData ::
Ord k
Expand All @@ -321,8 +329,9 @@ shrinkNonEmptyRunData ::
-> NonEmptyRunData k v b
-> [NonEmptyRunData k v b]
shrinkNonEmptyRunData shrinkKey shrinkVal shrinkBlob =
mapMaybe nonEmptyRunData
. shrinkRunData shrinkKey shrinkVal shrinkBlob
map (NonEmptyRunData . NEMap.fromList)
. liftShrink (liftShrink2 shrinkKey (liftShrink2 shrinkVal shrinkBlob))
. NEMap.toList
. unNonEmptyRunData

-- | We cannot implement 'Arbitrary2' since we have constraints on @k@.
Expand Down
2 changes: 1 addition & 1 deletion test/Test/Database/LSMTree/Generators.hs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ tests = testGroup "Test.Database.LSMTree.Generators" [
, testGroup "NonEmptyRunData" $
prop_arbitraryAndShrinkPreserveInvariant
labelNonEmptyRunData
nonEmptyRunDataInvariant
noInvariant
, testGroup "MergingRunData" $
prop_arbitraryAndShrinkPreserveInvariant
@(SerialisedMergingRunData MR.LevelMergeType)
Expand Down

0 comments on commit 442e8a8

Please sign in to comment.