From 4f0106c0d15347d64b7bf5da4ed6ad56017d4e1b Mon Sep 17 00:00:00 2001 From: TimSheard Date: Wed, 13 Oct 2021 10:11:51 -0400 Subject: [PATCH 01/19] Added the Compact data modules --- libs/small-steps/small-steps.cabal | 7 + libs/small-steps/src/Data/Compact/Class.hs | 477 ++++++++++++++++ libs/small-steps/src/Data/Compact/KeyMap.hs | 574 ++++++++++++++++++++ 3 files changed, 1058 insertions(+) create mode 100644 libs/small-steps/src/Data/Compact/Class.hs create mode 100644 libs/small-steps/src/Data/Compact/KeyMap.hs diff --git a/libs/small-steps/small-steps.cabal b/libs/small-steps/small-steps.cabal index 9bcacbc07a8..74839df2334 100644 --- a/libs/small-steps/small-steps.cabal +++ b/libs/small-steps/small-steps.cabal @@ -45,6 +45,8 @@ library , Data.CanonicalMaps , Data.MemoBytes , Data.Coders + , Data.Compact.Class + , Data.Compact.KeyMap , Data.Pulse , Control.Provenance , Control.Iterate.SetAlgebra @@ -52,8 +54,10 @@ library , Control.SetAlgebra build-depends: aeson , ansi-wl-pprint + , array , base >=4.11 && <5 , bytestring + , cardano-prelude , cborg , containers , cryptonite @@ -63,6 +67,9 @@ library , free , mtl , nothunks + , prettyprinter + , primitive + , random , strict-containers , text , transformers >= 0.5 diff --git a/libs/small-steps/src/Data/Compact/Class.hs b/libs/small-steps/src/Data/Compact/Class.hs new file mode 100644 index 00000000000..5a38c6be3bd --- /dev/null +++ b/libs/small-steps/src/Data/Compact/Class.hs @@ -0,0 +1,477 @@ +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE ScopedTypeVariables #-} + +-- HeapWords for Array and PrimArray +{-# OPTIONS_GHC -Wno-orphans #-} + +module Data.Compact.Class where + +import qualified Data.Array as A +import qualified Data.Primitive.Array as PA +import qualified Data.Array.MArray as MutA +import Data.Primitive.PrimArray + ( PrimArray, indexPrimArray, primArrayFromList, primArrayToList, sizeofPrimArray, copyPrimArray, + MutablePrimArray,unsafeFreezePrimArray , newPrimArray,sizeofMutablePrimArray, readPrimArray, writePrimArray, + ) + +import qualified Data.Primitive.SmallArray as Small +import Data.Primitive.SmallArray(SmallArray,SmallMutableArray) + +import Data.Primitive.Types (Prim (..)) +import GHC.Arr(STArray(..),unsafeFreezeSTArray) +import Control.Monad.ST (ST, runST) +import Cardano.Prelude (HeapWords (..)) + + +-- ============================================================================================ +-- Array like objects which can access elements by their index + +class Indexable t a where + index :: t a -> Int -> a + isize :: t a -> Int + fromlist :: [a] -> t a + tolist :: t a -> [a] + catenate :: Int -> [t a] -> t a + merge :: Ord a => Int -> [t a] -> t a + +-- Array like objects that store their elements in ascending order dupport Binary search + +-- | Find the index of 'k'. Use 'lo' and 'hi' to narrow the scope where 'k' may occur +-- This is possible because we assume 'arr' is maintained in ascending order of keys. +binsearch :: (Ord k, Indexable arr k) => Int -> Int -> k -> arr k -> Maybe Int +binsearch lo hi _k _v | lo > hi = Nothing +binsearch lo hi k v | lo == hi = if index v lo == k then Just lo else Nothing +binsearch lo _hi k v | index v lo == k = Just lo +binsearch _lo hi k v | index v hi == k = Just hi +binsearch lo hi _k _v | lo + 1 == hi = Nothing +binsearch lo hi k v = (if index v mid > k then binsearch lo mid k v else binsearch mid hi k v) + where + mid = lo + (div (hi - lo) 2) + + +-- | Find the index and the value at the least upper bound of 'target' +alub :: (Ord t1, Indexable t2 t1) => (Int, Int) -> t2 t1 -> t1 -> Maybe (Int, t1) +alub (lo, hi) arr target + | lo > hi = Nothing + | target <= index arr lo = Just (lo, index arr lo) + | lo == hi = Nothing + | lo + 1 == hi && index arr lo < target && target <= index arr hi = Just (hi, index arr hi) + | True = if target <= index arr mid then (alub (lo, mid) arr target) else (alub (mid, hi) arr target) + where + mid = lo + (div (hi - lo) 2) + +boundsCheck :: Indexable t1 a => (t1 a -> Int -> t2) -> t1 a -> Int -> t2 +boundsCheck indexf arr i | i>=0 && i < isize arr = indexf arr i +boundsCheck _ arr i = error ("boundscheck error, "++show i++", not in bounds (0.."++show (isize arr -1)++").") + +-- Built in type Instances + +instance Indexable PA.Array x where + index = boundsCheck PA.indexArray + isize = PA.sizeofArray + fromlist = PA.arrayFromList + tolist arr = foldr (:) [] arr + catenate = catArray + merge = mergeArray + +instance Prim a => Indexable PrimArray a where + index = boundsCheck indexPrimArray + isize = sizeofPrimArray + fromlist = primArrayFromList + tolist = primArrayToList + catenate = catArray + merge = mergeArray + +instance Indexable (A.Array Int) a where + index = (A.!) + isize arr = (hi - lo) + 1 where (lo, hi) = A.bounds arr + fromlist xs = (A.listArray (0, length xs -1) xs) + tolist arr = foldr (:) [] arr + catenate = catArray + merge = mergeArray + +instance Indexable SmallArray t where + index = Small.indexSmallArray + isize = Small.sizeofSmallArray + fromlist = Small.smallArrayFromList + tolist arr = foldr (:) [] arr + catenate = catArray + merge = mergeArray + +-- ======================================================================== +-- Pairs of Mutable Arrays and ImMutable Arrays that can be converted safely +-- ======================================================================== + + +mboundsCheck :: (ArrayPair arr marr a) => + (marr s a -> Int -> ST s a) -> marr s a -> Int -> ST s a +mboundsCheck indexf arr i | i>=0 && i < msize arr = indexf arr i +mboundsCheck _ arr i = error ("mboundscheck error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + + +class Indexable arr a => ArrayPair arr marr a | marr -> arr, arr -> marr where + mindex :: marr s a -> Int -> ST s a + msize :: marr s a -> Int + mnew :: Int -> ST s (marr s a) + mfreeze :: marr s a -> ST s (arr a) -- This should be the unsafe version that does not copy + mwrite :: marr s a -> Int -> a -> ST s () + mcopy :: forall s. marr s a -> Int -> arr a -> Int -> Int -> ST s () + +-- Built in type instances + +instance ArrayPair SmallArray SmallMutableArray a where + mindex = mboundsCheck Small.readSmallArray + msize = Small.sizeofSmallMutableArray + mnew size = Small.newSmallArray size undefined + mfreeze = Small.unsafeFreezeSmallArray + mwrite arr i a = if i>=0 && i<(msize arr) + then Small.writeSmallArray arr i a + else error ("mwrite error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + mcopy = Small.copySmallArray + +instance ArrayPair PA.Array PA.MutableArray a where + msize = PA.sizeofMutableArray + mindex = mboundsCheck PA.readArray + mnew n = PA.newArray n undefined + mfreeze = PA.unsafeFreezeArray + mwrite arr i a = + if i>=0 && i<(msize arr) + then PA.writeArray arr i a + else error ("mwrite error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + mcopy = PA.copyArray + +instance Prim a => ArrayPair PrimArray MutablePrimArray a where + msize = sizeofMutablePrimArray + mindex = mboundsCheck readPrimArray + mnew = newPrimArray + mfreeze = unsafeFreezePrimArray + mwrite arr i a = + if i>=0 && i<(msize arr) + then writePrimArray arr i a + else error ("mwrite error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + mcopy = copyPrimArray + +-- | MutArray fixes the index type to Int for the STArray type constructor +newtype MutArray s t = MutArray (STArray s Int t) + +instance ArrayPair (A.Array Int) MutArray a where + msize (MutArray (STArray lo hi _ _)) = hi - lo + 1 + mindex (MutArray arr) i = MutA.readArray arr i + mnew n = MutArray <$> (MutA.newArray_ (0,n-1)) + mfreeze (MutArray arr) = unsafeFreezeSTArray arr + mwrite (MutArray arr) i a = MutA.writeArray arr i a + mcopy marr startm arr start count = go startm start count + where go _i _j 0 = pure () + go i j n = do + mwrite marr i (index arr j) + go (i+1) (j+1) (n-1) + + +-- ======================================================= +-- Usefull functions that use Mutable Arrays + +-- | Build a mutable array from a list +mfromlist :: ArrayPair arr marr a => [a] -> ST s (marr s a) +mfromlist xs = do + marr <- mnew (length xs) + let loop _i [] = pure () + loop i (y:ys) = mwrite marr i y >> loop (i+1) ys + loop 0 xs + pure marr + +-- | concatenate a list of array like objects by allocating the target and then copying them 1 by 1. +-- catArray maintains index order, but mergeArray maintains ascending oder. +-- catArray [[2,1],[14],[6,5,11]] --> [2,1,14,6,5,11] +-- mergeArray [[1,2],[14],[5,6,11]] --> [1,2,5,6,11,14] +catArray :: ArrayPair arr marr a => Int -> [arr a] -> arr a +catArray totalsize xs = fst(withMutArray totalsize (build 0 xs)) + where build _next [] _marr = pure () + build next (arr: arrs) marr = + do let size = isize arr + mcopy marr next arr 0 size + build (next+size) arrs marr + +-- | Swap the values at indices 'i' and 'j' in mutable array 'marr' +swap :: ArrayPair arr marr a => marr s a -> Int -> Int -> ST s () +swap _ i j | i==j = pure () +swap marr i j = do + ti <- mindex marr i; + tj <- mindex marr j; + mwrite marr i tj + mwrite marr j ti + +mToList :: ArrayPair arr marr a => Int -> marr s a -> ST s [a] +mToList first marr = loop first [] + where hi = (msize marr - 1) + loop lo xs | lo > hi = pure(reverse xs) + loop lo xs = do {x <- mindex marr lo; loop (lo+1) (x:xs)} + +-- ================================================================ +-- Functions for using mutable initialization in a safe manner. +-- Using these functions is the safe way to use the method 'mfreeze' + +withMutArray:: ArrayPair arr marr a => Int -> (forall s. marr s a -> ST s x) -> (arr a,x) +withMutArray n process = runST $ do + marr <- mnew n + x <- process marr + arr <- mfreeze marr + pure (arr, x) + +with2MutArray :: + ( ArrayPair arr1 marr1 a, ArrayPair arr2 marr2 b) => + Int -> + Int -> + (forall s. marr1 s a -> marr2 s b -> ST s x) -> + (arr1 a, arr2 b,x) +with2MutArray size1 size2 process = runST $ do + arr1 <- mnew size1 + arr2 <- mnew size2 + x <- process arr1 arr2 + arr3 <- mfreeze arr1 + arr4 <- mfreeze arr2 + pure (arr3, arr4, x) + +-- ======================================================= +-- Abtract Searchable types (Arrays stored in ascending order) +-- These will be very usefull when we create maps as parallel arrays +-- the first sorted on key, and the second holdingthe associated value at the +-- same index as it's key. + +class Ord key => Search t key where + search :: key -> t -> Maybe Int + +instance Ord key => Search (PA.Array key) key + where search key v = binsearch 0 (isize v - 1) key v + +instance (Prim key,Ord key) => Search (PrimArray key) key + where search key v = binsearch 0 (isize v - 1) key v + +instance Ord key => Search (A.Array Int key) key + where search key v = binsearch 0 (isize v - 1) key v + +instance (Search t key) => Search [t] key where + search _ [] = Nothing + search key (x:xs) = + case search key x of + Nothing -> search key xs + Just i -> Just i + +instance Search t key => Search (Node t) key where + search key (Node _ x) = search key x + +-- ============================================================== +-- Overloaded operations on (Map k v) + +class Maplike m k v where + makemap :: [(k,v)] -> m k v + lookupmap :: Ord k => k -> m k v -> Maybe v + insertmap :: Ord k => k -> v -> m k v -> m k v + +-- ============================================================== +-- Overloaded operations on (Set k) + +class Setlike m k where + makeset :: [k] -> m k + elemset :: Ord k => k -> m k -> Bool + insertset :: Ord k => k -> m k -> m k + emptyset :: m k + +-- ========================================================= +-- HeapWords instances + +instance (HeapWords v) => HeapWords (A.Array Int v) where + heapWords arr = foldl accum (3 + n) arr + where + accum ans v = ans + heapWords v + n = isize arr + +instance (Prim a, HeapWords a) => HeapWords (PrimArray a) where + heapWords arr = 2 + (sizeofPrimArray arr * heapWords (index arr 0)) + +-- ======================================================= +-- Encoding lists with the structure of binary numbers + +-- | binary encoding of 'n', least significant bit on the front of the list +binary :: Integral n => n -> [n] +binary 0 = [] +binary 1 = [(1)] +binary n = (mod n 2) : binary (div n 2) + +-- | Compute a sparse list of non-zero Binary digits and their positional weights to represent 'n' +-- For example (sparseBinary 25) returns [(1,1),(1,8),(1,16)], I.e. we need: 1 one, +-- 1 eight, and 1 sixteen. Since this is binary, and we don't store the 0's, the digits are aways 1. +-- and the weights are powers of 2. +sparseBinary :: Int -> [(Int, Int)] +sparseBinary n = fix 1 (binary n) + where + fix _ [] = [] + fix m (x : xs) = + if x == 0 + then fix (m * 2) xs + else (x, m) : fix (m * 2) xs + +-- | Split a list of length 'n' into pieces, each piece has a power of two as its length. +-- For example: pieces [1..11] --> [(1,[1]), (2,[2,3]), (8,[4,5,6,7,8,9,10,11])] +pieces :: [a] -> [(Int, [a])] +pieces xs = chop parts xs + where + parts = sparseBinary (length xs) + chop [] _zs = [] + chop ((_, n) : ys) zs = (n, take n zs) : chop ys (drop n zs) + + +-- | When a list is represented with the structure of binary numbers, an important +-- property is that every such list has a full prefix. This is a prefix which has +-- contiguous powers of two. For example: +-- splitAtFullPrefix 1 (node 1) [node 1,node 2, node 4, node 8, node 32, node 128] +-- returns +-- (16, [node 1,node 1,node 2, node 4, node 8], [node 32,node 128]) +-- because [1,2,4,8] is the longest contiguous prefix consisting of adjacent powers of 2. +-- In the worst case the prefix has length 1. +splitAtFullPrefix :: (node -> Int) -> Int -> node -> [node] -> (Int,[node],[node]) +splitAtFullPrefix getsize _next node [] = (getsize node,[node],[]) +splitAtFullPrefix getsize next node1 (node2:more) = + let n = getsize node1 + m = getsize node2 + in if next==m + then case splitAtFullPrefix getsize (next*2) node2 more of + (count,prefix,rest) -> (count+n, node1:prefix, rest) + else (n,[node1],node2:more) + + +-- ============================================================================== + +-- | A node carries a 'size' and some array-like type 'arr' +data Node arr = Node {-# UNPACK #-} !Int arr + deriving Show + +arrayPart :: Node arr -> arr +arrayPart (Node _ arr) = arr + +nodesize :: Node arr -> Int +nodesize (Node i _) = i + +-- ================================================================== +-- Merging arrays to maintain ascending order. + +-- | Find the index in 'marr' of the smallest 't'. Return a pair (index,the-smallest-t) +-- The function 'smaller' compares two 't' for smallness. +-- 'pair' is the smallest (index,t) we have seen so far. 'lo' and 'hi' limit +-- the bounds of where to look. +smallestIndex ::(ArrayPair arr marr t) => (t -> t -> Bool) -> marr s t -> (Int,t) ->Int -> Int -> ST s (Int,t) +smallestIndex smaller marr initpair initlo hi = loop initpair initlo where + loop pair lo | lo > hi = pure pair + loop (pair@(_i,t)) lo = do + t2 <- mindex marr lo + if smaller t t2 + then loop pair (lo+1) + else loop (lo,t2) (lo+1) + +-- | Apply 'action' to each 't' in 'marr' in ascending order as determined by 'smaller' +-- 'state' is the current state, and 'lo' and 'hi' limit the bounds of where to look. +-- 'markIfDone' might alter 'marr' and return a new 'lo' limit, if the 'lo' index in +-- 'marr' has no more 't' objects to offer. +inOrder :: + (Int -> Int -> t -> PA.MutableArray s t -> ST s Int) -> + (t -> t -> Bool) -> + state -> + Int -> + Int -> + (state -> t -> ST s state) -> + PA.MutableArray s t -> -- array of items to be merged, This should be small. At most 20 or so. + ST s state +inOrder markIfDone smaller initstate initlo hi action marr = loop initlo initstate where + loop lo state | lo > hi = pure state + loop lo state = + do t <- mindex marr lo + (i,small) <- smallestIndex smaller marr (lo,t) (lo+1) hi + state' <- action state small + lo' <- markIfDone lo i small marr + loop lo' state' + +-- | A commonly used 'markIfDone' function. Test if 'next' is still in bounds for 'arr' +-- If so, them mutate 'marr' to indicate that next time we should look at index 'next+1' in arr. +-- If it is out of bounds, then swap the pairs in 'marr' at indexs 'i' and 'lo', and then +-- increment lo, so the pair that has no more to offer, is no longer in an active position. +mark1:: (Indexable arr t) => + Int -> Int -> (Int,arr t) -> PA.MutableArray s (Int,arr t) -> ST s Int +mark1 lo i (next,arr) marr = + do let next' = next+1 + if next' < isize arr + then mwrite marr i (next',arr) >> pure lo + else swap marr lo i >> pure(lo+1) + +-- | A commonly used 'smaller' function +smaller1 :: (Ord a, Indexable arr a) => (Int, arr a) -> (Int, arr a) -> Bool +smaller1 (i,arr1) (j,arr2) = index arr1 i < index arr2 j + +-- | A commonly used 'action' function. Appropriate when the 'arr' is simple with +-- no bells or whistles. Good for PrimArray, PA.Array, A.Array, Any array with a ArrayPair instance. +-- If we use an exotic array with no ArrayPair instance, we can stil merge, but we can't use this +-- action function. +action1:: (ArrayPair arr marr a, Indexable t a) => marr s a -> Int -> (Int, t a) -> ST s Int +action1 marr i (j,arr) = (mwrite marr i (index arr j) >> pure(i+1)) + + +-- | Merge a list of array-like objects using 'action' The 'action' will differ depending on +-- what kind of arrays are begin merged. +mergeWithAction :: forall a arr marr. + (ArrayPair arr marr a, Ord a) => + Int -> + [arr a] -> + (forall s. marr s a -> Int -> (Int,arr a) -> ST s Int) -> + arr a +mergeWithAction size inputs action = fst $ withMutArray size build where + build:: forall s. marr s a -> ST s Int + build moutput = do + minputs <- mfromlist (map (\ x -> (0,x)) inputs) + inOrder mark1 smaller1 (0::Int) 0 (length inputs-1) (action moutput) minputs + +-- | Merge a list of array like objects by allocating the target and then merging the sources. +-- mergeArray maintains ascending order. But catArray maintains index order. +-- mergeArray [[1,2],[14],[5,6,11]] --> [1,2,5,6,11,14] +-- catArray [[2,1],[14],[6,5,11]] --> [2,1,14,6,5,11] + +mergeArray :: (Ord a,ArrayPair arr marr a) => Int -> [arr a] -> arr a +mergeArray size xs = mergeWithAction size xs action1 + +testmerge :: PrimArray Int +testmerge = mergeArray (sum(map isize xs)) xs + where xs = [fromlist[2,7], fromlist[1,6,19], fromlist[4,9], fromlist[3,8,12,17]] + +{- +-- | Merge 2 parallel arrays with 'action'. The order of merging depends only on +-- The first list 'keys' , the second is implicit in the the state (Int,'vals'). +merge2WithAction :: forall a arr marr arr2 marr2 v. + ( ArrayPair arr marr a, + Indexable arr2 v, + ArrayPair arr2 marr2 v, + Ord a + ) => + Int -> + [arr a] -> + (forall s. marr s a -> marr2 s v -> Int -> (Int,arr a) -> ST s Int) -> + MapNode arr arr2 a v +merge2WithAction size keys action = node (with2MutArray size size build) where + node (arr1,arr2,_) = MapNode size arr1 arr2 + build:: forall s. marr s a -> marr2 s v -> ST s Int + build mkeys mvals = do + minputs <- mfromlist (map (\ x -> (0,x)) keys) + inOrder mark1 smaller1 (0::Int) 0 (length keys - 1) (action mkeys mvals) minputs + +mergeMapNode :: forall karr varr marr marr2 k v. + ( ArrayPair karr marr k, + ArrayPair varr marr2 v, + Ord k + ) => Int -> [MapNode karr varr k v] -> MapNode karr varr k v +mergeMapNode size nodes = mergeArray2 size inputs action where + (inputs,vals) = unzip (map (\ (MapNode _ ks vs) -> (ks,vs)) nodes) + action:: forall s. marr s k -> marr2 s v -> Int -> (Int,karr k) -> ST s Int + action mkeys mvals i (n,arrkeys) = undefined + +-} + + diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs new file mode 100644 index 00000000000..4825270a285 --- /dev/null +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -0,0 +1,574 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} + +{-# OPTIONS_GHC -Wno-orphans #-} + + +module Data.Compact.KeyMap where + + + +import Data.Foldable (foldl') +import Cardano.Prelude (HeapWords (..),Generic,runST,ST) +import Data.Word(Word64) +import qualified Data.Primitive.Array as PA +import Data.Bits (Bits,(.&.), (.|.), complement, popCount, unsafeShiftL,setBit,testBit) +import Data.Compact.Class +import GHC.Exts ((==#), reallyUnsafePtrEquality#, isTrue# ) +import qualified Data.Map as Map +import Control.DeepSeq (NFData(..)) +import qualified Data.Primitive.SmallArray as Small +import Data.Primitive.SmallArray() +import System.Random(RandomGen,genWord64,mkStdGen) +import Prettyprinter +import Data.Text(Text,pack) +import qualified Prettyprinter.Internal as Pretty + +-- type PArray = PA.Array +type PArray = Small.SmallArray + +bin :: Integral n => n -> [n] +bin x = reverse (binary x) + +-- ========================================================================== +-- bits, Segments, Paths. Breaking a Key into a sequence of small components + +-- | Represent a set of small integers, they can range from 0 to 63 +type Bitmap = Word64 + +-- | The number of bits in a segment. Can't be more than 6, because using Word64 +-- as Bitmap can only accomodate 2^6 = 64 bits +bits :: Int +bits = 6 + +-- | Ints in the range [0..63], represents 'bits' wide portion of a key +type Segment = Int + +-- | Represents a list of 'bits', which when combined is in 1-1 correspondance with a Key +type Path = [Segment] + +-- | The maximum value of a segment, as an Int +intSize :: Int +intSize = 2 ^ bits + +-- | The maximum value of a segment, as a Word64 +wordSize :: Word64 +wordSize = 2 ^ ((fromIntegral bits)::Word64) + +-- | The length of a list of segments representing a key. Need to be carefull if a Key isn't evenly divisible by bits +pathSize :: Word64 +pathSize = (if (mod 64 wbits)==0 then (div 64 wbits) else (div 64 wbits) + 1) + where wbits = fromIntegral bits :: Word64 + +-- | Break up a Word64 into a Path +getpath :: Word64 -> Path +getpath w64 = loop pathSize w64 [] + where loop :: Word64 -> Word64 -> [Int] -> [Int] + loop 0 _ ans = ans + loop cnt n ans = loop (cnt - 1) (div n wordSize) ((fromIntegral (mod n wordSize)):ans) + +-- ======================================================================== +-- Keys + +data Key = Key {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + deriving (Eq,Ord,Show,NFData,Generic) + +-- | The number of Word64 per key +wordsPerKey :: Int +wordsPerKey = 4 + +-- | The length of a Path for a Key (which might have multiple Word64's inside) +keyPathSize :: Int +keyPathSize = wordsPerKey * (fromIntegral pathSize) + +genKey :: RandomGen b => b -> (Key, b) +genKey g = (Key w0 w1 w2 w3,g4) + where (w0,g1) = genWord64 g + (w1,g2) = genWord64 g1 + (w2,g3) = genWord64 g2 + (w3,g4) = genWord64 g3 + +-- | Break up a Key into a Path +path :: Key -> Path +path (Key w0 w1 w2 w3) = getpath w0 ++ getpath w1 ++ getpath w2 ++ getpath w3 + +-- | A pair of a Key and its equivalent Path +data BitState = BitState Path !Key + +-- Initialize a BitState from a Key +initBitState :: Key -> BitState +initBitState key = BitState (path key) key + +-- | Obtain the Key from a BitState +getBytes :: BitState -> Key +getBytes (BitState _ bs) = bs + +-- | Consume the next segment from a path +nextBits :: String -> BitState -> (Int,BitState) +nextBits _ (BitState (x:xs) key) = (x,BitState xs key) +nextBits message (BitState [] key) = + error ("NextBits out of bounds. The key "++show key++" has run out of bits."++message) + +next2 :: BitState -> Key -> BitState +next2 (BitState ps _) key = (BitState (drop n (path key)) key) + where n = (fromIntegral keyPathSize) - length ps + +showBM :: Bitmap -> String +showBM bm = show(bitmapToList bm) + +bitmapToList :: Bits a => a -> [Int] +bitmapToList bm = loop 63 [] + where loop i ans | i < 0 = ans + loop i ans = if testBit bm i then loop (i-1) (i:ans) else loop (i-1) ans + +instance HeapWords Key where + heapWords (Key _ _ _ _) = 5 + +instance Show BitState where + show (BitState p key) = "(BitState "++show p++" "++show key++")" + + +-- =============================================================== + +data HashMap v + = Empty + | Leaf {-# UNPACK #-} !Key v + | One {-# UNPACK #-} !Int (HashMap v) -- 1 subtree + | Two {-# UNPACK #-} !Bitmap (HashMap v) (HashMap v) -- 2 subtrees + | BitmapIndexed {-# UNPACK #-} !Bitmap -- 3 - (intSize - 1) subtrees + {-# UNPACK #-} !(Small.SmallArray (HashMap v)) + | Full {-# UNPACK #-} !(Small.SmallArray (HashMap v)) -- intSize subtrees + deriving (NFData,Generic) + +heapAdd :: HeapWords a => a -> Int -> Int +heapAdd x ans = heapWords x + ans + +heapPlus:: HeapWords a => Int -> a -> Int +heapPlus ans x = heapWords x + ans + +instance HeapWords t => HeapWords (PA.Array t) where + heapWords arr = foldl' heapPlus (2 + isize arr) arr + +instance HeapWords v => HeapWords (HashMap v) where + heapWords Empty = 1 + heapWords (One _ xs) = 3 + heapWords xs + heapWords (Leaf _ v) = 6 + heapWords v -- Change when Key changes + heapWords (BitmapIndexed _ arr) = foldl' heapPlus 2 arr + heapWords (Full arr) = foldl' heapPlus 1 arr + heapWords (Two _ a b) = 4 + heapWords a + heapWords b + +-- ====================================================================== + + +insert' :: Show v => BitState -> v -> HashMap v -> HashMap v +insert' bs0 v0 m0 = go bs0 v0 m0 + where + go !state !x Empty = Leaf (getBytes state) x + go !state x (One j node) = + case (nextBits "One" state) of + (i,state1) -> + case compare j i of + EQ -> One j (go state1 x node) + LT -> Two (setBit (setBit 0 i) j) node (go state1 x Empty) + GT -> Two (setBit (setBit 0 i) j) (go state1 x Empty) node + go state x t@(Leaf bs1 y) + | getBytes state == bs1 = if x `ptrEq` y then t else (Leaf bs1 x) + | otherwise = makeTwo state t (next2 state bs1) x + go state x t@(BitmapIndexed bmap arr) + | not(testBit bmap tagbits) = + let !arr' = insertAt arr i $! (Leaf (getBytes state) x) + in bitmapIndexedOrFull (bmap .|. m) arr' + | otherwise = + let !st = index arr i + !st' = go state1 x st + in if st' `ptrEq` st + then t + else BitmapIndexed bmap (update arr i st') + where (!tagbits,!state1) = nextBits "BitmapIndexed" state + m = setBit 0 tagbits + i = sparseIndex bmap m + go state x t@(Two bmap x0 x1) + | not(testBit bmap tagbits) = + let !arr' = insertAt (fromlist [x0,x1]) i $! (Leaf (getBytes state) x) + in bitmapIndexedOrFull (bmap .|. m) arr' + | otherwise = + let !st = if i==0 then x0 else x1 -- index arr i + !st' = go state1 x st + in if st' `ptrEq` st + then t + else if i==0 + then Two bmap st' x1 + else Two bmap x0 st' + where (!tagbits,!state1) = nextBits "Two" state + m = setBit 0 tagbits + i = sparseIndex bmap m + go state x t@(Full arr) = + let !st = index arr i + !st' = go state1 x st + in if st' `ptrEq` st + then t + else Full (update arr i st') + where (!tagbits,!state1) = nextBits "Full" state + m = setBit 0 tagbits + i = sparseIndex fullNodeMask m + +makeTwo :: Show v => BitState -> HashMap v -> BitState -> v -> HashMap v +makeTwo state1 leaf1 state2 val2 + | i1==i2 = One i1 (makeTwo state1' leaf1 state2' val2) + | otherwise = -- trace ("MAKETWO (i1,i2)="++show(i1,i2)++"\n state1="++show state1++"\n state2="++show state2) $ + if i1 < i2 + then Two (setBit (setBit 0 i1) i2) (Leaf (getBytes state1') val2) leaf1 + else Two (setBit (setBit 0 i1) i2) leaf1 (Leaf (getBytes state1') val2) + where (i1,state1') = nextBits "makeTwo1" state1 + (i2,state2') = nextBits ("makeTwo2 "++"\n "++show state1++"\n "++show state2) state2 + +insert :: Show v => Key -> v -> HashMap v -> HashMap v +insert bs v hashmap = insert' (initBitState bs) v hashmap + +fromList :: Show v => [(Key,v)] -> HashMap v +fromList ps = foldl' accum Empty ps + where accum ans (k,v) = insert k v ans + + +indexFromStateAndBitmap :: BitState -> Bitmap -> (BitState,Int) +indexFromStateAndBitmap state bmap = (state1,sparseIndex bmap m) + where (!bs,!state1) = nextBits "indexFromStateAndBitmap" state + m = setBit 0 bs + + +lookup' :: PrettyA v => BitState -> HashMap v -> Maybe v +lookup' _ Empty = Nothing +lookup' state (Leaf bs v) = if (getBytes state)==bs then Just v else Nothing +lookup' state (One i x) = if i==j then lookup' state' x else Nothing + where (j,state') = nextBits "ONE in lookup" state +lookup' state (Two bm x0 x1) = if i==0 then lookup' state' x0 else lookup' state' x1 + where (state',i) = indexFromStateAndBitmap state bm +lookup' state (BitmapIndexed bm arr) = lookup' state' (index arr i) + where (state',i) = indexFromStateAndBitmap state bm +lookup' state (Full arr) = lookup' state' (index arr i) + where (state',i) = indexFromStateAndBitmap state fullNodeMask + +lookupHM :: PrettyA v => Key -> HashMap v -> Maybe v +lookupHM bytes mp = lookup' (initBitState bytes) mp + + +-- ========================== + +-- | Check if two the two arguments are the same value. N.B. This +-- function might give false negatives (due to GC moving objects.) +ptrEq :: a -> a -> Bool +ptrEq x y = isTrue# (reallyUnsafePtrEquality# x y ==# 1#) +{-# INLINE ptrEq #-} + +bitsPerSubkey :: Int +bitsPerSubkey = 4 +{-# INLINE bitsPerSubkey #-} + +maxChildren :: Int +maxChildren = 1 `unsafeShiftL` bitsPerSubkey +{-# INLINE maxChildren #-} + +subkeyMask :: Bitmap +subkeyMask = 1 `unsafeShiftL` bitsPerSubkey - 1 +{-# INLINE subkeyMask #-} + +sparseIndex :: Bitmap -> Bitmap -> Int +sparseIndex b m = popCount (b .&. (m - 1)) +{-# INLINE sparseIndex #-} + +-- | Create a 'BitmapIndexed' or 'Full' node. +bitmapIndexedOrFull :: Bitmap -> PArray (HashMap v) -> HashMap v +bitmapIndexedOrFull b ary + | b == fullNodeMask = Full ary + | otherwise = BitmapIndexed b ary +{-# INLINE bitmapIndexedOrFull #-} + +-- | A bitmask with the 'bitsPerSubkey' least significant bits set. +fullNodeMask :: Bitmap +fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) +{-# INLINE fullNodeMask #-} + + +-- ======================================================================= +-- Operations to make new arrays out off old ones with small changes + +-- | /O(n)/ Overwrite the element at the given position in this array, +update :: PArray t -> Int -> t -> PArray t +update arr i _t + | i<0 || i >= (isize arr) + = error ("index out of bounds in update "++show i++" not in range (0,"++show (isize arr -1)++")") +update arr i t = fst(withMutArray size1 action) + where size1 = isize arr + action marr = do + mcopy marr 0 arr 0 i + mwrite marr i t + mcopy marr (i+1) arr (i+1) (size1 - (i+1)) + + +-- | /O(n)/ Insert an element at the given position in this array, +-- increasing its size by one. +insertM :: PArray e -> Int -> e -> ST s (PArray e) +insertM ary idx b + | idx < 0 || idx > counter = error ("Bounds check in insertAt "++show idx++" not in range 0.."++show (counter)) + | otherwise = do + mary <- mnew (counter+1) + mcopy mary 0 ary 0 idx + mwrite mary idx b + mcopy mary (idx+1) ary idx (counter-idx) + mfreeze mary + where !counter = isize ary +{-# INLINE insertM #-} + +-- | /O(n)/ Insert an element at the given position in this array, +-- increasing its size by one. +insertAt :: PArray e -> Int -> e -> PArray e +insertAt arr idx b = runST(insertM arr idx b) + + +arrayOf :: Int -> a -> PArray a +arrayOf n a = runST $ do + marr <- mnew n + let loop i + | i < n = mwrite marr i a >> loop (i+1) + | otherwise = pure () + loop 0 + arr <- mfreeze marr + pure arr + +-- ========================================================================= + +makeKeys :: Int -> Int -> [Key] +makeKeys seed cnt = loop (mkStdGen seed) cnt [] + where loop _g i ans | i <= 0 = ans + loop g i ans = case genKey g of + (key,g2) -> loop g2 (i-1) (key : ans) + +testt :: Int -> IO () +testt n = do + let (hmap,output) = tests n + histArr = histo hmap + -- hbytes <- recursiveSize $!! hmap + -- putStrLn ("hbytes = "++show hbytes++"\n"++output) + putStrLn output + putStrLn ("histogram "++show(tolist histArr)) + + +tests :: Int -> (HashMap Int, String) +tests n = (hashmap,unlines + [ "bits per level = "++show bits + , "num levels = "++show keyPathSize + , "empty = "++show empty + , "leaf = "++show leaf + , "one = "++show one + , "two = "++show two + , "bits = "++show bit + , "full = "++show full + , "hwords = "++show hwords + , "mwords = "++show mwords + , "diff = "++show(hwords - mwords)++" %"++show((hwords*100) `div` mwords) + , "depth = "++show (hdepth hashmap) + ]) + where hashmap = fromList (take n pairs) + mapmap = Map.fromList (take n pairs) + (empty,one,two,leaf,bit,full) = count hashmap + hwords = heapWords hashmap + mwords = heapWords mapmap + +count :: HashMap v -> (Int,Int,Int,Stat Int,Stat Int,Int) +count x = go 0 x (0,0,0,mempty,mempty,0) + where go _ Empty (e,o,t,l,b,f) = (e+1,o,t,l,b,f) + go d (One _ y) (e,o,t,l,b,f) = go (1 + d) y (e,1+o,t,l,b,f) + go d (Two _ z y) (e,o,t,l,b,f) = go (1+d) y (go (1+d) z (e,o,1+t,l,b,f)) + go d (Leaf _ _) (e,o,t,l,b,f) = (e,o,t,add d l,b,f) + go d (BitmapIndexed _ arr) (e,o,t,l,b,f) = + foldr (go (length arr + d)) (e,o,t,l,add (length arr) b,f) arr + go d (Full arr) (e,o,t,l,b,f) = foldr (go (length arr + d)) (e,o,t,l,b,f+1) arr + + + +hdepth :: HashMap v -> Int +hdepth Empty = 0 +hdepth (One _ x) = 1 + hdepth x +hdepth (Leaf _ _) = 1 +hdepth (BitmapIndexed _ arr) = 1+ maximum(foldr (\ x ans -> hdepth x : ans) [] arr) +hdepth (Full arr) = 1+ maximum(foldr (\ x ans -> hdepth x : ans) [] arr) +hdepth (Two _ x y) = 1 + max (hdepth x) (hdepth y) + +increment :: (ArrayPair arr marr a, Num a) => marr s a -> Int -> ST s () +increment marr i = do { n <- mindex marr i; mwrite marr i (n+1) } + +histogram :: HashMap v -> PA.MutableArray s Int -> ST s () +histogram Empty _ = pure () +histogram (One _ x) marr = increment marr 1 >> histogram x marr +histogram (Leaf _ _) _ = pure () +histogram (BitmapIndexed _ arr) marr = increment marr (isize arr-1) >> mapM_ (\ x -> histogram x marr) arr +histogram (Full arr) marr = increment marr (intSize-1) >> mapM_ (\ x -> histogram x marr) arr +histogram (Two _ x y) marr = increment marr 2 >> histogram x marr >> histogram y marr + +histo :: HashMap v -> PA.Array Int +histo x = fst(withMutArray intSize process) + where process marr = do { initialize (intSize - 1) ; histogram x marr } + where initialize n | n <0 = pure () + initialize n = mwrite marr n 0 >> initialize (n-1) + + + +bpairs :: [Key] +bpairs = makeKeys 99 1500000 + -- makeKeys 3 15 + +pairs :: [ (Key,Int) ] +pairs = zip bpairs [0..] + +-- =================================================== + +data Stat n = Stat n n (Maybe n) (Maybe n) + +liftM:: (t -> t -> t) -> Maybe t -> Maybe t -> Maybe t +liftM f (Just x) (Just y) = Just(f x y) +liftM _ Nothing (Just y) = Just y +liftM _ (Just x) Nothing = Just x +liftM _ Nothing Nothing = Nothing + +instance (Ord n,Num n) => Semigroup (Stat n) where + (Stat c1 s1 mx1 mn1) <> (Stat c2 s2 mx2 mn2) = + Stat (c1+c2) (s1 + s2) (liftM max mx1 mx2) (liftM min mn1 mn2) + +instance (Ord n,Num n) => Monoid (Stat n) where + mempty = Stat 0 0 Nothing Nothing + +instance (Integral n,Show n) => Show (Stat n) where + show (Stat c s mx mn) = "{count= "++show c++", sum="++show s++", max="++show mx++ + ", min="++show mn++ + (if c==0 then "}" else ", avg="++show(div s c)++"}") + +add :: (Num n,Ord n) => n -> Stat n -> Stat n +add n stat = (Stat 1 n (Just n) (Just n)) <> stat + +-- ==================== +-- Debugging functions + +bug :: Int -> IO (HashMap Int) +bug n = do + let ps = take n pairs -- zip (makeKeys 3 n) [0..] + hh (k@(Key m0 m1 _ _ ),v) = show m0++" "++show m1++" "++show (path k)++" "++show v + putStrLn (unlines (map hh ps)) + + -- putStrLn (show (fromList ps)) + pure (fromList ps) + +try :: [(Key,Int)] -> IO () +try ps = do + let hh (k@(Key m0 m1 _ _),v) = show m0++" "++show m1++" "++show (path k)++" "++show v + putStrLn (unlines (map hh ps)) + putStrLn (show (fromList ps)) + + +testlookup :: Int -> Int -> Bool +testlookup seed n = all ok results + where ps = zip (makeKeys seed n) [0..] + keymap :: HashMap Int + keymap = fromList ps + results = [ (i,lookupHM (fst(ps !! i)) keymap) | i <- [0..(n-1)]] + ok (_,Just _) = True + ok (i,Nothing) = error ("testlookup failure: "++show i++" "++show pair++"\n"++ + show (path (fst pair))++"\n "++show keymap) + where pair = (ps !! i) + +-- ====================================================================================== +-- Helper functions for Pretty Printers + +newtype PrettyAnn = Width Int + +type Ann = [PrettyAnn] + +type PDoc = Doc Ann + +class PrettyA t where + prettyA :: t -> PDoc + +instance PrettyA Int where + prettyA = ppInt + +instance PrettyA Word64 where + prettyA = ppWord64 + +ppWord64 :: Word64 -> Doc a +ppWord64 = viaShow + +ppInt :: Int -> Doc a +ppInt = viaShow + +text :: Text -> Doc ann +text = pretty + +isEmpty :: Doc ann -> Bool +isEmpty Pretty.Empty = True +isEmpty _ = False + +-- | ppSexp x [w,y,z] --> (x w y z) +ppSexp :: Text -> [PDoc] -> PDoc +ppSexp con = ppSexp' (text con) + +ppSexp' :: PDoc -> [PDoc] -> PDoc +ppSexp' con fields = + group $ + flatAlt + (hang 2 (encloseSep lparen rparen space docs)) + (encloseSep lparen rparen space docs) + where + docs = if isEmpty con then fields else con : fields + +-- | Vertical layout with commas aligned on the left hand side +puncLeft :: Doc ann -> [Doc ann] -> Doc ann -> Doc ann -> Doc ann +puncLeft open [] _ close = hsep [open, close] +puncLeft open [x] _ close = hsep [open, x, close] +puncLeft open (x : xs) coma close = align (sep ((open <+> x) : help xs)) + where + help [] = mempty + help [y] = [hsep [coma, y, close]] + help (y : ys) = (coma <+> y) : help ys + +ppList :: (x -> Doc ann) -> [x] -> Doc ann +ppList p xs = + group $ + flatAlt + (puncLeft lbracket (map p xs) comma rbracket) + (encloseSep (lbracket <> space) (space <> rbracket) (comma <> space) (map p xs)) + +-- | x == y +equate :: Doc a -> Doc a -> Doc a +equate x y = group (flatAlt (hang 2 (sep [x <+> text "=", y])) (hsep [x, text "=", y])) + +ppArray :: (Indexable arr a) => (a -> PDoc) -> arr a -> PDoc +ppArray f arr = ppList f (tolist arr) + +-- ==================================== +-- Pretty Printer for HashMap + +ppKey :: Key -> PDoc +ppKey (Key w0 _ _ _) = ppWord64 w0 + +ppBitmap :: Word64 -> PDoc +ppBitmap x = text (pack(showBM x)) + +ppHashMap :: (v -> PDoc) -> HashMap v -> PDoc +ppHashMap p (Leaf k v) = ppSexp "L" [ppKey k,p v] +ppHashMap _ Empty = text "E" +ppHashMap p (One x mp) = ppSexp "O" [ppInt x,ppHashMap p mp] +ppHashMap p (Two x m1 m2) = ppSexp "T" [ppBitmap x ,ppHashMap p m1, ppHashMap p m2] +ppHashMap p (BitmapIndexed x arr) = ppSexp "B" [ppList q (zip (bitmapToList x) (tolist arr))] + where q (i,a) = ppInt i <+> ppHashMap p a +ppHashMap p (Full arr) = ppSexp "F" [ppList q (zip (bitmapToList fullNodeMask) (tolist arr))] + where q (i,a) = ppInt i <+> ppHashMap p a + +instance PrettyA v => Show (HashMap v) where + show x = show(ppHashMap prettyA x) + \ No newline at end of file From 532e4b2c72697f4239b89bdc606c90207344ef56 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Wed, 13 Oct 2021 12:38:44 -0400 Subject: [PATCH 02/19] Added delete and foldWithKey operations --- libs/small-steps/src/Data/Compact/KeyMap.hs | 106 ++++++++++++++++++-- 1 file changed, 100 insertions(+), 6 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 4825270a285..f4867fb2105 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -16,7 +16,7 @@ import Data.Foldable (foldl') import Cardano.Prelude (HeapWords (..),Generic,runST,ST) import Data.Word(Word64) import qualified Data.Primitive.Array as PA -import Data.Bits (Bits,(.&.), (.|.), complement, popCount, unsafeShiftL,setBit,testBit) +import Data.Bits (Bits,(.&.), (.|.), complement, popCount, unsafeShiftL,setBit,testBit,clearBit) import Data.Compact.Class import GHC.Exts ((==#), reallyUnsafePtrEquality#, isTrue# ) import qualified Data.Map as Map @@ -165,7 +165,7 @@ instance HeapWords v => HeapWords (HashMap v) where heapWords (Two _ a b) = 4 + heapWords a + heapWords b -- ====================================================================== - +-- Insertion insert' :: Show v => BitState -> v -> HashMap v -> HashMap v insert' bs0 v0 m0 = go bs0 v0 m0 @@ -236,6 +236,90 @@ fromList :: Show v => [(Key,v)] -> HashMap v fromList ps = foldl' accum Empty ps where accum ans (k,v) = insert k v ans +-- ================================================================= +-- Deletion + +-- | Delete the Key encoded in the BitState from the HashMap +delete' :: BitState -> HashMap v -> HashMap v +delete' (BitState [] _) hm = hm -- Removing a bogus key, leaves 'hm' unchanged +delete' _ Empty = Empty +delete' (BitState _ k) (hm@(Leaf k2 _)) = if k==k2 then Empty else hm +delete' (BitState(i:is) k) (hm@(One j x)) = if i==j then oneE j (delete' (BitState is k) x) else hm +delete' (BitState(i:is) k) (hm@(Two bmap x y)) = + if testBit bmap i + then twoE bmap (delete' (BitState is k) x) (delete' (BitState is k) y) + else hm +delete' (BitState(i:is) k) (hm@(BitmapIndexed bmap arr)) = + if testBit bmap i + then let m = setBit 0 i + j = sparseIndex bmap m + result = delete' (BitState is k) (index arr j) + -- Consume an upwards floating Empty by removing that element from the array + in case result of + Empty -> bitmapE (clearBit bmap i) (remove arr j) + _ -> BitmapIndexed bmap (update arr j result) + else hm +delete' (BitState(i:is) k) (Full arr) = + let m = setBit 0 i + j = sparseIndex fullNodeMask m + result = delete' (BitState is k) (index arr j) + -- Consume an upwards floating Empty by removing that element from the array + in case result of + Empty -> BitmapIndexed (clearBit fullNodeMask i) (remove arr j) + _ -> Full(update arr j result) + +delete :: Key -> HashMap v -> HashMap v +delete k hm = delete' (initBitState k) hm + +-- One of the invariants is that no Empty ever appears in any of the other +-- constructors of KeyMap. So we make "smart" constructors that remove Empty +-- if it ever occurrs. This is necessary since 'delete' can turn a subtree +-- into Empty. The strategy is to float 'Empty' up the tree, until it can be +-- 'remove'd from one of the constructors with Array like components (One, Two, BitmapInded, Full). + +-- Float Empty up over One +oneE :: Int -> HashMap v -> HashMap v +oneE _ Empty = Empty +oneE i x = One i x + +-- Float Empty's up over Two +twoE :: Bitmap -> HashMap v -> HashMap v -> HashMap v +twoE _ Empty Empty = Empty +twoE bmap x Empty = oneE (ith bmap 0) x +twoE bmap Empty x = oneE (ith bmap 1) x +twoE bmap x y = Two bmap x y + +-- | Get the 'ith' element from a Bitmap +ith :: Bitmap -> Int -> Int +ith bmap i = (bitmapToList bmap !! i) + +-- Float Empty's up over BitmpIndexed, Note that if the size of the arr +-- becomes 2, then rebuild with Two rather than BitmapIndexed +bitmapE :: Bitmap -> PArray (HashMap v) -> HashMap v +bitmapE bmap arr | isize arr == 2 = twoE bmap (index arr 0) (index arr 1) +bitmapE bmap arr = BitmapIndexed bmap arr + +-- ================================================================ +-- aggregation in ascending order of keys + +foldWithKey :: (ans -> Key -> v -> ans) -> ans -> HashMap v -> ans +foldWithKey _ ans Empty = ans +foldWithKey accum ans (Leaf k v) = accum ans k v +foldWithKey accum ans (One _ x) = foldWithKey accum ans x +foldWithKey accum ans (Two _ x y) = foldWithKey accum (foldWithKey accum ans x) y +foldWithKey accum ans0 (BitmapIndexed _ arr) = loop ans0 0 + where n = isize arr + loop ans i | i >= n = ans + loop ans i = loop (foldWithKey accum ans (index arr i)) (i+1) +foldWithKey accum ans0 (Full arr) = loop ans0 0 + where n = isize arr + loop ans i | i >= n = ans + loop ans i = loop (foldWithKey accum ans (index arr i)) (i+1) + + + +-- ================================================================== +-- Lookup a key indexFromStateAndBitmap :: BitState -> Bitmap -> (BitState,Int) indexFromStateAndBitmap state bmap = (state1,sparseIndex bmap m) @@ -243,7 +327,7 @@ indexFromStateAndBitmap state bmap = (state1,sparseIndex bmap m) m = setBit 0 bs -lookup' :: PrettyA v => BitState -> HashMap v -> Maybe v +lookup' :: BitState -> HashMap v -> Maybe v lookup' _ Empty = Nothing lookup' state (Leaf bs v) = if (getBytes state)==bs then Just v else Nothing lookup' state (One i x) = if i==j then lookup' state' x else Nothing @@ -255,7 +339,7 @@ lookup' state (BitmapIndexed bm arr) = lookup' state' (index arr i) lookup' state (Full arr) = lookup' state' (index arr i) where (state',i) = indexFromStateAndBitmap state fullNodeMask -lookupHM :: PrettyA v => Key -> HashMap v -> Maybe v +lookupHM :: Key -> HashMap v -> Maybe v lookupHM bytes mp = lookup' (initBitState bytes) mp @@ -299,6 +383,16 @@ fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) -- ======================================================================= -- Operations to make new arrays out off old ones with small changes + +-- | /O(n)/ Make a copy of an Array that removes the 'i'th element. Decreasing the size by 1. +remove :: ArrayPair arr marr a => arr a -> Int -> arr a +remove arr i = fst(withMutArray n action) + where n = (isize arr) - 1 + action marr = do + mcopy marr 0 arr 0 i + mcopy marr i arr (i+1) (n-i) + + -- | /O(n)/ Overwrite the element at the given position in this array, update :: PArray t -> Int -> t -> PArray t update arr i _t @@ -331,7 +425,7 @@ insertM ary idx b insertAt :: PArray e -> Int -> e -> PArray e insertAt arr idx b = runST(insertM arr idx b) - +-- | Create a new Array of size 'n' filled with objects 'a' arrayOf :: Int -> a -> PArray a arrayOf n a = runST $ do marr <- mnew n @@ -571,4 +665,4 @@ ppHashMap p (Full arr) = ppSexp "F" [ppList q (zip (bitmapToList fullNodeMask) ( instance PrettyA v => Show (HashMap v) where show x = show(ppHashMap prettyA x) - \ No newline at end of file + From 6f2f372ff63b33310e36fa538fdf392213255406 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Wed, 13 Oct 2021 16:21:15 -0400 Subject: [PATCH 03/19] added DomainRestrict, starting splitHashMap. --- libs/small-steps/src/Data/Compact/KeyMap.hs | 77 +++++++++++++++++++-- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index f4867fb2105..5abcd5fca36 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -16,7 +16,9 @@ import Data.Foldable (foldl') import Cardano.Prelude (HeapWords (..),Generic,runST,ST) import Data.Word(Word64) import qualified Data.Primitive.Array as PA -import Data.Bits (Bits,(.&.), (.|.), complement, popCount, unsafeShiftL,setBit,testBit,clearBit) +import Data.Bits (Bits,(.&.), (.|.), complement, popCount, + unsafeShiftL, unsafeShiftR, + zeroBits,setBit,testBit,clearBit) import Data.Compact.Class import GHC.Exts ((==#), reallyUnsafePtrEquality#, isTrue# ) import qualified Data.Map as Map @@ -27,6 +29,8 @@ import System.Random(RandomGen,genWord64,mkStdGen) import Prettyprinter import Data.Text(Text,pack) import qualified Prettyprinter.Internal as Pretty +import Data.Set(Set) +import qualified Data.Set as Set -- type PArray = PA.Array type PArray = Small.SmallArray @@ -167,7 +171,7 @@ instance HeapWords v => HeapWords (HashMap v) where -- ====================================================================== -- Insertion -insert' :: Show v => BitState -> v -> HashMap v -> HashMap v +insert' :: BitState -> v -> HashMap v -> HashMap v insert' bs0 v0 m0 = go bs0 v0 m0 where go !state !x Empty = Leaf (getBytes state) x @@ -219,7 +223,7 @@ insert' bs0 v0 m0 = go bs0 v0 m0 m = setBit 0 tagbits i = sparseIndex fullNodeMask m -makeTwo :: Show v => BitState -> HashMap v -> BitState -> v -> HashMap v +makeTwo :: BitState -> HashMap v -> BitState -> v -> HashMap v makeTwo state1 leaf1 state2 val2 | i1==i2 = One i1 (makeTwo state1' leaf1 state2' val2) | otherwise = -- trace ("MAKETWO (i1,i2)="++show(i1,i2)++"\n state1="++show state1++"\n state2="++show state2) $ @@ -229,7 +233,7 @@ makeTwo state1 leaf1 state2 val2 where (i1,state1') = nextBits "makeTwo1" state1 (i2,state2') = nextBits ("makeTwo2 "++"\n "++show state1++"\n "++show state2) state2 -insert :: Show v => Key -> v -> HashMap v -> HashMap v +insert :: Key -> v -> HashMap v -> HashMap v insert bs v hashmap = insert' (initBitState bs) v hashmap fromList :: Show v => [(Key,v)] -> HashMap v @@ -316,8 +320,6 @@ foldWithKey accum ans0 (Full arr) = loop ans0 0 loop ans i | i >= n = ans loop ans i = loop (foldWithKey accum ans (index arr i)) (i+1) - - -- ================================================================== -- Lookup a key @@ -343,6 +345,41 @@ lookupHM :: Key -> HashMap v -> Maybe v lookupHM bytes mp = lookup' (initBitState bytes) mp +splitHashMap :: BitState -> HashMap v -> (HashMap v,Maybe v,HashMap v) +splitHashMap _ Empty = (Empty,Nothing,Empty) +splitHashMap (BitState [] _) x = (x,Nothing,Empty) +splitHashMap (BitState (i:is) k) (One j x) = (One j a,b,One j c) + where (a,b,c) = splitHashMap (BitState is k) x +splitHashMap (BitState (i:is) k) (BitmapIndexed bmap arr) = + let splitpoint = sparseIndex bmap (setBit 0 i) + in case splitBitmap bmap i of + (less,True,greater) -> + let (h1,mv,h2) = splitHashMap (BitState is k) (index arr splitpoint) + (arr1,arr2) = splitArrayAt arr splitpoint h1 h2 + in (BitmapIndexed less arr1,mv,BitmapIndexed greater arr2) + (less,False,greater) -> undefined + +-- splitArrayAt :: PArray a -> Int -> a -> a -> (PArray a, PArray a) + +{- +splitHashMap (BitState (i:is) k) (Two bmap x y) = + case splitBitmap bmap i of + (0,True,greater) -> undefined + (less,True,0) -> undefined + (less,True,greater) -> undefined + (less,False,greater) -> undefined +-} + +domainRestrict :: HashMap v -> Set Key -> HashMap v +domainRestrict hm s = Set.foldl' accum Empty s + where accum ans key = + case lookupHM key hm of + Nothing -> ans + Just v -> insert key v ans + +hmdr = fromList (take 10 pairs) +set = Set.fromList [ bpairs !! 3, bpairs !! 8, bpairs !! 20] + -- ========================== -- | Check if two the two arguments are the same value. N.B. This @@ -379,6 +416,18 @@ fullNodeMask :: Bitmap fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) {-# INLINE fullNodeMask #-} +-- | A Bitmap represents a set. Split it into 3 parts (set1,present,set2) +-- where 'set1' is all elements in 'bm' less than 'i' +-- 'present' is if 'i' is in the set 'bm' +-- 'set2' is all elements in 'bm' greater than 'i' +splitBitmap :: Bitmap -> Int -> (Bitmap,Bool,Bitmap) +splitBitmap bm i = (unsafeShiftR (unsafeShiftL bm (64-i)) (64-i) + ,testBit bm i + ,unsafeShiftL (unsafeShiftR bm i) i) + +setBits :: [Int] -> Bitmap +setBits xs = foldl' setBit 0 xs + -- ======================================================================= -- Operations to make new arrays out off old ones with small changes @@ -436,6 +485,22 @@ arrayOf n a = runST $ do arr <- mfreeze marr pure arr + +-- | Split an array into 2 partial copies, where a1 appears at the last index +-- of the first copy and 'a2' appears at the 0th index of the second. +-- splitArrAt (fromlist [0,1,2,3,4,5,6]) 4 44 45 +-- (fromlist [0,1,2,3,44],fromlist [45,5,6]) +splitArrayAt :: PArray a -> Int -> a -> a -> (PArray a, PArray a) +splitArrayAt arr i a1 a2 = project (with2MutArray size1 size2 action) + where project (arr1,arr2, _state) = (arr1,arr2) + size1 = i + 1 + size2 = isize arr - i + action marr1 marr2 = do + mcopy marr1 0 arr 0 i + mwrite marr1 i a1 + mwrite marr2 0 a2 + mcopy marr2 1 arr (i+1) (size2 - 1) + -- ========================================================================= makeKeys :: Int -> Int -> [Key] From 864087d1ec151d1291b0cfb62dc73b2abf380a17 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Thu, 14 Oct 2021 14:10:08 -0400 Subject: [PATCH 04/19] added splitHashMap and intersect. --- libs/small-steps/src/Data/Compact/KeyMap.hs | 213 ++++++++++++++++---- 1 file changed, 176 insertions(+), 37 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 5abcd5fca36..9e1da34d611 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -10,15 +10,21 @@ module Data.Compact.KeyMap where - - import Data.Foldable (foldl') import Cardano.Prelude (HeapWords (..),Generic,runST,ST) import Data.Word(Word64) import qualified Data.Primitive.Array as PA -import Data.Bits (Bits,(.&.), (.|.), complement, popCount, - unsafeShiftL, unsafeShiftR, - zeroBits,setBit,testBit,clearBit) +import Data.Bits + ( Bits,(.&.), + (.|.), + complement, + popCount, + unsafeShiftL, + unsafeShiftR, + setBit, + testBit, + clearBit, + ) import Data.Compact.Class import GHC.Exts ((==#), reallyUnsafePtrEquality#, isTrue# ) import qualified Data.Map as Map @@ -31,6 +37,7 @@ import Data.Text(Text,pack) import qualified Prettyprinter.Internal as Pretty import Data.Set(Set) import qualified Data.Set as Set +-- import Debug.Trace -- type PArray = PA.Array type PArray = Small.SmallArray @@ -228,15 +235,15 @@ makeTwo state1 leaf1 state2 val2 | i1==i2 = One i1 (makeTwo state1' leaf1 state2' val2) | otherwise = -- trace ("MAKETWO (i1,i2)="++show(i1,i2)++"\n state1="++show state1++"\n state2="++show state2) $ if i1 < i2 - then Two (setBit (setBit 0 i1) i2) (Leaf (getBytes state1') val2) leaf1 - else Two (setBit (setBit 0 i1) i2) leaf1 (Leaf (getBytes state1') val2) + then Two (setBits [i1,i2]) (Leaf (getBytes state1') val2) leaf1 + else Two (setBits [i1,i2]) leaf1 (Leaf (getBytes state1') val2) where (i1,state1') = nextBits "makeTwo1" state1 (i2,state2') = nextBits ("makeTwo2 "++"\n "++show state1++"\n "++show state2) state2 insert :: Key -> v -> HashMap v -> HashMap v insert bs v hashmap = insert' (initBitState bs) v hashmap -fromList :: Show v => [(Key,v)] -> HashMap v +fromList :: [(Key,v)] -> HashMap v fromList ps = foldl' accum Empty ps where accum ans (k,v) = insert k v ans @@ -297,11 +304,16 @@ twoE bmap x y = Two bmap x y ith :: Bitmap -> Int -> Int ith bmap i = (bitmapToList bmap !! i) + +-- | The first (smallest) Segment in a BitMap +firstSeg :: Bitmap -> Segment +firstSeg bmap = head(bitmapToList bmap) + -- Float Empty's up over BitmpIndexed, Note that if the size of the arr -- becomes 2, then rebuild with Two rather than BitmapIndexed bitmapE :: Bitmap -> PArray (HashMap v) -> HashMap v bitmapE bmap arr | isize arr == 2 = twoE bmap (index arr 0) (index arr 1) -bitmapE bmap arr = BitmapIndexed bmap arr +bitmapE bmap arr = bitmapIndexedOrFull bmap arr -- ================================================================ -- aggregation in ascending order of keys @@ -345,31 +357,143 @@ lookupHM :: Key -> HashMap v -> Maybe v lookupHM bytes mp = lookup' (initBitState bytes) mp -splitHashMap :: BitState -> HashMap v -> (HashMap v,Maybe v,HashMap v) -splitHashMap _ Empty = (Empty,Nothing,Empty) -splitHashMap (BitState [] _) x = (x,Nothing,Empty) -splitHashMap (BitState (i:is) k) (One j x) = (One j a,b,One j c) - where (a,b,c) = splitHashMap (BitState is k) x -splitHashMap (BitState (i:is) k) (BitmapIndexed bmap arr) = - let splitpoint = sparseIndex bmap (setBit 0 i) - in case splitBitmap bmap i of - (less,True,greater) -> - let (h1,mv,h2) = splitHashMap (BitState is k) (index arr splitpoint) - (arr1,arr2) = splitArrayAt arr splitpoint h1 h2 - in (BitmapIndexed less arr1,mv,BitmapIndexed greater arr2) - (less,False,greater) -> undefined - --- splitArrayAt :: PArray a -> Int -> a -> a -> (PArray a, PArray a) - -{- -splitHashMap (BitState (i:is) k) (Two bmap x y) = - case splitBitmap bmap i of - (0,True,greater) -> undefined - (less,True,0) -> undefined - (less,True,greater) -> undefined - (less,False,greater) -> undefined --} - +-- ========================================================== +-- Split a HashMap into 3 parts + +-- | return (smaller than 'key', has key?, greater than 'key') +splitHashMap:: BitState -> HashMap v -> (HashMap v,Maybe v,HashMap v) +splitHashMap (BitState [] _) hm = (hm,Nothing,Empty) +splitHashMap (BitState (i:is) key) hm = + case splitBySegment i hm of + (less,x,greater) -> + case x of + Empty -> (build less,Nothing,build greater) + (Leaf k v) -> (build less,if key==k then (Just v) else Nothing,build greater) + other -> (reconstruct i less less1,ans,reconstruct i greater greater1) + where (less1,ans,greater1) = splitHashMap (BitState is key) other + +splitBySegment :: Segment -> HashMap v -> ([(Segment,HashMap v)],HashMap v, [(Segment,HashMap v)]) +splitBySegment i _x | i < 0 = ([],Empty,[]) +splitBySegment i _x | i > intSize = ([],Empty,[]) +splitBySegment _ Empty = ([],Empty,[]) +splitBySegment _ (x@(Leaf _ _)) = ([],x,[]) +splitBySegment i (x@(One j y)) = + case compare i j of + LT -> ([],Empty,[(i,x)]) + EQ -> ([],y,[]) + GT -> ([(i,x)],Empty,[]) +splitBySegment i (Two bmap l h) = splitArrAtSeg i bmap (fromlist [l,h]) +splitBySegment i (BitmapIndexed bmap arr) = splitArrAtSeg i bmap arr +splitBySegment i (Full arr) = splitArrAtSeg i fullNodeMask arr + +-- | Split an PArray at a particular Segment. +splitArrAtSeg:: Segment -> Bitmap -> PArray (HashMap v) -> ([(Int, HashMap v)], HashMap v, [(Int, HashMap v)]) +splitArrAtSeg i bmap arr = (takeWhile smaller ps, match, dropWhile tooSmall ps) + where ps = zip (bitmapToList bmap) (tolist arr) + smaller (j,_) = j < i + tooSmall (j,_) = j <= i + same (j,_) = i==j + match = case filter same ps of + [] -> Empty + ((_,x):_) -> x + +-- | reconstruct a HashMap from list of previous Segments, and a single HashMap from the next Segment +reconstruct :: Segment -> [(Segment, HashMap v)] -> HashMap v -> HashMap v +reconstruct _ xs Empty = build xs +reconstruct seg xs x = build (insertAscending (seg,x) xs) + +-- | insert a Segment pair in ascending order of Segments, Keep it sorted. +insertAscending:: (Segment, HashMap v) -> [(Segment, HashMap v)] -> [(Segment, HashMap v)] +insertAscending (i,x) [] = [(i,x)] +insertAscending (i,x) (ws@((y@(j,_)):ys)) = + case compare i j of + LT -> (i,x):ws + GT -> y : insertAscending (i,x) ys + EQ -> (i,x):ys -- We know that the Segement i should never appear in the list + +-- | Build a HashMap out of a list of Segment pairs. +build :: [(Segment, HashMap v)] -> HashMap v +build [] = Empty +build [(_,x)] = x +build [(j,x),(k,y)] = Two (setBits [j,k]) x y +build ps = bitmapIndexedOrFull (setBits (map fst ps)) (fromlist (map snd ps)) + + +testSplit2 :: Int -> IO () +testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b, " ",show c]) + where keys = makeKeys 99 1000 + ps = zip (take 12 keys) [0..] + hm :: HashMap Int + hm = fromList ps + state@(BitState pathx _) = (initBitState (keys !! i)) + (a,b,c) = splitHashMap state hm + + +-- =========================================================== +-- Maximum and Minimum Key + +-- | Get the smallest key, NOT the smallest value +getMin :: HashMap v -> Maybe (Key,v) +getMin Empty = Nothing +getMin (Leaf k v) = Just (k,v) +getMin (One _ x) = getMin x +getMin (Two _ x _) = getMin x +getMin (BitmapIndexed _ arr) = getMin (index arr 0) +getMin (Full arr) = getMin (index arr 0) + +-- | Get the largest key, NOT the largest value +getMax :: HashMap v -> Maybe (Key,v) +getMax Empty = Nothing +getMax (Leaf k v) = Just (k,v) +getMax (One _ x) = getMax x +getMax (Two _ _ y) = getMax y +getMax (BitmapIndexed _ arr) = getMax (index arr (isize arr - 1)) +getMax (Full arr) = getMax (index arr (isize arr - 1)) + +-- ================================================== + +-- | The (key,value) pairs (subset) of 'h1' where key is in the domain of both 'h1' and 'h2' +intersect :: HashMap v -> HashMap v -> HashMap v +intersect map1 map2 = + case next map1 map2 of + Nothing -> Empty + Just k -> leapfrog k map1 map2 Empty + +leapfrog :: Key -> HashMap v -> HashMap v -> HashMap v -> HashMap v +leapfrog k x y ans = + case (lub k x,lub k y) of + (Just(k1,v1,h1),Just(k2,_,h2)) -> + case next h1 h2 of + Just k3 -> leapfrog k3 h1 h2 (if k1==k2 then insert k1 v1 ans else ans) + Nothing -> (if k1==k2 then insert k1 v1 ans else ans) + _ -> ans + +-- | Find the smallest key <= 'key', and a HashMap of everything bigger than 'key' +lub :: Key -> HashMap v -> Maybe (Key, v, HashMap v) +lub key hm = + case splitHashMap (initBitState key) hm of + (_,Just _,Empty) -> Nothing + (_,Just v,hm2) -> Just(key,v,hm2) + (_,Nothing,hm1) -> + case getMin hm1 of + Just (k,v) -> Just(k,v,hm1) + Nothing -> Nothing + +next :: HashMap v1 -> HashMap v2 -> Maybe Key +next x y = case (getMin x,getMin y) of + (Just (k1,_),Just (k2,_)) -> Just(max k1 k2) + _ -> Nothing + +testIntersect :: HashMap Int +testIntersect = intersect h1x h2x + +h1x, h2x :: HashMap Int +h1x = fromList [pairs !! 3,pairs !! 5, pairs !! 11, pairs !! 6, pairs !! 4] +h2x = fromList [pairs !! 3,pairs !! 7, pairs !! 4, pairs !! 6, pairs !! 8] +-- ========================================================= + +-- | Domain restrict 'hkm' to those Keys found in 's'. This algorithm +-- assumes the set 's' is small compared to 'hm'. domainRestrict :: HashMap v -> Set Key -> HashMap v domainRestrict hm s = Set.foldl' accum Empty s where accum ans key = @@ -377,10 +501,14 @@ domainRestrict hm s = Set.foldl' accum Empty s Nothing -> ans Just v -> insert key v ans +hmdr :: HashMap Int hmdr = fromList (take 10 pairs) + +set:: Set Key set = Set.fromList [ bpairs !! 3, bpairs !! 8, bpairs !! 20] --- ========================== +-- ========================================== +-- Operations on Bits and Bitmaps -- | Check if two the two arguments are the same value. N.B. This -- function might give false negatives (due to GC moving objects.) @@ -421,9 +549,10 @@ fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) -- 'present' is if 'i' is in the set 'bm' -- 'set2' is all elements in 'bm' greater than 'i' splitBitmap :: Bitmap -> Int -> (Bitmap,Bool,Bitmap) +splitBitmap bm 0 = (0,testBit bm 0, clearBit bm 0) splitBitmap bm i = (unsafeShiftR (unsafeShiftL bm (64-i)) (64-i) ,testBit bm i - ,unsafeShiftL (unsafeShiftR bm i) i) + ,unsafeShiftL (unsafeShiftR bm (i+1)) (i+1)) setBits :: [Int] -> Bitmap setBits xs = foldl' setBit 0 xs @@ -432,7 +561,6 @@ setBits xs = foldl' setBit 0 xs -- ======================================================================= -- Operations to make new arrays out off old ones with small changes - -- | /O(n)/ Make a copy of an Array that removes the 'i'th element. Decreasing the size by 1. remove :: ArrayPair arr marr a => arr a -> Int -> arr a remove arr i = fst(withMutArray n action) @@ -473,6 +601,7 @@ insertM ary idx b -- increasing its size by one. insertAt :: PArray e -> Int -> e -> PArray e insertAt arr idx b = runST(insertM arr idx b) +{-# INLINE insertAt #-} -- | Create a new Array of size 'n' filled with objects 'a' arrayOf :: Int -> a -> PArray a @@ -484,6 +613,15 @@ arrayOf n a = runST $ do loop 0 arr <- mfreeze marr pure arr +{-# INLINE arrayOf #-} + +-- | Extract a slice from an array +subarray :: ArrayPair arr2 marr a => Int -> Int -> arr2 a -> arr2 a +subarray 0 hi arr | hi == (isize arr -1) = arr +subarray lo hi arr = fst(withMutArray size action) + where size = max (hi - lo + 1) 0 + action marr = mcopy marr 0 arr lo size +{-# INLINE subarray #-} -- | Split an array into 2 partial copies, where a1 appears at the last index @@ -730,4 +868,5 @@ ppHashMap p (Full arr) = ppSexp "F" [ppList q (zip (bitmapToList fullNodeMask) ( instance PrettyA v => Show (HashMap v) where show x = show(ppHashMap prettyA x) + showList xs x = unlines (map (\ y -> "\n"++ show(ppHashMap prettyA y)) xs) ++ x From 841ebfdbd771357874521519a0076d5322c5c0b1 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Thu, 14 Oct 2021 15:40:05 -0400 Subject: [PATCH 05/19] cleaned up, got rid of nextBits. --- libs/small-steps/src/Data/Compact/KeyMap.hs | 100 +++++++++----------- 1 file changed, 43 insertions(+), 57 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 9e1da34d611..999b06df07d 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -121,12 +121,8 @@ initBitState key = BitState (path key) key getBytes :: BitState -> Key getBytes (BitState _ bs) = bs --- | Consume the next segment from a path -nextBits :: String -> BitState -> (Int,BitState) -nextBits _ (BitState (x:xs) key) = (x,BitState xs key) -nextBits message (BitState [] key) = - error ("NextBits out of bounds. The key "++show key++" has run out of bits."++message) - +-- | Make a new BitState from a Key, using the old BitState to figure out +-- how far down the path have we already gone. next2 :: BitState -> Key -> BitState next2 (BitState ps _) key = (BitState (drop n (path key)) key) where n = (fromIntegral keyPathSize) - length ps @@ -178,67 +174,63 @@ instance HeapWords v => HeapWords (HashMap v) where -- ====================================================================== -- Insertion +indexFromSegment :: Bitmap -> Int -> Int +indexFromSegment bmap j = sparseIndex bmap (setBit 0 j) + insert' :: BitState -> v -> HashMap v -> HashMap v insert' bs0 v0 m0 = go bs0 v0 m0 where go !state !x Empty = Leaf (getBytes state) x - go !state x (One j node) = - case (nextBits "One" state) of - (i,state1) -> - case compare j i of - EQ -> One j (go state1 x node) - LT -> Two (setBit (setBit 0 i) j) node (go state1 x Empty) - GT -> Two (setBit (setBit 0 i) j) (go state1 x Empty) node + go (BitState [] k) _ _ = error ("In insert', ran out of bits for key "++show k) + go (BitState (i:is) k) x (One j node) = + case compare j i of + EQ -> One j (go (BitState is k) x node) + LT -> Two (setBits [i,j]) node (go (BitState is k) x Empty) + GT -> Two (setBits [i,j]) (go (BitState is k) x Empty) node go state x t@(Leaf bs1 y) | getBytes state == bs1 = if x `ptrEq` y then t else (Leaf bs1 x) | otherwise = makeTwo state t (next2 state bs1) x - go state x t@(BitmapIndexed bmap arr) - | not(testBit bmap tagbits) = - let !arr' = insertAt arr i $! (Leaf (getBytes state) x) - in bitmapIndexedOrFull (bmap .|. m) arr' + go (BitState (j:js) k) x t@(BitmapIndexed bmap arr) + | not(testBit bmap j) = + let !arr' = insertAt arr i $! (Leaf k x) + in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' | otherwise = let !st = index arr i - !st' = go state1 x st + !st' = go (BitState js k) x st in if st' `ptrEq` st then t else BitmapIndexed bmap (update arr i st') - where (!tagbits,!state1) = nextBits "BitmapIndexed" state - m = setBit 0 tagbits - i = sparseIndex bmap m - go state x t@(Two bmap x0 x1) - | not(testBit bmap tagbits) = - let !arr' = insertAt (fromlist [x0,x1]) i $! (Leaf (getBytes state) x) - in bitmapIndexedOrFull (bmap .|. m) arr' + where i = indexFromSegment bmap j + go (BitState (j:js) k) x t@(Two bmap x0 x1) + | not(testBit bmap j) = + let !arr' = insertAt (fromlist [x0,x1]) i $! (Leaf k x) + in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' | otherwise = - let !st = if i==0 then x0 else x1 -- index arr i - !st' = go state1 x st + let !st = if i==0 then x0 else x1 + !st' = go (BitState js k) x st in if st' `ptrEq` st then t else if i==0 then Two bmap st' x1 else Two bmap x0 st' - where (!tagbits,!state1) = nextBits "Two" state - m = setBit 0 tagbits - i = sparseIndex bmap m - go state x t@(Full arr) = + where i = indexFromSegment bmap j + go (BitState (j:js) k) x t@(Full arr) = let !st = index arr i - !st' = go state1 x st + !st' = go (BitState js k) x st in if st' `ptrEq` st then t else Full (update arr i st') - where (!tagbits,!state1) = nextBits "Full" state - m = setBit 0 tagbits - i = sparseIndex fullNodeMask m + where i = indexFromSegment fullNodeMask j makeTwo :: BitState -> HashMap v -> BitState -> v -> HashMap v -makeTwo state1 leaf1 state2 val2 - | i1==i2 = One i1 (makeTwo state1' leaf1 state2' val2) - | otherwise = -- trace ("MAKETWO (i1,i2)="++show(i1,i2)++"\n state1="++show state1++"\n state2="++show state2) $ - if i1 < i2 - then Two (setBits [i1,i2]) (Leaf (getBytes state1') val2) leaf1 - else Two (setBits [i1,i2]) leaf1 (Leaf (getBytes state1') val2) - where (i1,state1') = nextBits "makeTwo1" state1 - (i2,state2') = nextBits ("makeTwo2 "++"\n "++show state1++"\n "++show state2) state2 +makeTwo (BitState [] k) _leaf _state _val = error ("Case 1. In makeTwo, out of bits for key "++show k) +makeTwo _state _leaf (BitState [] k) _val = error ("Case 2. In makeTwo, out of bits for key "++show k) +makeTwo (BitState (i:is) k1) leaf1 (BitState (j:js) k2) val2 + | i==j = One i (makeTwo (BitState is k1) leaf1 (BitState js k2) val2) + | otherwise = if i < j + then Two (setBits [i,j]) (Leaf k1 val2) leaf1 + else Two (setBits [i,j]) leaf1 (Leaf k1 val2) + insert :: Key -> v -> HashMap v -> HashMap v insert bs v hashmap = insert' (initBitState bs) v hashmap @@ -335,23 +327,17 @@ foldWithKey accum ans0 (Full arr) = loop ans0 0 -- ================================================================== -- Lookup a key -indexFromStateAndBitmap :: BitState -> Bitmap -> (BitState,Int) -indexFromStateAndBitmap state bmap = (state1,sparseIndex bmap m) - where (!bs,!state1) = nextBits "indexFromStateAndBitmap" state - m = setBit 0 bs - - lookup' :: BitState -> HashMap v -> Maybe v lookup' _ Empty = Nothing lookup' state (Leaf bs v) = if (getBytes state)==bs then Just v else Nothing -lookup' state (One i x) = if i==j then lookup' state' x else Nothing - where (j,state') = nextBits "ONE in lookup" state -lookup' state (Two bm x0 x1) = if i==0 then lookup' state' x0 else lookup' state' x1 - where (state',i) = indexFromStateAndBitmap state bm -lookup' state (BitmapIndexed bm arr) = lookup' state' (index arr i) - where (state',i) = indexFromStateAndBitmap state bm -lookup' state (Full arr) = lookup' state' (index arr i) - where (state',i) = indexFromStateAndBitmap state fullNodeMask +lookup' (BitState [] k) _ = error ("lookup', out of bits for key "++show k) +lookup' (BitState (j:js) k) (One i x) = if i==j then lookup' (BitState js k) x else Nothing +lookup' (BitState (j:js) k) (Two bm x0 x1) = if i==0 then lookup' (BitState js k) x0 else lookup' (BitState js k) x1 + where i = indexFromSegment bm j +lookup' (BitState (j:js) k) (BitmapIndexed bm arr) = lookup' (BitState js k) (index arr i) + where i = indexFromSegment bm j +lookup' (BitState (j:js) k) (Full arr) = lookup' (BitState js k) (index arr i) + where i = indexFromSegment fullNodeMask j lookupHM :: Key -> HashMap v -> Maybe v lookupHM bytes mp = lookup' (initBitState bytes) mp From a757041bffed7eaed68fd663229f6822b7d6bfca Mon Sep 17 00:00:00 2001 From: TimSheard Date: Fri, 15 Oct 2021 12:00:17 -0400 Subject: [PATCH 06/19] Cleaned up added the HashMap module, same names ad Data.Map.Strict. --- libs/small-steps/small-steps.cabal | 1 + libs/small-steps/src/Data/Compact/Class.hs | 9 + libs/small-steps/src/Data/Compact/HashMap.hs | 54 ++++ libs/small-steps/src/Data/Compact/KeyMap.hs | 287 ++++++++++--------- 4 files changed, 222 insertions(+), 129 deletions(-) create mode 100644 libs/small-steps/src/Data/Compact/HashMap.hs diff --git a/libs/small-steps/small-steps.cabal b/libs/small-steps/small-steps.cabal index 74839df2334..bd10ca23c2f 100644 --- a/libs/small-steps/small-steps.cabal +++ b/libs/small-steps/small-steps.cabal @@ -47,6 +47,7 @@ library , Data.Coders , Data.Compact.Class , Data.Compact.KeyMap + , Data.Compact.HashMap , Data.Pulse , Control.Provenance , Control.Iterate.SetAlgebra diff --git a/libs/small-steps/src/Data/Compact/Class.hs b/libs/small-steps/src/Data/Compact/Class.hs index 5a38c6be3bd..6c02038056b 100644 --- a/libs/small-steps/src/Data/Compact/Class.hs +++ b/libs/small-steps/src/Data/Compact/Class.hs @@ -209,6 +209,15 @@ mToList first marr = loop first [] loop lo xs | lo > hi = pure(reverse xs) loop lo xs = do {x <- mindex marr lo; loop (lo+1) (x:xs)} + +-- | Extract a slice from an array +slice :: ArrayPair arr2 marr a => Int -> Int -> arr2 a -> arr2 a +slice 0 hi arr | hi == (isize arr -1) = arr +slice lo hi arr = fst(withMutArray size action) + where size = max (hi - lo + 1) 0 + action marr = mcopy marr 0 arr lo size +{-# INLINE slice #-} + -- ================================================================ -- Functions for using mutable initialization in a safe manner. -- Using these functions is the safe way to use the method 'mfreeze' diff --git a/libs/small-steps/src/Data/Compact/HashMap.hs b/libs/small-steps/src/Data/Compact/HashMap.hs new file mode 100644 index 00000000000..d39a89280c5 --- /dev/null +++ b/libs/small-steps/src/Data/Compact/HashMap.hs @@ -0,0 +1,54 @@ +{-# LANGUAGE GADTs #-} + +module Data.Compact.HashMap where + +import qualified Data.Compact.KeyMap as KM +import Data.Compact.KeyMap(Key,KeyMap) +import Data.Set(Set) +import qualified Data.Set as Set + +-- ========================================================================== + +class Keyed t where + toKey :: t -> Key + fromKey :: Key -> t + +data HashMap k v where + HashMap :: Keyed k => KeyMap v -> HashMap k v + +lookup :: k -> HashMap k v -> Maybe v +lookup k (HashMap m) = KM.lookupHM (toKey k) m + +insert :: k -> v -> HashMap k v -> HashMap k v +insert k v (HashMap m) = HashMap(KM.insert (toKey k) v m) + +insertWithKey :: (k -> v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v +insertWithKey combine key v (HashMap m) = HashMap(KM.insertWithKey comb state v m) + where comb k v1 v2 = combine (fromKey k) v1 v2 + state = (KM.initBitState (toKey key)) + +restrictKeys :: HashMap k v -> Set k -> HashMap k v +restrictKeys (HashMap m) set = HashMap(KM.domainRestrict m (Set.map toKey set)) + +splitLookup:: k -> HashMap k a -> (HashMap k a, Maybe a, HashMap k a) +splitLookup k (HashMap m) = (HashMap a,b,HashMap c) + where (a,b,c) = KM.splitKeyMap (KM.initBitState (toKey k)) m + +intersection:: HashMap k v -> HashMap k v -> HashMap k v +intersection (HashMap m1) (HashMap m2) = HashMap(KM.intersect m1 m2) + +foldlWithKey' :: (ans -> k -> v -> ans) -> ans -> HashMap k v -> ans +foldlWithKey' accum a (HashMap m) = KM.foldWithKey accum2 a m + where accum2 ans k v = accum ans (fromKey k) v + +size :: HashMap k v -> Int +size (HashMap m) = KM.sizeKeyMap m + +fromList :: Keyed k => [(k, v)] -> HashMap k v +fromList xs = HashMap(KM.fromList (map (\ (k,v) -> (toKey k,v)) xs)) + +toList :: HashMap k v -> [(k, v)] +toList (HashMap m) = KM.foldWithKey (\ ans k v -> (fromKey k,v):ans) [] m + +mapWithKey :: (k -> v -> u) -> HashMap k v -> HashMap k u +mapWithKey f (HashMap m) = HashMap (KM.mapWithKey (\ key v -> f (fromKey key) v) m) \ No newline at end of file diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 999b06df07d..8d97dc98a48 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -5,6 +5,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} + {-# OPTIONS_GHC -Wno-orphans #-} @@ -20,10 +21,10 @@ import Data.Bits complement, popCount, unsafeShiftL, - unsafeShiftR, setBit, testBit, clearBit, + zeroBits, ) import Data.Compact.Class import GHC.Exts ((==#), reallyUnsafePtrEquality#, isTrue# ) @@ -144,14 +145,14 @@ instance Show BitState where -- =============================================================== -data HashMap v +data KeyMap v = Empty | Leaf {-# UNPACK #-} !Key v - | One {-# UNPACK #-} !Int (HashMap v) -- 1 subtree - | Two {-# UNPACK #-} !Bitmap (HashMap v) (HashMap v) -- 2 subtrees + | One {-# UNPACK #-} !Int (KeyMap v) -- 1 subtree + | Two {-# UNPACK #-} !Bitmap (KeyMap v) (KeyMap v) -- 2 subtrees | BitmapIndexed {-# UNPACK #-} !Bitmap -- 3 - (intSize - 1) subtrees - {-# UNPACK #-} !(Small.SmallArray (HashMap v)) - | Full {-# UNPACK #-} !(Small.SmallArray (HashMap v)) -- intSize subtrees + {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) + | Full {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) -- intSize subtrees deriving (NFData,Generic) heapAdd :: HeapWords a => a -> Int -> Int @@ -163,7 +164,7 @@ heapPlus ans x = heapWords x + ans instance HeapWords t => HeapWords (PA.Array t) where heapWords arr = foldl' heapPlus (2 + isize arr) arr -instance HeapWords v => HeapWords (HashMap v) where +instance HeapWords v => HeapWords (KeyMap v) where heapWords Empty = 1 heapWords (One _ xs) = 3 + heapWords xs heapWords (Leaf _ v) = 6 + heapWords v -- Change when Key changes @@ -177,19 +178,24 @@ instance HeapWords v => HeapWords (HashMap v) where indexFromSegment :: Bitmap -> Int -> Int indexFromSegment bmap j = sparseIndex bmap (setBit 0 j) -insert' :: BitState -> v -> HashMap v -> HashMap v -insert' bs0 v0 m0 = go bs0 v0 m0 +insert' :: BitState -> v -> KeyMap v -> KeyMap v +insert' state v m = insertWithKey (\ _k new _old -> new) state v m + +insertWithKey :: (Key -> v -> v -> v) -> BitState -> v -> KeyMap v -> KeyMap v +insertWithKey combine bs0 v0 m0 = go bs0 v0 m0 where - go !state !x Empty = Leaf (getBytes state) x + go (BitState _ k) !x Empty = Leaf k x go (BitState [] k) _ _ = error ("In insert', ran out of bits for key "++show k) go (BitState (i:is) k) x (One j node) = case compare j i of EQ -> One j (go (BitState is k) x node) LT -> Two (setBits [i,j]) node (go (BitState is k) x Empty) GT -> Two (setBits [i,j]) (go (BitState is k) x Empty) node - go state x t@(Leaf bs1 y) - | getBytes state == bs1 = if x `ptrEq` y then t else (Leaf bs1 x) - | otherwise = makeTwo state t (next2 state bs1) x + go (state@(BitState _ k1)) x t@(Leaf k2 y) + | k1 == k2 = if x `ptrEq` y + then t + else (Leaf k2 (combine k2 x y)) + | otherwise = makeTwo state t (next2 state k2) x go (BitState (j:js) k) x t@(BitmapIndexed bmap arr) | not(testBit bmap j) = let !arr' = insertAt arr i $! (Leaf k x) @@ -222,7 +228,7 @@ insert' bs0 v0 m0 = go bs0 v0 m0 else Full (update arr i st') where i = indexFromSegment fullNodeMask j -makeTwo :: BitState -> HashMap v -> BitState -> v -> HashMap v +makeTwo :: BitState -> KeyMap v -> BitState -> v -> KeyMap v makeTwo (BitState [] k) _leaf _state _val = error ("Case 1. In makeTwo, out of bits for key "++show k) makeTwo _state _leaf (BitState [] k) _val = error ("Case 2. In makeTwo, out of bits for key "++show k) makeTwo (BitState (i:is) k1) leaf1 (BitState (j:js) k2) val2 @@ -232,18 +238,18 @@ makeTwo (BitState (i:is) k1) leaf1 (BitState (j:js) k2) val2 else Two (setBits [i,j]) leaf1 (Leaf k1 val2) -insert :: Key -> v -> HashMap v -> HashMap v +insert :: Key -> v -> KeyMap v -> KeyMap v insert bs v hashmap = insert' (initBitState bs) v hashmap -fromList :: [(Key,v)] -> HashMap v +fromList :: [(Key,v)] -> KeyMap v fromList ps = foldl' accum Empty ps where accum ans (k,v) = insert k v ans -- ================================================================= -- Deletion --- | Delete the Key encoded in the BitState from the HashMap -delete' :: BitState -> HashMap v -> HashMap v +-- | Delete the Key encoded in the BitState from the KeyMap +delete' :: BitState -> KeyMap v -> KeyMap v delete' (BitState [] _) hm = hm -- Removing a bogus key, leaves 'hm' unchanged delete' _ Empty = Empty delete' (BitState _ k) (hm@(Leaf k2 _)) = if k==k2 then Empty else hm @@ -271,7 +277,7 @@ delete' (BitState(i:is) k) (Full arr) = Empty -> BitmapIndexed (clearBit fullNodeMask i) (remove arr j) _ -> Full(update arr j result) -delete :: Key -> HashMap v -> HashMap v +delete :: Key -> KeyMap v -> KeyMap v delete k hm = delete' (initBitState k) hm -- One of the invariants is that no Empty ever appears in any of the other @@ -281,55 +287,53 @@ delete k hm = delete' (initBitState k) hm -- 'remove'd from one of the constructors with Array like components (One, Two, BitmapInded, Full). -- Float Empty up over One -oneE :: Int -> HashMap v -> HashMap v +oneE :: Int -> KeyMap v -> KeyMap v oneE _ Empty = Empty oneE i x = One i x -- Float Empty's up over Two -twoE :: Bitmap -> HashMap v -> HashMap v -> HashMap v +twoE :: Bitmap -> KeyMap v -> KeyMap v -> KeyMap v twoE _ Empty Empty = Empty twoE bmap x Empty = oneE (ith bmap 0) x twoE bmap Empty x = oneE (ith bmap 1) x twoE bmap x y = Two bmap x y --- | Get the 'ith' element from a Bitmap -ith :: Bitmap -> Int -> Int -ith bmap i = (bitmapToList bmap !! i) - - -- | The first (smallest) Segment in a BitMap firstSeg :: Bitmap -> Segment firstSeg bmap = head(bitmapToList bmap) -- Float Empty's up over BitmpIndexed, Note that if the size of the arr -- becomes 2, then rebuild with Two rather than BitmapIndexed -bitmapE :: Bitmap -> PArray (HashMap v) -> HashMap v +bitmapE :: Bitmap -> PArray (KeyMap v) -> KeyMap v bitmapE bmap arr | isize arr == 2 = twoE bmap (index arr 0) (index arr 1) bitmapE bmap arr = bitmapIndexedOrFull bmap arr -- ================================================================ -- aggregation in ascending order of keys -foldWithKey :: (ans -> Key -> v -> ans) -> ans -> HashMap v -> ans -foldWithKey _ ans Empty = ans -foldWithKey accum ans (Leaf k v) = accum ans k v -foldWithKey accum ans (One _ x) = foldWithKey accum ans x -foldWithKey accum ans (Two _ x y) = foldWithKey accum (foldWithKey accum ans x) y -foldWithKey accum ans0 (BitmapIndexed _ arr) = loop ans0 0 +foldWithKey :: (ans -> Key -> v -> ans) -> ans -> KeyMap v -> ans +foldWithKey _ !ans Empty = ans +foldWithKey accum !ans (Leaf k v) = accum ans k v +foldWithKey accum !ans (One _ x) = foldWithKey accum ans x +foldWithKey accum !ans (Two _ x y) = foldWithKey accum (foldWithKey accum ans x) y +foldWithKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 0 where n = isize arr - loop ans i | i >= n = ans - loop ans i = loop (foldWithKey accum ans (index arr i)) (i+1) -foldWithKey accum ans0 (Full arr) = loop ans0 0 + loop !ans i | i >= n = ans + loop !ans i = loop (foldWithKey accum ans (index arr i)) (i+1) +foldWithKey accum !ans0 (Full arr) = loop ans0 0 where n = isize arr - loop ans i | i >= n = ans - loop ans i = loop (foldWithKey accum ans (index arr i)) (i+1) + loop !ans i | i >= n = ans + loop !ans i = loop (foldWithKey accum ans (index arr i)) (i+1) + +sizeKeyMap :: KeyMap v -> Int +sizeKeyMap x = foldWithKey (\ ans _k _v -> ans+1) 0 x -- ================================================================== -- Lookup a key -lookup' :: BitState -> HashMap v -> Maybe v +lookup' :: BitState -> KeyMap v -> Maybe v lookup' _ Empty = Nothing -lookup' state (Leaf bs v) = if (getBytes state)==bs then Just v else Nothing +lookup' (BitState _ k1) (Leaf k2 v) = if k1==k2 then Just v else Nothing lookup' (BitState [] k) _ = error ("lookup', out of bits for key "++show k) lookup' (BitState (j:js) k) (One i x) = if i==j then lookup' (BitState js k) x else Nothing lookup' (BitState (j:js) k) (Two bm x0 x1) = if i==0 then lookup' (BitState js k) x0 else lookup' (BitState js k) x1 @@ -339,26 +343,39 @@ lookup' (BitState (j:js) k) (BitmapIndexed bm arr) = lookup' (BitState js k) (in lookup' (BitState (j:js) k) (Full arr) = lookup' (BitState js k) (index arr i) where i = indexFromSegment fullNodeMask j -lookupHM :: Key -> HashMap v -> Maybe v +lookupHM :: Key -> KeyMap v -> Maybe v lookupHM bytes mp = lookup' (initBitState bytes) mp +-- ========================================================= +-- map + +mapWithKey :: (Key -> a -> b) -> KeyMap a -> KeyMap b +mapWithKey _ Empty = Empty +mapWithKey f (Leaf k2 v) = (Leaf k2 (f k2 v)) +mapWithKey f (One i x) = One i (mapWithKey f x) +mapWithKey f (Two bm x0 x1) = Two bm (mapWithKey f x0) (mapWithKey f x1) +mapWithKey f (BitmapIndexed bm arr) = BitmapIndexed bm (fmap (mapWithKey f) arr) +mapWithKey f (Full arr) = Full (fmap (mapWithKey f) arr) + +instance Functor KeyMap where + fmap f x = mapWithKey (\ _ v -> f v) x -- ========================================================== --- Split a HashMap into 3 parts +-- Split a KeyMap into 3 parts -- | return (smaller than 'key', has key?, greater than 'key') -splitHashMap:: BitState -> HashMap v -> (HashMap v,Maybe v,HashMap v) -splitHashMap (BitState [] _) hm = (hm,Nothing,Empty) -splitHashMap (BitState (i:is) key) hm = +splitKeyMap:: BitState -> KeyMap v -> (KeyMap v,Maybe v,KeyMap v) +splitKeyMap (BitState [] _) hm = (hm,Nothing,Empty) +splitKeyMap (BitState (i:is) key) hm = case splitBySegment i hm of (less,x,greater) -> case x of Empty -> (build less,Nothing,build greater) (Leaf k v) -> (build less,if key==k then (Just v) else Nothing,build greater) other -> (reconstruct i less less1,ans,reconstruct i greater greater1) - where (less1,ans,greater1) = splitHashMap (BitState is key) other + where (less1,ans,greater1) = splitKeyMap (BitState is key) other -splitBySegment :: Segment -> HashMap v -> ([(Segment,HashMap v)],HashMap v, [(Segment,HashMap v)]) +splitBySegment :: Segment -> KeyMap v -> ([(Segment,KeyMap v)],KeyMap v, [(Segment,KeyMap v)]) splitBySegment i _x | i < 0 = ([],Empty,[]) splitBySegment i _x | i > intSize = ([],Empty,[]) splitBySegment _ Empty = ([],Empty,[]) @@ -373,7 +390,7 @@ splitBySegment i (BitmapIndexed bmap arr) = splitArrAtSeg i bmap arr splitBySegment i (Full arr) = splitArrAtSeg i fullNodeMask arr -- | Split an PArray at a particular Segment. -splitArrAtSeg:: Segment -> Bitmap -> PArray (HashMap v) -> ([(Int, HashMap v)], HashMap v, [(Int, HashMap v)]) +splitArrAtSeg:: Segment -> Bitmap -> PArray (KeyMap v) -> ([(Int, KeyMap v)], KeyMap v, [(Int, KeyMap v)]) splitArrAtSeg i bmap arr = (takeWhile smaller ps, match, dropWhile tooSmall ps) where ps = zip (bitmapToList bmap) (tolist arr) smaller (j,_) = j < i @@ -383,13 +400,13 @@ splitArrAtSeg i bmap arr = (takeWhile smaller ps, match, dropWhile tooSmall ps) [] -> Empty ((_,x):_) -> x --- | reconstruct a HashMap from list of previous Segments, and a single HashMap from the next Segment -reconstruct :: Segment -> [(Segment, HashMap v)] -> HashMap v -> HashMap v +-- | reconstruct a KeyMap from list of previous Segments, and a single KeyMap from the next Segment +reconstruct :: Segment -> [(Segment, KeyMap v)] -> KeyMap v -> KeyMap v reconstruct _ xs Empty = build xs reconstruct seg xs x = build (insertAscending (seg,x) xs) -- | insert a Segment pair in ascending order of Segments, Keep it sorted. -insertAscending:: (Segment, HashMap v) -> [(Segment, HashMap v)] -> [(Segment, HashMap v)] +insertAscending:: (Segment, KeyMap v) -> [(Segment, KeyMap v)] -> [(Segment, KeyMap v)] insertAscending (i,x) [] = [(i,x)] insertAscending (i,x) (ws@((y@(j,_)):ys)) = case compare i j of @@ -397,8 +414,8 @@ insertAscending (i,x) (ws@((y@(j,_)):ys)) = GT -> y : insertAscending (i,x) ys EQ -> (i,x):ys -- We know that the Segement i should never appear in the list --- | Build a HashMap out of a list of Segment pairs. -build :: [(Segment, HashMap v)] -> HashMap v +-- | Build a KeyMap out of a list of Segment pairs. +build :: [(Segment, KeyMap v)] -> KeyMap v build [] = Empty build [(_,x)] = x build [(j,x),(k,y)] = Two (setBits [j,k]) x y @@ -409,17 +426,17 @@ testSplit2 :: Int -> IO () testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b, " ",show c]) where keys = makeKeys 99 1000 ps = zip (take 12 keys) [0..] - hm :: HashMap Int + hm :: KeyMap Int hm = fromList ps state@(BitState pathx _) = (initBitState (keys !! i)) - (a,b,c) = splitHashMap state hm + (a,b,c) = splitKeyMap state hm -- =========================================================== -- Maximum and Minimum Key -- | Get the smallest key, NOT the smallest value -getMin :: HashMap v -> Maybe (Key,v) +getMin :: KeyMap v -> Maybe (Key,v) getMin Empty = Nothing getMin (Leaf k v) = Just (k,v) getMin (One _ x) = getMin x @@ -428,7 +445,7 @@ getMin (BitmapIndexed _ arr) = getMin (index arr 0) getMin (Full arr) = getMin (index arr 0) -- | Get the largest key, NOT the largest value -getMax :: HashMap v -> Maybe (Key,v) +getMax :: KeyMap v -> Maybe (Key,v) getMax Empty = Nothing getMax (Leaf k v) = Just (k,v) getMax (One _ x) = getMax x @@ -438,14 +455,14 @@ getMax (Full arr) = getMax (index arr (isize arr - 1)) -- ================================================== --- | The (key,value) pairs (subset) of 'h1' where key is in the domain of both 'h1' and 'h2' -intersect :: HashMap v -> HashMap v -> HashMap v +-- | The (key,value) pairs (i.e. a subset) of 'h1' where key is in the domain of both 'h1' and 'h2' +intersect :: KeyMap v -> KeyMap v -> KeyMap v intersect map1 map2 = case next map1 map2 of Nothing -> Empty Just k -> leapfrog k map1 map2 Empty -leapfrog :: Key -> HashMap v -> HashMap v -> HashMap v -> HashMap v +leapfrog :: Key -> KeyMap v -> KeyMap v -> KeyMap v -> KeyMap v leapfrog k x y ans = case (lub k x,lub k y) of (Just(k1,v1,h1),Just(k2,_,h2)) -> @@ -454,10 +471,10 @@ leapfrog k x y ans = Nothing -> (if k1==k2 then insert k1 v1 ans else ans) _ -> ans --- | Find the smallest key <= 'key', and a HashMap of everything bigger than 'key' -lub :: Key -> HashMap v -> Maybe (Key, v, HashMap v) +-- | Find the smallest key <= 'key', and a KeyMap of everything bigger than 'key' +lub :: Key -> KeyMap v -> Maybe (Key, v, KeyMap v) lub key hm = - case splitHashMap (initBitState key) hm of + case splitKeyMap (initBitState key) hm of (_,Just _,Empty) -> Nothing (_,Just v,hm2) -> Just(key,v,hm2) (_,Nothing,hm1) -> @@ -465,29 +482,29 @@ lub key hm = Just (k,v) -> Just(k,v,hm1) Nothing -> Nothing -next :: HashMap v1 -> HashMap v2 -> Maybe Key +next :: KeyMap v1 -> KeyMap v2 -> Maybe Key next x y = case (getMin x,getMin y) of (Just (k1,_),Just (k2,_)) -> Just(max k1 k2) _ -> Nothing -testIntersect :: HashMap Int +testIntersect :: KeyMap Int testIntersect = intersect h1x h2x -h1x, h2x :: HashMap Int +h1x, h2x :: KeyMap Int h1x = fromList [pairs !! 3,pairs !! 5, pairs !! 11, pairs !! 6, pairs !! 4] h2x = fromList [pairs !! 3,pairs !! 7, pairs !! 4, pairs !! 6, pairs !! 8] -- ========================================================= -- | Domain restrict 'hkm' to those Keys found in 's'. This algorithm -- assumes the set 's' is small compared to 'hm'. -domainRestrict :: HashMap v -> Set Key -> HashMap v +domainRestrict :: KeyMap v -> Set Key -> KeyMap v domainRestrict hm s = Set.foldl' accum Empty s where accum ans key = case lookupHM key hm of Nothing -> ans Just v -> insert key v ans -hmdr :: HashMap Int +hmdr :: KeyMap Int hmdr = fromList (take 10 pairs) set:: Set Key @@ -519,7 +536,7 @@ sparseIndex b m = popCount (b .&. (m - 1)) {-# INLINE sparseIndex #-} -- | Create a 'BitmapIndexed' or 'Full' node. -bitmapIndexedOrFull :: Bitmap -> PArray (HashMap v) -> HashMap v +bitmapIndexedOrFull :: Bitmap -> PArray (KeyMap v) -> KeyMap v bitmapIndexedOrFull b ary | b == fullNodeMask = Full ary | otherwise = BitmapIndexed b ary @@ -530,26 +547,66 @@ fullNodeMask :: Bitmap fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) {-# INLINE fullNodeMask #-} +setBits :: [Int] -> Bitmap +setBits xs = foldl' setBit 0 xs + +oneBits :: Bitmap +oneBits = (complement (zeroBits :: Word64)) + +-- | Get the 'ith' element from a Bitmap +ith :: Bitmap -> Int -> Int +ith bmap i = (bitmapToList bmap !! i) + + -- | A Bitmap represents a set. Split it into 3 parts (set1,present,set2) -- where 'set1' is all elements in 'bm' less than 'i' -- 'present' is if 'i' is in the set 'bm' -- 'set2' is all elements in 'bm' greater than 'i' +-- We do this by using the precomputed masks: lessMasks, greaterMasks splitBitmap :: Bitmap -> Int -> (Bitmap,Bool,Bitmap) -splitBitmap bm 0 = (0,testBit bm 0, clearBit bm 0) -splitBitmap bm i = (unsafeShiftR (unsafeShiftL bm (64-i)) (64-i) - ,testBit bm i - ,unsafeShiftL (unsafeShiftR bm (i+1)) (i+1)) +splitBitmap bm i = (bm .&. (index lessMasks i), testBit bm i, bm .&. (index greaterMasks i)) -setBits :: [Int] -> Bitmap -setBits xs = foldl' setBit 0 xs +{- +mask bits set formula + +at position i=0 +[0,0,0,0,0] [] [0 .. i-1] +[1,1,1,1,0] [1,2,3,4] [i+1 .. 4] + +at position i=1 +[0,0,0,0,1] [0] +[1,1,1,0,0] [2,3,4] + +at position i=2 +[0,0,0,1,1] [0,1] +[1,1,0,0,0] [3,4] + +at position i=3 +[0,0,1,1,1] [0,1,2] +[1,0,0,0,0] [4] + +at position i=4 +[0,1,1,1,1] [0,1,2,3] +[0,0,0,0,0] [] +-} + +lessMasks, greaterMasks :: PArray Bitmap +lessMasks = fromlist [ setBits [0 .. i-1] | i <- [0..63]] +greaterMasks = fromlist [ setBits [i+1 .. 63] | i <- [0..63]] + +testsplitBitmap :: Int -> ([Int], Bool, [Int]) +testsplitBitmap i = (bitmapToList l,b,bitmapToList g) + where (l,b,g) = splitBitmap (complement (zeroBits :: Word64)) i -- ======================================================================= -- Operations to make new arrays out off old ones with small changes -- | /O(n)/ Make a copy of an Array that removes the 'i'th element. Decreasing the size by 1. -remove :: ArrayPair arr marr a => arr a -> Int -> arr a -remove arr i = fst(withMutArray n action) +remove :: PArray a -> Int -> PArray a +remove arr i = if i<0 || i >= n + then error ("index out of bounds in 'remove' "++show i++" not in range (0,"++show (isize arr -1)++")") + else fst(withMutArray n action) where n = (isize arr) - 1 action marr = do mcopy marr 0 arr 0 i @@ -558,9 +615,9 @@ remove arr i = fst(withMutArray n action) -- | /O(n)/ Overwrite the element at the given position in this array, update :: PArray t -> Int -> t -> PArray t -update arr i _t +update arr i _ | i<0 || i >= (isize arr) - = error ("index out of bounds in update "++show i++" not in range (0,"++show (isize arr -1)++")") + = error ("index out of bounds in 'update' "++show i++" not in range (0,"++show (isize arr -1)++")") update arr i t = fst(withMutArray size1 action) where size1 = isize arr action marr = do @@ -573,7 +630,7 @@ update arr i t = fst(withMutArray size1 action) -- increasing its size by one. insertM :: PArray e -> Int -> e -> ST s (PArray e) insertM ary idx b - | idx < 0 || idx > counter = error ("Bounds check in insertAt "++show idx++" not in range 0.."++show (counter)) + | idx < 0 || idx > counter = error ("Bounds check in 'insertAt' "++show idx++" not in range 0.."++show (counter)) | otherwise = do mary <- mnew (counter+1) mcopy mary 0 ary 0 idx @@ -601,30 +658,6 @@ arrayOf n a = runST $ do pure arr {-# INLINE arrayOf #-} --- | Extract a slice from an array -subarray :: ArrayPair arr2 marr a => Int -> Int -> arr2 a -> arr2 a -subarray 0 hi arr | hi == (isize arr -1) = arr -subarray lo hi arr = fst(withMutArray size action) - where size = max (hi - lo + 1) 0 - action marr = mcopy marr 0 arr lo size -{-# INLINE subarray #-} - - --- | Split an array into 2 partial copies, where a1 appears at the last index --- of the first copy and 'a2' appears at the 0th index of the second. --- splitArrAt (fromlist [0,1,2,3,4,5,6]) 4 44 45 --- (fromlist [0,1,2,3,44],fromlist [45,5,6]) -splitArrayAt :: PArray a -> Int -> a -> a -> (PArray a, PArray a) -splitArrayAt arr i a1 a2 = project (with2MutArray size1 size2 action) - where project (arr1,arr2, _state) = (arr1,arr2) - size1 = i + 1 - size2 = isize arr - i - action marr1 marr2 = do - mcopy marr1 0 arr 0 i - mwrite marr1 i a1 - mwrite marr2 0 a2 - mcopy marr2 1 arr (i+1) (size2 - 1) - -- ========================================================================= makeKeys :: Int -> Int -> [Key] @@ -637,13 +670,11 @@ testt :: Int -> IO () testt n = do let (hmap,output) = tests n histArr = histo hmap - -- hbytes <- recursiveSize $!! hmap - -- putStrLn ("hbytes = "++show hbytes++"\n"++output) putStrLn output putStrLn ("histogram "++show(tolist histArr)) -tests :: Int -> (HashMap Int, String) +tests :: Int -> (KeyMap Int, String) tests n = (hashmap,unlines [ "bits per level = "++show bits , "num levels = "++show keyPathSize @@ -664,7 +695,7 @@ tests n = (hashmap,unlines hwords = heapWords hashmap mwords = heapWords mapmap -count :: HashMap v -> (Int,Int,Int,Stat Int,Stat Int,Int) +count :: KeyMap v -> (Int,Int,Int,Stat Int,Stat Int,Int) count x = go 0 x (0,0,0,mempty,mempty,0) where go _ Empty (e,o,t,l,b,f) = (e+1,o,t,l,b,f) go d (One _ y) (e,o,t,l,b,f) = go (1 + d) y (e,1+o,t,l,b,f) @@ -676,7 +707,7 @@ count x = go 0 x (0,0,0,mempty,mempty,0) -hdepth :: HashMap v -> Int +hdepth :: KeyMap v -> Int hdepth Empty = 0 hdepth (One _ x) = 1 + hdepth x hdepth (Leaf _ _) = 1 @@ -687,7 +718,7 @@ hdepth (Two _ x y) = 1 + max (hdepth x) (hdepth y) increment :: (ArrayPair arr marr a, Num a) => marr s a -> Int -> ST s () increment marr i = do { n <- mindex marr i; mwrite marr i (n+1) } -histogram :: HashMap v -> PA.MutableArray s Int -> ST s () +histogram :: KeyMap v -> PA.MutableArray s Int -> ST s () histogram Empty _ = pure () histogram (One _ x) marr = increment marr 1 >> histogram x marr histogram (Leaf _ _) _ = pure () @@ -695,14 +726,12 @@ histogram (BitmapIndexed _ arr) marr = increment marr (isize arr-1) >> mapM_ (\ histogram (Full arr) marr = increment marr (intSize-1) >> mapM_ (\ x -> histogram x marr) arr histogram (Two _ x y) marr = increment marr 2 >> histogram x marr >> histogram y marr -histo :: HashMap v -> PA.Array Int +histo :: KeyMap v -> PA.Array Int histo x = fst(withMutArray intSize process) where process marr = do { initialize (intSize - 1) ; histogram x marr } where initialize n | n <0 = pure () initialize n = mwrite marr n 0 >> initialize (n-1) - - bpairs :: [Key] bpairs = makeKeys 99 1500000 -- makeKeys 3 15 @@ -738,7 +767,7 @@ add n stat = (Stat 1 n (Just n) (Just n)) <> stat -- ==================== -- Debugging functions -bug :: Int -> IO (HashMap Int) +bug :: Int -> IO (KeyMap Int) bug n = do let ps = take n pairs -- zip (makeKeys 3 n) [0..] hh (k@(Key m0 m1 _ _ ),v) = show m0++" "++show m1++" "++show (path k)++" "++show v @@ -757,7 +786,7 @@ try ps = do testlookup :: Int -> Int -> Bool testlookup seed n = all ok results where ps = zip (makeKeys seed n) [0..] - keymap :: HashMap Int + keymap :: KeyMap Int keymap = fromList ps results = [ (i,lookupHM (fst(ps !! i)) keymap) | i <- [0..(n-1)]] ok (_,Just _) = True @@ -834,7 +863,7 @@ ppArray :: (Indexable arr a) => (a -> PDoc) -> arr a -> PDoc ppArray f arr = ppList f (tolist arr) -- ==================================== --- Pretty Printer for HashMap +-- Pretty Printer for KeyMap ppKey :: Key -> PDoc ppKey (Key w0 _ _ _) = ppWord64 w0 @@ -842,17 +871,17 @@ ppKey (Key w0 _ _ _) = ppWord64 w0 ppBitmap :: Word64 -> PDoc ppBitmap x = text (pack(showBM x)) -ppHashMap :: (v -> PDoc) -> HashMap v -> PDoc -ppHashMap p (Leaf k v) = ppSexp "L" [ppKey k,p v] -ppHashMap _ Empty = text "E" -ppHashMap p (One x mp) = ppSexp "O" [ppInt x,ppHashMap p mp] -ppHashMap p (Two x m1 m2) = ppSexp "T" [ppBitmap x ,ppHashMap p m1, ppHashMap p m2] -ppHashMap p (BitmapIndexed x arr) = ppSexp "B" [ppList q (zip (bitmapToList x) (tolist arr))] - where q (i,a) = ppInt i <+> ppHashMap p a -ppHashMap p (Full arr) = ppSexp "F" [ppList q (zip (bitmapToList fullNodeMask) (tolist arr))] - where q (i,a) = ppInt i <+> ppHashMap p a - -instance PrettyA v => Show (HashMap v) where - show x = show(ppHashMap prettyA x) - showList xs x = unlines (map (\ y -> "\n"++ show(ppHashMap prettyA y)) xs) ++ x +ppKeyMap :: (v -> PDoc) -> KeyMap v -> PDoc +ppKeyMap p (Leaf k v) = ppSexp "L" [ppKey k,p v] +ppKeyMap _ Empty = text "E" +ppKeyMap p (One x mp) = ppSexp "O" [ppInt x,ppKeyMap p mp] +ppKeyMap p (Two x m1 m2) = ppSexp "T" [ppBitmap x ,ppKeyMap p m1, ppKeyMap p m2] +ppKeyMap p (BitmapIndexed x arr) = ppSexp "B" [ppList q (zip (bitmapToList x) (tolist arr))] + where q (i,a) = ppInt i <+> ppKeyMap p a +ppKeyMap p (Full arr) = ppSexp "F" [ppList q (zip (bitmapToList fullNodeMask) (tolist arr))] + where q (i,a) = ppInt i <+> ppKeyMap p a + +instance PrettyA v => Show (KeyMap v) where + show x = show(ppKeyMap prettyA x) + showList xs x = unlines (map (\ y -> "\n"++ show(ppKeyMap prettyA y)) xs) ++ x From db587e0149ec9a8be34644370c3b5eeffc51a685 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Mon, 18 Oct 2021 11:03:16 -0400 Subject: [PATCH 07/19] changes from fixing bugs on Alexi's branch. --- libs/small-steps/src/Data/Compact/Class.hs | 2 +- libs/small-steps/src/Data/Compact/KeyMap.hs | 96 +++++++++++++++++---- 2 files changed, 78 insertions(+), 20 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/Class.hs b/libs/small-steps/src/Data/Compact/Class.hs index 6c02038056b..9edb774206b 100644 --- a/libs/small-steps/src/Data/Compact/Class.hs +++ b/libs/small-steps/src/Data/Compact/Class.hs @@ -94,7 +94,7 @@ instance Indexable (A.Array Int) a where merge = mergeArray instance Indexable SmallArray t where - index = Small.indexSmallArray + index = boundsCheck Small.indexSmallArray isize = Small.sizeofSmallArray fromlist = Small.smallArrayFromList tolist arr = foldr (:) [] arr diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 8d97dc98a48..757e33be7c8 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -38,7 +38,7 @@ import Data.Text(Text,pack) import qualified Prettyprinter.Internal as Pretty import Data.Set(Set) import qualified Data.Set as Set --- import Debug.Trace + -- type PArray = PA.Array type PArray = Small.SmallArray @@ -55,7 +55,7 @@ type Bitmap = Word64 -- | The number of bits in a segment. Can't be more than 6, because using Word64 -- as Bitmap can only accomodate 2^6 = 64 bits bits :: Int -bits = 6 +bits = 4 -- | Ints in the range [0..63], represents 'bits' wide portion of a key type Segment = Int @@ -172,6 +172,17 @@ instance HeapWords v => HeapWords (KeyMap v) where heapWords (Full arr) = foldl' heapPlus 1 arr heapWords (Two _ a b) = 4 + heapWords a + heapWords b +instance HeapWords () where + heapWords () = 1 + +tag :: KeyMap v -> String +tag Empty = "Empty" +tag (One _ _xs) = "One" +tag (Leaf _ _v) = "Leaf" +tag (BitmapIndexed _ _arr) = "BitmapedIndexed" +tag (Full _arr) = "Full" +tag (Two _ _a _b) = "Two" + -- ====================================================================== -- Insertion @@ -182,27 +193,28 @@ insert' :: BitState -> v -> KeyMap v -> KeyMap v insert' state v m = insertWithKey (\ _k new _old -> new) state v m insertWithKey :: (Key -> v -> v -> v) -> BitState -> v -> KeyMap v -> KeyMap v -insertWithKey combine bs0 v0 m0 = go bs0 v0 m0 +insertWithKey combine bs0 v0 m0 = goR bs0 v0 m0 where + goR state val mp = (go state val mp) go (BitState _ k) !x Empty = Leaf k x go (BitState [] k) _ _ = error ("In insert', ran out of bits for key "++show k) go (BitState (i:is) k) x (One j node) = case compare j i of - EQ -> One j (go (BitState is k) x node) - LT -> Two (setBits [i,j]) node (go (BitState is k) x Empty) - GT -> Two (setBits [i,j]) (go (BitState is k) x Empty) node + EQ -> One j (goR (BitState is k) x node) + LT -> Two (setBits [i,j]) node (goR (BitState is k) x Empty) + GT -> Two (setBits [i,j]) (goR (BitState is k) x Empty) node go (state@(BitState _ k1)) x t@(Leaf k2 y) | k1 == k2 = if x `ptrEq` y then t else (Leaf k2 (combine k2 x y)) - | otherwise = makeTwo state t (next2 state k2) x + | otherwise = makeTwo state t (next2 state k2) x go (BitState (j:js) k) x t@(BitmapIndexed bmap arr) | not(testBit bmap j) = let !arr' = insertAt arr i $! (Leaf k x) in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' | otherwise = let !st = index arr i - !st' = go (BitState js k) x st + !st' = goR (BitState js k) x st in if st' `ptrEq` st then t else BitmapIndexed bmap (update arr i st') @@ -213,7 +225,7 @@ insertWithKey combine bs0 v0 m0 = go bs0 v0 m0 in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' | otherwise = let !st = if i==0 then x0 else x1 - !st' = go (BitState js k) x st + !st' = goR (BitState js k) x st in if st' `ptrEq` st then t else if i==0 @@ -222,7 +234,7 @@ insertWithKey combine bs0 v0 m0 = go bs0 v0 m0 where i = indexFromSegment bmap j go (BitState (j:js) k) x t@(Full arr) = let !st = index arr i - !st' = go (BitState js k) x st + !st' = goR (BitState js k) x st in if st' `ptrEq` st then t else Full (update arr i st') @@ -234,12 +246,14 @@ makeTwo _state _leaf (BitState [] k) _val = error ("Case 2. In makeTwo, out of b makeTwo (BitState (i:is) k1) leaf1 (BitState (j:js) k2) val2 | i==j = One i (makeTwo (BitState is k1) leaf1 (BitState js k2) val2) | otherwise = if i < j - then Two (setBits [i,j]) (Leaf k1 val2) leaf1 + then Two (setBits [i,j]) (Leaf k1 val2) leaf1 else Two (setBits [i,j]) leaf1 (Leaf k1 val2) - + +foo :: String -> a -> String +foo s !_ = s insert :: Key -> v -> KeyMap v -> KeyMap v -insert bs v hashmap = insert' (initBitState bs) v hashmap +insert bs v hashmap = insert' (initBitState bs) v hashmap -- (trace ("INSERT "++show bs) hashmap) fromList :: [(Key,v)] -> KeyMap v fromList ps = foldl' accum Empty ps @@ -336,11 +350,18 @@ lookup' _ Empty = Nothing lookup' (BitState _ k1) (Leaf k2 v) = if k1==k2 then Just v else Nothing lookup' (BitState [] k) _ = error ("lookup', out of bits for key "++show k) lookup' (BitState (j:js) k) (One i x) = if i==j then lookup' (BitState js k) x else Nothing -lookup' (BitState (j:js) k) (Two bm x0 x1) = if i==0 then lookup' (BitState js k) x0 else lookup' (BitState js k) x1 +lookup' (BitState (j:js) k) (Two bm x0 x1) = + if testBit bm j + then (if i==0 then lookup' (BitState js k) x0 else lookup' (BitState js k) x1) + else Nothing where i = indexFromSegment bm j -lookup' (BitState (j:js) k) (BitmapIndexed bm arr) = lookup' (BitState js k) (index arr i) +lookup' (BitState (j:js) k) (BitmapIndexed bm arr) = + if testBit bm j + then lookup' (BitState js k) (index arr i) + else Nothing where i = indexFromSegment bm j -lookup' (BitState (j:js) k) (Full arr) = lookup' (BitState js k) (index arr i) +lookup' (BitState (j:js) k) (Full arr) = -- Every possible bit is set, to no testBit call necessary + lookup' (BitState js k) (index arr i) where i = indexFromSegment fullNodeMask j lookupHM :: Key -> KeyMap v -> Maybe v @@ -604,7 +625,7 @@ testsplitBitmap i = (bitmapToList l,b,bitmapToList g) -- | /O(n)/ Make a copy of an Array that removes the 'i'th element. Decreasing the size by 1. remove :: PArray a -> Int -> PArray a -remove arr i = if i<0 || i >= n +remove arr i = if i<0 || i > n then error ("index out of bounds in 'remove' "++show i++" not in range (0,"++show (isize arr -1)++")") else fst(withMutArray n action) where n = (isize arr) - 1 @@ -705,8 +726,25 @@ count x = go 0 x (0,0,0,mempty,mempty,0) foldr (go (length arr + d)) (e,o,t,l,add (length arr) b,f) arr go d (Full arr) (e,o,t,l,b,f) = foldr (go (length arr + d)) (e,o,t,l,b,f+1) arr - - +countIO:: HeapWords a => KeyMap a -> IO () +countIO hashmap = do + putStrLn $ unlines + [ "bits per level = "++show bits + , "num levels = "++show keyPathSize + , "empty = "++show empty + , "leaf = "++show leaf + , "one = "++show one + , "two = "++show two + , "bits = "++show bit + , "full = "++show full + , "hwords = "++show hwords + , "depth = "++show (hdepth hashmap) + , "histogram ="++show hist + ] + where (empty,one,two,leaf,bit,full) = count hashmap + hist = histo hashmap + hwords = heapWords hashmap + hdepth :: KeyMap v -> Int hdepth Empty = 0 hdepth (One _ x) = 1 + hdepth x @@ -885,3 +923,23 @@ instance PrettyA v => Show (KeyMap v) where show x = show(ppKeyMap prettyA x) showList xs x = unlines (map (\ y -> "\n"++ show(ppKeyMap prettyA y)) xs) ++ x + +keysX :: [Key] +keysX = + [ Key 17900508425448557813 1425228445697650578 4096886001776694930 5607342842136005805 + , Key 6883900645186699936 13296170193789961158 4397314084330617059 8869821626379988209 + , Key 10500005495319713790 2912085157004832622 13426000237053837606 12059657784398898378 + , Key 3923906598994021794 12765791139276487287 816482653431531599 7003511147053802144 + , Key 5166915752834780615 7133194944084009196 13810062108841219641 296498671410031824 + , Key 18030165020800047584 18085286706182838302 16232822895209986816 17388829728381408048 + , Key 5142157305423936627 7231225143269744777 15250651091019539686 14241693248962825662 + , Key 13428671722466854389 16561117437870591512 11235927355594486308 16930552725399654134 + , Key 4838981082210206139 12557487373235351610 6348966276768033248 1499713340968517390 + , Key 336475062096603304 6399910448856822947 3425786324025245994 16363487473709422408 + , Key 16607855275778415913 15113927333656355571 16111805289570157530 7151802073429851699 + , Key 10517657211907470890 1089616862122803787 13992791218083853691 13236284657137382314 + , Key 1782840219730873272 5422922922394198551 6207884257158004626 16093772551099792787 + , Key 17216197724774766219 6382375034658581036 883871178158682222 6551207497782514085 + , Key 292346888039769756 7462467761555063764 6493768272219444322 7737867387963351907 + , Key 6067080276442487773 97011115971541225 17793222466254767399 16164726605331358005 + ] From 0e894da1271c054baa99e837275b2e0c5ccf1ef4 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Mon, 18 Oct 2021 12:48:49 -0400 Subject: [PATCH 08/19] Added UnionWith, needs work on keys. --- libs/small-steps/src/Data/Compact/HashMap.hs | 4 +- libs/small-steps/src/Data/Compact/KeyMap.hs | 80 +++++++++++++++++--- 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/HashMap.hs b/libs/small-steps/src/Data/Compact/HashMap.hs index d39a89280c5..ac0f45249ea 100644 --- a/libs/small-steps/src/Data/Compact/HashMap.hs +++ b/libs/small-steps/src/Data/Compact/HashMap.hs @@ -38,7 +38,7 @@ intersection:: HashMap k v -> HashMap k v -> HashMap k v intersection (HashMap m1) (HashMap m2) = HashMap(KM.intersect m1 m2) foldlWithKey' :: (ans -> k -> v -> ans) -> ans -> HashMap k v -> ans -foldlWithKey' accum a (HashMap m) = KM.foldWithKey accum2 a m +foldlWithKey' accum a (HashMap m) = KM.foldWithAscKey accum2 a m where accum2 ans k v = accum ans (fromKey k) v size :: HashMap k v -> Int @@ -48,7 +48,7 @@ fromList :: Keyed k => [(k, v)] -> HashMap k v fromList xs = HashMap(KM.fromList (map (\ (k,v) -> (toKey k,v)) xs)) toList :: HashMap k v -> [(k, v)] -toList (HashMap m) = KM.foldWithKey (\ ans k v -> (fromKey k,v):ans) [] m +toList (HashMap m) = KM.foldWithAscKey (\ ans k v -> (fromKey k,v):ans) [] m mapWithKey :: (k -> v -> u) -> HashMap k v -> HashMap k u mapWithKey f (HashMap m) = HashMap (KM.mapWithKey (\ key v -> f (fromKey key) v) m) \ No newline at end of file diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 757e33be7c8..ef55cb551ef 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -55,7 +55,7 @@ type Bitmap = Word64 -- | The number of bits in a segment. Can't be more than 6, because using Word64 -- as Bitmap can only accomodate 2^6 = 64 bits bits :: Int -bits = 4 +bits = 6 -- | Ints in the range [0..63], represents 'bits' wide portion of a key type Segment = Int @@ -259,6 +259,10 @@ fromList :: [(Key,v)] -> KeyMap v fromList ps = foldl' accum Empty ps where accum ans (k,v) = insert k v ans +toList :: KeyMap v -> [(Key,v)] +toList km = foldWithDescKey accum [] km + where accum k v ans = (k,v):ans + -- ================================================================= -- Deletion @@ -325,22 +329,39 @@ bitmapE bmap arr = bitmapIndexedOrFull bmap arr -- ================================================================ -- aggregation in ascending order of keys -foldWithKey :: (ans -> Key -> v -> ans) -> ans -> KeyMap v -> ans -foldWithKey _ !ans Empty = ans -foldWithKey accum !ans (Leaf k v) = accum ans k v -foldWithKey accum !ans (One _ x) = foldWithKey accum ans x -foldWithKey accum !ans (Two _ x y) = foldWithKey accum (foldWithKey accum ans x) y -foldWithKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 0 +foldWithAscKey :: (ans -> Key -> v -> ans) -> ans -> KeyMap v -> ans +foldWithAscKey _ !ans Empty = ans +foldWithAscKey accum !ans (Leaf k v) = accum ans k v +foldWithAscKey accum !ans (One _ x) = foldWithAscKey accum ans x +foldWithAscKey accum !ans (Two _ x y) = foldWithAscKey accum (foldWithAscKey accum ans x) y +foldWithAscKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 0 where n = isize arr loop !ans i | i >= n = ans - loop !ans i = loop (foldWithKey accum ans (index arr i)) (i+1) -foldWithKey accum !ans0 (Full arr) = loop ans0 0 + loop !ans i = loop (foldWithAscKey accum ans (index arr i)) (i+1) +foldWithAscKey accum !ans0 (Full arr) = loop ans0 0 where n = isize arr loop !ans i | i >= n = ans - loop !ans i = loop (foldWithKey accum ans (index arr i)) (i+1) + loop !ans i = loop (foldWithAscKey accum ans (index arr i)) (i+1) sizeKeyMap :: KeyMap v -> Int -sizeKeyMap x = foldWithKey (\ ans _k _v -> ans+1) 0 x +sizeKeyMap x = foldWithAscKey (\ ans _k _v -> ans+1) 0 x + +-- ================================================================ +-- aggregation in descending order of keys + +foldWithDescKey :: (Key -> v -> ans -> ans) -> ans -> KeyMap v -> ans +foldWithDescKey _ !ans Empty = ans +foldWithDescKey accum !ans (Leaf k v) = accum k v ans +foldWithDescKey accum !ans (One _ x) = foldWithDescKey accum ans x +foldWithDescKey accum !ans (Two _ x y) = foldWithDescKey accum (foldWithDescKey accum ans y) x +foldWithDescKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 (n-1) + where n = isize arr + loop !ans i | i < 0 = ans + loop !ans i = loop (foldWithDescKey accum ans (index arr i)) (i-1) +foldWithDescKey accum !ans0 (Full arr) = loop ans0 (n-1) + where n = isize arr + loop !ans i | i < 0 = ans + loop !ans i = loop (foldWithDescKey accum ans (index arr i)) (i-1) -- ================================================================== -- Lookup a key @@ -452,6 +473,43 @@ testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b state@(BitState pathx _) = (initBitState (keys !! i)) (a,b,c) = splitKeyMap state hm +-- ========================================================= +-- UnionWith + +toListOfSegments :: KeyMap v -> [(Segment,KeyMap v)] +toListOfSegments Empty = [] -- toListOfSegments is never called with Empty nodes. +toListOfSegments (Leaf _ _) = [] -- toListOfSegments is never called with Leaf nodes. +toListOfSegments (One i x) = [(i,x)] +toListOfSegments (Two bm x y) = zip (bitmapToList bm) [x,y] +toListOfSegments (BitmapIndexed bm arr) = zip (bitmapToList bm) (tolist arr) +toListOfSegments (Full arr) = zip (bitmapToList fullNodeMask) (tolist arr) + + +mergeWith:: (KeyMap v -> KeyMap v -> KeyMap v) -> [(Segment,KeyMap v)] -> [(Segment,KeyMap v)] -> [(Segment,KeyMap v)] +mergeWith combine [] [] = [] +mergeWith combine xs [] = xs +mergeWith combine [] ys = ys +mergeWith combine (allxs@((i,x):xs)) (allys@((j,y):ys)) = + case compare i j of + EQ -> (i,combine x y) : mergeWith combine xs ys + LT -> (i,x) : mergeWith combine xs allys + GT -> (j,y) : mergeWith combine allxs ys + +unionWith :: (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v +unionWith combine Empty Empty = Empty +unionWith combine x Empty = x +unionWith combine Empty y = y +unionWith combine (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) +unionWith combine (Leaf k v) y = insertWithKey combine (BitState (path k) k) v y +unionWith combine x (Leaf k v) = insertWithKey combine (BitState (path k) k) v x +unionWith combine x y = build (mergeWith (unionWith combine) xpairs ypairs) + where xpairs = toListOfSegments x + ypairs = toListOfSegments y + +hm10 = fromList (take 5 pairs) +hm11 = fromList (take 5 (drop 4 pairs)) +hm12 = unionWith (\ k x y -> x+y) hm10 hm11 + -- =========================================================== -- Maximum and Minimum Key From b59dd47d0ff13a01e020eee267a728b1a23de9ff Mon Sep 17 00:00:00 2001 From: TimSheard Date: Tue, 19 Oct 2021 09:36:58 -0400 Subject: [PATCH 09/19] Fixed unionWith to track depth, to safey handle (Leaf k v) values. --- libs/small-steps/src/Data/Compact/KeyMap.hs | 49 ++++++++++++--------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index ef55cb551ef..0840617fd2e 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -476,39 +476,46 @@ testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b -- ========================================================= -- UnionWith -toListOfSegments :: KeyMap v -> [(Segment,KeyMap v)] -toListOfSegments Empty = [] -- toListOfSegments is never called with Empty nodes. -toListOfSegments (Leaf _ _) = [] -- toListOfSegments is never called with Leaf nodes. -toListOfSegments (One i x) = [(i,x)] -toListOfSegments (Two bm x y) = zip (bitmapToList bm) [x,y] -toListOfSegments (BitmapIndexed bm arr) = zip (bitmapToList bm) (tolist arr) -toListOfSegments (Full arr) = zip (bitmapToList fullNodeMask) (tolist arr) +toListOfSegments :: Int -> KeyMap v -> [(Segment,KeyMap v)] +toListOfSegments _ Empty = [] +toListOfSegments n (l@(Leaf k _)) = [(path k !! n,l)] +toListOfSegments _ (One i x) = [(i,x)] +toListOfSegments _ (Two bm x y) = zip (bitmapToList bm) [x,y] +toListOfSegments _ (BitmapIndexed bm arr) = zip (bitmapToList bm) (tolist arr) +toListOfSegments _ (Full arr) = zip (bitmapToList fullNodeMask) (tolist arr) mergeWith:: (KeyMap v -> KeyMap v -> KeyMap v) -> [(Segment,KeyMap v)] -> [(Segment,KeyMap v)] -> [(Segment,KeyMap v)] -mergeWith combine [] [] = [] -mergeWith combine xs [] = xs -mergeWith combine [] ys = ys +mergeWith _combine [] [] = [] +mergeWith _combine xs [] = xs +mergeWith _combine [] ys = ys mergeWith combine (allxs@((i,x):xs)) (allys@((j,y):ys)) = case compare i j of EQ -> (i,combine x y) : mergeWith combine xs ys LT -> (i,x) : mergeWith combine xs allys GT -> (j,y) : mergeWith combine allxs ys -unionWith :: (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v -unionWith combine Empty Empty = Empty -unionWith combine x Empty = x -unionWith combine Empty y = y -unionWith combine (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) -unionWith combine (Leaf k v) y = insertWithKey combine (BitState (path k) k) v y -unionWith combine x (Leaf k v) = insertWithKey combine (BitState (path k) k) v x -unionWith combine x y = build (mergeWith (unionWith combine) xpairs ypairs) - where xpairs = toListOfSegments x - ypairs = toListOfSegments y +unionWithN :: Int -> (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v +unionWithN _ _ Empty Empty = Empty +unionWithN _ _ x Empty = x +unionWithN _ _ Empty y = y +unionWithN _ combine (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) +unionWithN _ combine (Leaf k v) y = insertWithKey combine (BitState (path k) k) v y +unionWithN _ combine x (Leaf k v) = insertWithKey combine (BitState (path k) k) v x +unionWithN n combine x y = build (mergeWith (unionWithN (n+1) combine) xpairs ypairs) + where xpairs = toListOfSegments n x + ypairs = toListOfSegments n y +unionWithKey :: (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v +unionWithKey comb x y = unionWithN 0 comb x y + +unionWith :: (v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v +unionWith comb x y = unionWithN 0 (\ _k a b -> comb a b) x y + +hm10, hm11, hm12 :: KeyMap Int hm10 = fromList (take 5 pairs) hm11 = fromList (take 5 (drop 4 pairs)) -hm12 = unionWith (\ k x y -> x+y) hm10 hm11 +hm12 = unionWith (+) hm10 hm11 -- =========================================================== From c8bdefe3940b260586c400ad3d455293564297cf Mon Sep 17 00:00:00 2001 From: TimSheard Date: Tue, 19 Oct 2021 09:39:18 -0400 Subject: [PATCH 10/19] Added a TODO note --- libs/small-steps/src/Data/Compact/KeyMap.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 0840617fd2e..67498001c0b 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -476,6 +476,9 @@ testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b -- ========================================================= -- UnionWith +-- TODO a function that does not use lists +-- mergeArray :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) + toListOfSegments :: Int -> KeyMap v -> [(Segment,KeyMap v)] toListOfSegments _ Empty = [] toListOfSegments n (l@(Leaf k _)) = [(path k !! n,l)] From 1a61e9738393f1ba600e1753bf99d40652e248f8 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Tue, 19 Oct 2021 11:01:48 -0400 Subject: [PATCH 11/19] Fixed bug in fullNodeMask, consoidated 'bits' and 'bitsPerSubmask' into 'bitsPerSegment'. --- libs/small-steps/src/Data/Compact/KeyMap.hs | 43 +++++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index 67498001c0b..f2aefd946f2 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -46,16 +46,27 @@ type PArray = Small.SmallArray bin :: Integral n => n -> [n] bin x = reverse (binary x) +{- +myindex :: String -> PArray v -> Int -> v +myindex message arr i = + if i >= 0 && i < size + then index arr i + else error ("myindex error\n "++message++"\n index "++show i++" not in range (0 .. "++show (size-1)++").") + where size = isize arr +-} + -- ========================================================================== --- bits, Segments, Paths. Breaking a Key into a sequence of small components +-- bitsPerSegment, Segments, Paths. Breaking a Key into a sequence of small components -- | Represent a set of small integers, they can range from 0 to 63 type Bitmap = Word64 -- | The number of bits in a segment. Can't be more than 6, because using Word64 -- as Bitmap can only accomodate 2^6 = 64 bits -bits :: Int -bits = 6 +bitsPerSegment :: Int +bitsPerSegment = 6 +{-# INLINE bitsPerSegment #-} + -- | Ints in the range [0..63], represents 'bits' wide portion of a key type Segment = Int @@ -65,16 +76,18 @@ type Path = [Segment] -- | The maximum value of a segment, as an Int intSize :: Int -intSize = 2 ^ bits +intSize = 2 ^ bitsPerSegment +{-# INLINE intSize #-} -- | The maximum value of a segment, as a Word64 wordSize :: Word64 -wordSize = 2 ^ ((fromIntegral bits)::Word64) +wordSize = 2 ^ ((fromIntegral bitsPerSegment)::Word64) +{-# INLINE wordSize #-} --- | The length of a list of segments representing a key. Need to be carefull if a Key isn't evenly divisible by bits +-- | The length of a list of segments representing a key. Need to be carefull if a Key isn't evenly divisible by bitsPerSegment pathSize :: Word64 pathSize = (if (mod 64 wbits)==0 then (div 64 wbits) else (div 64 wbits) + 1) - where wbits = fromIntegral bits :: Word64 + where wbits = fromIntegral bitsPerSegment :: Word64 -- | Break up a Word64 into a Path getpath :: Word64 -> Path @@ -233,7 +246,7 @@ insertWithKey combine bs0 v0 m0 = goR bs0 v0 m0 else Two bmap x0 st' where i = indexFromSegment bmap j go (BitState (j:js) k) x t@(Full arr) = - let !st = index arr i + let !st = index arr i !st' = goR (BitState js k) x st in if st' `ptrEq` st then t @@ -608,16 +621,12 @@ ptrEq :: a -> a -> Bool ptrEq x y = isTrue# (reallyUnsafePtrEquality# x y ==# 1#) {-# INLINE ptrEq #-} -bitsPerSubkey :: Int -bitsPerSubkey = 4 -{-# INLINE bitsPerSubkey #-} - maxChildren :: Int -maxChildren = 1 `unsafeShiftL` bitsPerSubkey +maxChildren = 1 `unsafeShiftL` bitsPerSegment {-# INLINE maxChildren #-} subkeyMask :: Bitmap -subkeyMask = 1 `unsafeShiftL` bitsPerSubkey - 1 +subkeyMask = 1 `unsafeShiftL` bitsPerSegment - 1 {-# INLINE subkeyMask #-} sparseIndex :: Bitmap -> Bitmap -> Int @@ -631,7 +640,7 @@ bitmapIndexedOrFull b ary | otherwise = BitmapIndexed b ary {-# INLINE bitmapIndexedOrFull #-} --- | A bitmask with the 'bitsPerSubkey' least significant bits set. +-- | A bitmask with the 'bitsPerSegment' least significant bits set. fullNodeMask :: Bitmap fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) {-# INLINE fullNodeMask #-} @@ -765,7 +774,7 @@ testt n = do tests :: Int -> (KeyMap Int, String) tests n = (hashmap,unlines - [ "bits per level = "++show bits + [ "bits per level = "++show bitsPerSegment , "num levels = "++show keyPathSize , "empty = "++show empty , "leaf = "++show leaf @@ -797,7 +806,7 @@ count x = go 0 x (0,0,0,mempty,mempty,0) countIO:: HeapWords a => KeyMap a -> IO () countIO hashmap = do putStrLn $ unlines - [ "bits per level = "++show bits + [ "bits per level = "++show bitsPerSegment , "num levels = "++show keyPathSize , "empty = "++show empty , "leaf = "++show leaf From f4090cfe43e2050ae8a0e8fde57b66102bd4a406 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Tue, 19 Oct 2021 12:07:58 -0400 Subject: [PATCH 12/19] Making union tests. --- libs/small-steps/src/Data/Compact/KeyMap.hs | 30 ++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/small-steps/src/Data/Compact/KeyMap.hs index f2aefd946f2..e3098e32b3e 100644 --- a/libs/small-steps/src/Data/Compact/KeyMap.hs +++ b/libs/small-steps/src/Data/Compact/KeyMap.hs @@ -68,10 +68,10 @@ bitsPerSegment = 6 {-# INLINE bitsPerSegment #-} --- | Ints in the range [0..63], represents 'bits' wide portion of a key +-- | Ints in the range [0.. intSize], represents one 'bitsPerSegment' wide portion of a key type Segment = Int --- | Represents a list of 'bits', which when combined is in 1-1 correspondance with a Key +-- | Represents a list of 'Segment', which when combined is in 1-1 correspondance with a Key type Path = [Segment] -- | The maximum value of a segment, as an Int @@ -490,7 +490,31 @@ testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b -- UnionWith -- TODO a function that does not use lists --- mergeArray :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) +mergeArray :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) +mergeArray combine bm1 arr1 bm2 arr2 = (bmBoth,fst (withMutArray size action)) + where bmBoth = bm1 .&. bm2 + size = popCount bmBoth + segments = bitmapToList bmBoth + action marr3 = loop segments + where loop [] = pure () + loop (i:is) = do + let j1 = indexFromSegment bm1 i + j2 = indexFromSegment bm2 i + j3 = indexFromSegment bmBoth i + case (testBit bm1 i, testBit bm2 i) of + (True, True) -> mwrite marr3 j3 (combine (index arr1 j1) (index arr2 j2)) + (True,False) -> mwrite marr3 j3 (index arr1 j1) + (False,True) -> mwrite marr3 j3 (index arr2 j2) + (False,False) -> pure () + loop is + +bmapA, bmapB :: Bitmap +bmapA = setBits [0,3,6,11,15] +bmapB = setBits [1,3,5,9,11,14] + +arrA, arrB :: PArray Int +arrA = fromlist [0,3,6,11,15] +arrB = fromlist [1,3,5,9,11,14] toListOfSegments :: Int -> KeyMap v -> [(Segment,KeyMap v)] toListOfSegments _ Empty = [] From a351b87eecd86e2e4235dcbbf2f4bbb38abdea45 Mon Sep 17 00:00:00 2001 From: Alexey Kuleshevich Date: Tue, 19 Oct 2021 19:52:00 +0300 Subject: [PATCH 13/19] Move new KeyMap/HashMap into its own package compact-map --- cabal.project | 2 +- libs/compact-map/CHANGELOG.md | 3 + libs/compact-map/Setup.hs | 3 + libs/compact-map/compact-map.cabal | 62 +++++++++++++++++++ .../src/Data/Compact/Class.hs | 0 .../src/Data/Compact/HashMap.hs | 2 +- .../src/Data/Compact/KeyMap.hs | 0 libs/compact-map/test/Main.hs | 16 +++++ libs/compact-map/test/Test/Compact/KeyMap.hs | 7 +++ libs/small-steps/small-steps.cabal | 3 - 10 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 libs/compact-map/CHANGELOG.md create mode 100644 libs/compact-map/Setup.hs create mode 100644 libs/compact-map/compact-map.cabal rename libs/{small-steps => compact-map}/src/Data/Compact/Class.hs (100%) rename libs/{small-steps => compact-map}/src/Data/Compact/HashMap.hs (98%) rename libs/{small-steps => compact-map}/src/Data/Compact/KeyMap.hs (100%) create mode 100644 libs/compact-map/test/Main.hs create mode 100644 libs/compact-map/test/Test/Compact/KeyMap.hs diff --git a/cabal.project b/cabal.project index 58ae9e70a86..d13637ab159 100644 --- a/cabal.project +++ b/cabal.project @@ -22,6 +22,7 @@ packages: libs/non-integral libs/small-steps libs/small-steps-test + libs/compact-map -- Deprecations eras/shelley/chain-and-ledger/executable-spec @@ -183,4 +184,3 @@ package small-steps-test package small-steps ghc-options: -Werror - diff --git a/libs/compact-map/CHANGELOG.md b/libs/compact-map/CHANGELOG.md new file mode 100644 index 00000000000..55a1bd028fe --- /dev/null +++ b/libs/compact-map/CHANGELOG.md @@ -0,0 +1,3 @@ +# Revision history for compact-map + + diff --git a/libs/compact-map/Setup.hs b/libs/compact-map/Setup.hs new file mode 100644 index 00000000000..e8ef27dbba9 --- /dev/null +++ b/libs/compact-map/Setup.hs @@ -0,0 +1,3 @@ +import Distribution.Simple + +main = defaultMain diff --git a/libs/compact-map/compact-map.cabal b/libs/compact-map/compact-map.cabal new file mode 100644 index 00000000000..dd3e1c6d01d --- /dev/null +++ b/libs/compact-map/compact-map.cabal @@ -0,0 +1,62 @@ +cabal-version: 2.2 + +name: compact-map +version: 0.1.0.0 +synopsis: A KeyMap that is based on collisionless HashMap implementation +homepage: https://github.com/input-output-hk/cardano-legder-specs +license: Apache-2.0 +author: IOHK Formal Methods Team +maintainer: formal.methods@iohk.io +category: Control +build-type: Simple +extra-source-files: CHANGELOG.md + +source-repository head + type: git + location: https://github.com/input-output-hk/cardano-ledger-specs + subdir: libs/compact-map + +common project-config + default-language: Haskell2010 + + ghc-options: -Wall + -Wcompat + -Wincomplete-record-updates + -Wincomplete-uni-patterns + -Wredundant-constraints + -Wunused-packages + +library + import: project-config + + exposed-modules: Data.Compact.KeyMap + , Data.Compact.HashMap + other-modules: Data.Compact.Class + build-depends: base >=4.11 && <5 + , array + , containers + , deepseq + , prettyprinter + , primitive + , random + , text + , cardano-prelude + hs-source-dirs: src + +test-suite tests + import: project-config + + hs-source-dirs: test + main-is: Main.hs + other-modules: Test.Compact.KeyMap + type: exitcode-stdio-1.0 + default-language: Haskell2010 + build-depends: base + , containers + , tasty + , tasty-expected-failure + , tasty-quickcheck + , tasty-hunit + , compact-map + , QuickCheck + ghc-options: -threaded diff --git a/libs/small-steps/src/Data/Compact/Class.hs b/libs/compact-map/src/Data/Compact/Class.hs similarity index 100% rename from libs/small-steps/src/Data/Compact/Class.hs rename to libs/compact-map/src/Data/Compact/Class.hs diff --git a/libs/small-steps/src/Data/Compact/HashMap.hs b/libs/compact-map/src/Data/Compact/HashMap.hs similarity index 98% rename from libs/small-steps/src/Data/Compact/HashMap.hs rename to libs/compact-map/src/Data/Compact/HashMap.hs index ac0f45249ea..a0e8198de32 100644 --- a/libs/small-steps/src/Data/Compact/HashMap.hs +++ b/libs/compact-map/src/Data/Compact/HashMap.hs @@ -51,4 +51,4 @@ toList :: HashMap k v -> [(k, v)] toList (HashMap m) = KM.foldWithAscKey (\ ans k v -> (fromKey k,v):ans) [] m mapWithKey :: (k -> v -> u) -> HashMap k v -> HashMap k u -mapWithKey f (HashMap m) = HashMap (KM.mapWithKey (\ key v -> f (fromKey key) v) m) \ No newline at end of file +mapWithKey f (HashMap m) = HashMap (KM.mapWithKey (\ key v -> f (fromKey key) v) m) diff --git a/libs/small-steps/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs similarity index 100% rename from libs/small-steps/src/Data/Compact/KeyMap.hs rename to libs/compact-map/src/Data/Compact/KeyMap.hs diff --git a/libs/compact-map/test/Main.hs b/libs/compact-map/test/Main.hs new file mode 100644 index 00000000000..4535aa581f4 --- /dev/null +++ b/libs/compact-map/test/Main.hs @@ -0,0 +1,16 @@ +module Main where + +import Test.Compact.KeyMap +import Test.Tasty + +-- ==================================================================================== + +tests :: TestTree +tests = + testGroup + "compcat-map" + [ keyMapTests + ] + +main :: IO () +main = defaultMain tests diff --git a/libs/compact-map/test/Test/Compact/KeyMap.hs b/libs/compact-map/test/Test/Compact/KeyMap.hs new file mode 100644 index 00000000000..4cc54b23fa1 --- /dev/null +++ b/libs/compact-map/test/Test/Compact/KeyMap.hs @@ -0,0 +1,7 @@ +module Test.Compact.KeyMap where + +import Data.Compact.KeyMap +import Test.Tasty + +keyMapTests :: TestTree +keyMapTests = testGroup "KeyMap" [] diff --git a/libs/small-steps/small-steps.cabal b/libs/small-steps/small-steps.cabal index bd10ca23c2f..5e79194580c 100644 --- a/libs/small-steps/small-steps.cabal +++ b/libs/small-steps/small-steps.cabal @@ -45,9 +45,6 @@ library , Data.CanonicalMaps , Data.MemoBytes , Data.Coders - , Data.Compact.Class - , Data.Compact.KeyMap - , Data.Compact.HashMap , Data.Pulse , Control.Provenance , Control.Iterate.SetAlgebra From 96f049e7ab7cb9913de505db79258be1b02e6984 Mon Sep 17 00:00:00 2001 From: Alexey Kuleshevich Date: Tue, 19 Oct 2021 20:13:34 +0300 Subject: [PATCH 14/19] Add first property test --- libs/compact-map/src/Data/Compact/KeyMap.hs | 3 ++ libs/compact-map/test/Test/Compact/KeyMap.hs | 29 ++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/libs/compact-map/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs index e3098e32b3e..64eace377c1 100644 --- a/libs/compact-map/src/Data/Compact/KeyMap.hs +++ b/libs/compact-map/src/Data/Compact/KeyMap.hs @@ -168,6 +168,9 @@ data KeyMap v | Full {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) -- intSize subtrees deriving (NFData,Generic) +instance Eq v => Eq (KeyMap v) where + (==) = undefined + heapAdd :: HeapWords a => a -> Int -> Int heapAdd x ans = heapWords x + ans diff --git a/libs/compact-map/test/Test/Compact/KeyMap.hs b/libs/compact-map/test/Test/Compact/KeyMap.hs index 4cc54b23fa1..e86be500f4d 100644 --- a/libs/compact-map/test/Test/Compact/KeyMap.hs +++ b/libs/compact-map/test/Test/Compact/KeyMap.hs @@ -1,7 +1,32 @@ module Test.Compact.KeyMap where -import Data.Compact.KeyMap +import Data.Compact.KeyMap as KeyMap import Test.Tasty +import Test.Tasty.QuickCheck +import Test.QuickCheck + + +instance Arbitrary Key where + arbitrary = + oneof + [ Key <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary + , Key <$> chooseAny <*> chooseAny <*> chooseAny <*> chooseAny + ] + +instance Arbitrary a => Arbitrary (KeyMap a) where + arbitrary = do + let go i m + | i > 0 = do + key <- arbitrary + val <- arbitrary + go (i - 1) $! insert key val m + | otherwise = pure m + NonNegative n <- arbitrary + go (n :: Int) KeyMap.Empty + +prop_RountripToFromList :: KeyMap Int -> Property +prop_RountripToFromList km = KeyMap.fromList (KeyMap.toList km) === km keyMapTests :: TestTree -keyMapTests = testGroup "KeyMap" [] +keyMapTests = + testGroup "KeyMap" [testProperty "to/fromList" prop_RountripToFromList] From 728d5439630bf505aec92626690abf452b5a5990 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Tue, 19 Oct 2021 16:35:24 -0400 Subject: [PATCH 15/19] Debugged UnionWith. --- libs/compact-map/src/Data/Compact/KeyMap.hs | 94 +++++++++++---------- 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/libs/compact-map/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs index 64eace377c1..80d9b82e98a 100644 --- a/libs/compact-map/src/Data/Compact/KeyMap.hs +++ b/libs/compact-map/src/Data/Compact/KeyMap.hs @@ -4,6 +4,7 @@ {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} {-# OPTIONS_GHC -Wno-orphans #-} @@ -38,6 +39,7 @@ import Data.Text(Text,pack) import qualified Prettyprinter.Internal as Pretty import Data.Set(Set) import qualified Data.Set as Set +import Debug.Trace -- type PArray = PA.Array @@ -492,17 +494,45 @@ testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b -- ========================================================= -- UnionWith --- TODO a function that does not use lists -mergeArray :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) -mergeArray combine bm1 arr1 bm2 arr2 = (bmBoth,fst (withMutArray size action)) - where bmBoth = bm1 .&. bm2 +-- | Make an array of size 1, with 'x' stored at index 0. +array1 :: a -> PArray a +array1 x = fst(withMutArray 1 (\ marr -> mwrite marr 0 x)) + +-- | Make an array of size 2, with 'x' stored at index 0. +array2 :: a -> a -> PArray a +array2 x y = fst(withMutArray 2 (\ marr -> mwrite marr 0 x >> mwrite marr 1 y)) + + +-- | Turn a (KeyMap v) into a BitMap and an PArray (KeyMap v) +toSegArray :: Int -> KeyMap v -> (Bitmap,PArray (KeyMap v)) +toSegArray _ Empty = error ("not possible: Empty in toSegArray") +toSegArray n (l@(Leaf k _)) = (setBit 0 (path k !! n),array1 l) +toSegArray _ (One i x) = (setBits [i],array1 x) +toSegArray _ (Two bm x y) = (bm, array2 x y) +toSegArray _ (BitmapIndexed bm arr) = (bm,arr) +toSegArray _ (Full arr) = (fullNodeMask,arr) + +union2 :: Int -> (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v +union2 _ _ Empty Empty = Empty +union2 _ _ x Empty = x +union2 _ _ Empty y = y +union2 n combine x y = bitmapIndexedOrFull bmap arrAll + where (bmx,arrx) = toSegArray n x + (bmy,arry) = toSegArray n y + (bmap,arrAll) = mergeArrayWithBitMaps union3 bmx arrx bmy arry + union3 (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) + union3 x y = union2 (n+1) combine x y + +mergeArrayWithBitMaps :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) +mergeArrayWithBitMaps combine bm1 arr1 bm2 arr2 = (bmBoth,fst (withMutArray size action)) + where bmBoth = bm1 .|. bm2 size = popCount bmBoth segments = bitmapToList bmBoth - action marr3 = loop segments + action marr3 = (loop segments) where loop [] = pure () loop (i:is) = do - let j1 = indexFromSegment bm1 i - j2 = indexFromSegment bm2 i + let j1 = (indexFromSegment bm1 i) + j2 = (indexFromSegment bm2 i) j3 = indexFromSegment bmBoth i case (testBit bm1 i, testBit bm2 i) of (True, True) -> mwrite marr3 j3 (combine (index arr1 j1) (index arr2 j2)) @@ -518,49 +548,19 @@ bmapB = setBits [1,3,5,9,11,14] arrA, arrB :: PArray Int arrA = fromlist [0,3,6,11,15] arrB = fromlist [1,3,5,9,11,14] - -toListOfSegments :: Int -> KeyMap v -> [(Segment,KeyMap v)] -toListOfSegments _ Empty = [] -toListOfSegments n (l@(Leaf k _)) = [(path k !! n,l)] -toListOfSegments _ (One i x) = [(i,x)] -toListOfSegments _ (Two bm x y) = zip (bitmapToList bm) [x,y] -toListOfSegments _ (BitmapIndexed bm arr) = zip (bitmapToList bm) (tolist arr) -toListOfSegments _ (Full arr) = zip (bitmapToList fullNodeMask) (tolist arr) - - -mergeWith:: (KeyMap v -> KeyMap v -> KeyMap v) -> [(Segment,KeyMap v)] -> [(Segment,KeyMap v)] -> [(Segment,KeyMap v)] -mergeWith _combine [] [] = [] -mergeWith _combine xs [] = xs -mergeWith _combine [] ys = ys -mergeWith combine (allxs@((i,x):xs)) (allys@((j,y):ys)) = - case compare i j of - EQ -> (i,combine x y) : mergeWith combine xs ys - LT -> (i,x) : mergeWith combine xs allys - GT -> (j,y) : mergeWith combine allxs ys - -unionWithN :: Int -> (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v -unionWithN _ _ Empty Empty = Empty -unionWithN _ _ x Empty = x -unionWithN _ _ Empty y = y -unionWithN _ combine (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) -unionWithN _ combine (Leaf k v) y = insertWithKey combine (BitState (path k) k) v y -unionWithN _ combine x (Leaf k v) = insertWithKey combine (BitState (path k) k) v x -unionWithN n combine x y = build (mergeWith (unionWithN (n+1) combine) xpairs ypairs) - where xpairs = toListOfSegments n x - ypairs = toListOfSegments n y +testmergeBm = mergeArrayWithBitMaps (+) bmapA arrA bmapB arrB unionWithKey :: (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v -unionWithKey comb x y = unionWithN 0 comb x y +unionWithKey comb x y = union2 0 comb x y unionWith :: (v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v -unionWith comb x y = unionWithN 0 (\ _k a b -> comb a b) x y +unionWith comb x y = union2 0 (\ _k a b -> comb a b) x y -hm10, hm11, hm12 :: KeyMap Int +hm10, hm11, hm12:: KeyMap Int hm10 = fromList (take 5 pairs) hm11 = fromList (take 5 (drop 4 pairs)) hm12 = unionWith (+) hm10 hm11 - -- =========================================================== -- Maximum and Minimum Key @@ -662,9 +662,12 @@ sparseIndex b m = popCount (b .&. (m - 1)) -- | Create a 'BitmapIndexed' or 'Full' node. bitmapIndexedOrFull :: Bitmap -> PArray (KeyMap v) -> KeyMap v -bitmapIndexedOrFull b ary - | b == fullNodeMask = Full ary - | otherwise = BitmapIndexed b ary +bitmapIndexedOrFull b arr | isize arr == 0 = Empty +bitmapIndexedOrFull b arr | isize arr == 1 = One (head (bitmapToList b)) (index arr 0) +bitmapIndexedOrFull b arr | isize arr == 2 = Two b (index arr 0) (index arr 1) +bitmapIndexedOrFull b arr + | b == fullNodeMask = Full arr + | otherwise = BitmapIndexed b arr {-# INLINE bitmapIndexedOrFull #-} -- | A bitmask with the 'bitsPerSegment' least significant bits set. @@ -953,6 +956,9 @@ instance PrettyA Int where instance PrettyA Word64 where prettyA = ppWord64 + +instance PrettyA v => PrettyA (KeyMap v) where + prettyA km = ppKeyMap prettyA km ppWord64 :: Word64 -> Doc a ppWord64 = viaShow From 7623d8bb73612fdbb85a3bdc6bc3c0b629d6a776 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Wed, 20 Oct 2021 16:58:46 -0400 Subject: [PATCH 16/19] Redid insertWithKey and lookupHM to no longer use BitState, addes strictness ! to Two and One, wrote a bulk insertion. --- libs/compact-map/src/Data/Compact/HashMap.hs | 5 +- libs/compact-map/src/Data/Compact/KeyMap.hs | 259 ++++++++++++------- 2 files changed, 169 insertions(+), 95 deletions(-) diff --git a/libs/compact-map/src/Data/Compact/HashMap.hs b/libs/compact-map/src/Data/Compact/HashMap.hs index a0e8198de32..4d0dfb125df 100644 --- a/libs/compact-map/src/Data/Compact/HashMap.hs +++ b/libs/compact-map/src/Data/Compact/HashMap.hs @@ -23,9 +23,8 @@ insert :: k -> v -> HashMap k v -> HashMap k v insert k v (HashMap m) = HashMap(KM.insert (toKey k) v m) insertWithKey :: (k -> v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -insertWithKey combine key v (HashMap m) = HashMap(KM.insertWithKey comb state v m) - where comb k v1 v2 = combine (fromKey k) v1 v2 - state = (KM.initBitState (toKey key)) +insertWithKey combine key v (HashMap m) = HashMap(KM.insertWithKey comb (toKey key) v m) + where comb k v1 v2 = combine (fromKey k) v1 v2 restrictKeys :: HashMap k v -> Set k -> HashMap k v restrictKeys (HashMap m) set = HashMap(KM.domainRestrict m (Set.map toKey set)) diff --git a/libs/compact-map/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs index 80d9b82e98a..f0e3f0e8179 100644 --- a/libs/compact-map/src/Data/Compact/KeyMap.hs +++ b/libs/compact-map/src/Data/Compact/KeyMap.hs @@ -39,7 +39,7 @@ import Data.Text(Text,pack) import qualified Prettyprinter.Internal as Pretty import Data.Set(Set) import qualified Data.Set as Set -import Debug.Trace +import Data.List(sortBy) -- type PArray = PA.Array @@ -123,15 +123,15 @@ genKey g = (Key w0 w1 w2 w3,g4) (w3,g4) = genWord64 g3 -- | Break up a Key into a Path -path :: Key -> Path -path (Key w0 w1 w2 w3) = getpath w0 ++ getpath w1 ++ getpath w2 ++ getpath w3 +keyPath :: Key -> Path +keyPath (Key w0 w1 w2 w3) = getpath w0 ++ getpath w1 ++ getpath w2 ++ getpath w3 -- | A pair of a Key and its equivalent Path data BitState = BitState Path !Key -- Initialize a BitState from a Key initBitState :: Key -> BitState -initBitState key = BitState (path key) key +initBitState key = BitState (keyPath key) key -- | Obtain the Key from a BitState getBytes :: BitState -> Key @@ -140,7 +140,7 @@ getBytes (BitState _ bs) = bs -- | Make a new BitState from a Key, using the old BitState to figure out -- how far down the path have we already gone. next2 :: BitState -> Key -> BitState -next2 (BitState ps _) key = (BitState (drop n (path key)) key) +next2 (BitState ps _) key = (BitState (drop n (keyPath key)) key) where n = (fromIntegral keyPathSize) - length ps showBM :: Bitmap -> String @@ -157,21 +157,20 @@ instance HeapWords Key where instance Show BitState where show (BitState p key) = "(BitState "++show p++" "++show key++")" - -- =============================================================== data KeyMap v = Empty - | Leaf {-# UNPACK #-} !Key v - | One {-# UNPACK #-} !Int (KeyMap v) -- 1 subtree - | Two {-# UNPACK #-} !Bitmap (KeyMap v) (KeyMap v) -- 2 subtrees - | BitmapIndexed {-# UNPACK #-} !Bitmap -- 3 - (intSize - 1) subtrees + | Leaf {-# UNPACK #-} !Key !v + | One {-# UNPACK #-} !Int !(KeyMap v) -- 1 subtree + | Two {-# UNPACK #-} !Bitmap !(KeyMap v) !(KeyMap v) -- 2 subtrees + | BitmapIndexed {-# UNPACK #-} !Bitmap -- 3 - (intSize - 1) subtrees {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) | Full {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) -- intSize subtrees deriving (NFData,Generic) instance Eq v => Eq (KeyMap v) where - (==) = undefined + (==) x y = toList x == toList y heapAdd :: HeapWords a => a -> Int -> Int heapAdd x ans = heapWords x + ans @@ -207,71 +206,73 @@ tag (Two _ _a _b) = "Two" indexFromSegment :: Bitmap -> Int -> Int indexFromSegment bmap j = sparseIndex bmap (setBit 0 j) -insert' :: BitState -> v -> KeyMap v -> KeyMap v -insert' state v m = insertWithKey (\ _k new _old -> new) state v m - -insertWithKey :: (Key -> v -> v -> v) -> BitState -> v -> KeyMap v -> KeyMap v -insertWithKey combine bs0 v0 m0 = goR bs0 v0 m0 +insertWithKey' :: (Key -> v -> v -> v) -> Path -> Key -> v -> KeyMap v -> KeyMap v +insertWithKey' combine path k x kmap = go 0 kmap where - goR state val mp = (go state val mp) - go (BitState _ k) !x Empty = Leaf k x - go (BitState [] k) _ _ = error ("In insert', ran out of bits for key "++show k) - go (BitState (i:is) k) x (One j node) = - case compare j i of - EQ -> One j (goR (BitState is k) x node) - LT -> Two (setBits [i,j]) node (goR (BitState is k) x Empty) - GT -> Two (setBits [i,j]) (goR (BitState is k) x Empty) node - go (state@(BitState _ k1)) x t@(Leaf k2 y) - | k1 == k2 = if x `ptrEq` y - then t - else (Leaf k2 (combine k2 x y)) - | otherwise = makeTwo state t (next2 state k2) x - go (BitState (j:js) k) x t@(BitmapIndexed bmap arr) + go _ Empty = Leaf k x + go n (One j node) = + case compare j i of + EQ -> One j (go (n+1) node) + LT -> Two (setBits [i,j]) node (go (n+1) Empty) + GT -> Two (setBits [i,j]) (go (n+1) Empty) node + where i = path !! n + go n t@(Leaf k2 y) + | k == k2 = if x `ptrEq` y + then t + else (Leaf k (combine k x y)) + | otherwise = twoLeaf (drop n (keyPath k2)) t (drop n path) k x + go n t@(BitmapIndexed bmap arr) | not(testBit bmap j) = let !arr' = insertAt arr i $! (Leaf k x) in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' | otherwise = let !st = index arr i - !st' = goR (BitState js k) x st + !st' = go (n+1) st in if st' `ptrEq` st then t else BitmapIndexed bmap (update arr i st') where i = indexFromSegment bmap j - go (BitState (j:js) k) x t@(Two bmap x0 x1) + j = (path !! n) + go n t@(Two bmap x0 x1) | not(testBit bmap j) = let !arr' = insertAt (fromlist [x0,x1]) i $! (Leaf k x) in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' | otherwise = let !st = if i==0 then x0 else x1 - !st' = goR (BitState js k) x st + !st' = go (n+1) st in if st' `ptrEq` st then t else if i==0 then Two bmap st' x1 else Two bmap x0 st' where i = indexFromSegment bmap j - go (BitState (j:js) k) x t@(Full arr) = + j = path !! n + go n t@(Full arr) = let !st = index arr i - !st' = goR (BitState js k) x st + !st' = go (n+1) st in if st' `ptrEq` st then t else Full (update arr i st') where i = indexFromSegment fullNodeMask j + j = path !! n -makeTwo :: BitState -> KeyMap v -> BitState -> v -> KeyMap v -makeTwo (BitState [] k) _leaf _state _val = error ("Case 1. In makeTwo, out of bits for key "++show k) -makeTwo _state _leaf (BitState [] k) _val = error ("Case 2. In makeTwo, out of bits for key "++show k) -makeTwo (BitState (i:is) k1) leaf1 (BitState (j:js) k2) val2 - | i==j = One i (makeTwo (BitState is k1) leaf1 (BitState js k2) val2) +twoLeaf :: Path -> KeyMap v -> Path -> Key -> v -> KeyMap v +twoLeaf [] _ _ _ _ = error ("the path ran out of segments in twoLeaf case 1.") +twoLeaf _ _ [] _ _ = error ("the path ran out of segments in twoLeaf case 1.") +twoLeaf (i:is) leaf1 (j:js) k2 v2 + | i==j = One i (twoLeaf is leaf1 js k2 v2) | otherwise = if i < j - then Two (setBits [i,j]) (Leaf k1 val2) leaf1 - else Two (setBits [i,j]) leaf1 (Leaf k1 val2) + then Two (setBits [i,j]) leaf1 (Leaf k2 v2) + else Two (setBits [i,j]) (Leaf k2 v2) leaf1 + +insertWithKey :: (Key -> v -> v -> v) -> Key -> v -> KeyMap v -> KeyMap v +insertWithKey f k v m = insertWithKey' f (keyPath k) k v m -foo :: String -> a -> String -foo s !_ = s +insertWith :: (t -> t -> t) -> Key -> t -> KeyMap t -> KeyMap t +insertWith f k v m = insertWithKey' (\ _ key val -> f key val) (keyPath k) k v m insert :: Key -> v -> KeyMap v -> KeyMap v -insert bs v hashmap = insert' (initBitState bs) v hashmap -- (trace ("INSERT "++show bs) hashmap) +insert k v m = insertWithKey' (\ _key new _old -> new) (keyPath k) k v m fromList :: [(Key,v)] -> KeyMap v fromList ps = foldl' accum Empty ps @@ -384,28 +385,28 @@ foldWithDescKey accum !ans0 (Full arr) = loop ans0 (n-1) -- ================================================================== -- Lookup a key -lookup' :: BitState -> KeyMap v -> Maybe v -lookup' _ Empty = Nothing -lookup' (BitState _ k1) (Leaf k2 v) = if k1==k2 then Just v else Nothing -lookup' (BitState [] k) _ = error ("lookup', out of bits for key "++show k) -lookup' (BitState (j:js) k) (One i x) = if i==j then lookup' (BitState js k) x else Nothing -lookup' (BitState (j:js) k) (Two bm x0 x1) = - if testBit bm j - then (if i==0 then lookup' (BitState js k) x0 else lookup' (BitState js k) x1) - else Nothing - where i = indexFromSegment bm j -lookup' (BitState (j:js) k) (BitmapIndexed bm arr) = - if testBit bm j - then lookup' (BitState js k) (index arr i) - else Nothing - where i = indexFromSegment bm j -lookup' (BitState (j:js) k) (Full arr) = -- Every possible bit is set, to no testBit call necessary - lookup' (BitState js k) (index arr i) - where i = indexFromSegment fullNodeMask j - -lookupHM :: Key -> KeyMap v -> Maybe v -lookupHM bytes mp = lookup' (initBitState bytes) mp - +lookupHM :: Key-> KeyMap v -> Maybe v +lookupHM key km = go (keyPath key) km + where go _ Empty = Nothing + go _ (Leaf key2 v) = if key == key2 then Just v else Nothing + go [] _ = Nothing -- Path is empty, we will never find it. + go (j:js) (One i x) = if i==j then go js x else Nothing + go (j:js) (Two bm x0 x1) = + if testBit bm j + then (if i==0 then go js x0 else go js x1) + else Nothing + where i = indexFromSegment bm j + + go (j:js) (BitmapIndexed bm arr) = + if testBit bm j + then go js (index arr i) + else Nothing + where i = indexFromSegment bm j + + go (j:js) (Full arr) = -- Every possible bit is set, so no testBit call necessary + go js (index arr i) + where i = indexFromSegment fullNodeMask j + -- ========================================================= -- map @@ -506,7 +507,7 @@ array2 x y = fst(withMutArray 2 (\ marr -> mwrite marr 0 x >> mwrite marr 1 y)) -- | Turn a (KeyMap v) into a BitMap and an PArray (KeyMap v) toSegArray :: Int -> KeyMap v -> (Bitmap,PArray (KeyMap v)) toSegArray _ Empty = error ("not possible: Empty in toSegArray") -toSegArray n (l@(Leaf k _)) = (setBit 0 (path k !! n),array1 l) +toSegArray n (l@(Leaf k _)) = (setBit 0 (keyPath k !! n),array1 l) toSegArray _ (One i x) = (setBits [i],array1 x) toSegArray _ (Two bm x y) = (bm, array2 x y) toSegArray _ (BitmapIndexed bm arr) = (bm,arr) @@ -521,7 +522,7 @@ union2 n combine x y = bitmapIndexedOrFull bmap arrAll (bmy,arry) = toSegArray n y (bmap,arrAll) = mergeArrayWithBitMaps union3 bmx arrx bmy arry union3 (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) - union3 x y = union2 (n+1) combine x y + union3 a b = union2 (n+1) combine a b mergeArrayWithBitMaps :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) mergeArrayWithBitMaps combine bm1 arr1 bm2 arr2 = (bmBoth,fst (withMutArray size action)) @@ -548,6 +549,8 @@ bmapB = setBits [1,3,5,9,11,14] arrA, arrB :: PArray Int arrA = fromlist [0,3,6,11,15] arrB = fromlist [1,3,5,9,11,14] + +testmergeBm :: (Bitmap, PArray Int) testmergeBm = mergeArrayWithBitMaps (+) bmapA arrA bmapB arrB unionWithKey :: (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v @@ -662,7 +665,7 @@ sparseIndex b m = popCount (b .&. (m - 1)) -- | Create a 'BitmapIndexed' or 'Full' node. bitmapIndexedOrFull :: Bitmap -> PArray (KeyMap v) -> KeyMap v -bitmapIndexedOrFull b arr | isize arr == 0 = Empty +bitmapIndexedOrFull _ arr | isize arr == 0 = Empty bitmapIndexedOrFull b arr | isize arr == 1 = One (head (bitmapToList b)) (index arr 0) bitmapIndexedOrFull b arr | isize arr == 2 = Two b (index arr 0) (index arr 1) bitmapIndexedOrFull b arr @@ -915,7 +918,7 @@ add n stat = (Stat 1 n (Just n) (Just n)) <> stat bug :: Int -> IO (KeyMap Int) bug n = do let ps = take n pairs -- zip (makeKeys 3 n) [0..] - hh (k@(Key m0 m1 _ _ ),v) = show m0++" "++show m1++" "++show (path k)++" "++show v + hh (k@(Key m0 m1 _ _ ),v) = show m0++" "++show m1++" "++show (keyPath k)++" "++show v putStrLn (unlines (map hh ps)) -- putStrLn (show (fromList ps)) @@ -923,7 +926,7 @@ bug n = do try :: [(Key,Int)] -> IO () try ps = do - let hh (k@(Key m0 m1 _ _),v) = show m0++" "++show m1++" "++show (path k)++" "++show v + let hh (k@(Key m0 m1 _ _),v) = show m0++" "++show m1++" "++show (keyPath k)++" "++show v putStrLn (unlines (map hh ps)) putStrLn (show (fromList ps)) @@ -936,7 +939,7 @@ testlookup seed n = all ok results results = [ (i,lookupHM (fst(ps !! i)) keymap) | i <- [0..(n-1)]] ok (_,Just _) = True ok (i,Nothing) = error ("testlookup failure: "++show i++" "++show pair++"\n"++ - show (path (fst pair))++"\n "++show keymap) + show (keyPath (fst pair))++"\n "++show keymap) where pair = (ps !! i) -- ====================================================================================== @@ -1033,23 +1036,95 @@ instance PrettyA v => Show (KeyMap v) where show x = show(ppKeyMap prettyA x) showList xs x = unlines (map (\ y -> "\n"++ show(ppKeyMap prettyA y)) xs) ++ x +-- ==================================================================== +-- Bulk insert + +bulkInsert :: Int -> PArray (Path,KeyMap v) -> Int -> Int -> KeyMap v +bulkInsert _n arr lo hi | lo < 0 || lo > n || hi <0 || hi > n = + error ("lo or hi out of bounds (0 .. "++show n++") lo="++show lo++" hi="++show hi) + where n = isize arr - 1 +bulkInsert _n arr lo hi | lo==hi = snd (index arr lo) +bulkInsert n arr lo hi = -- trace ("BulkInsert n="++show n++" lo="++show lo++" hi="++show hi++" segements="++show segmentRanges) $ + BitmapIndexed bmap (fst(withMutArray size (action 0 segmentRanges))) + where (size,segments,bmap) = getBitmap n arr lo hi + segmentRanges = ranges n arr lo hi segments + action _j [] _marr = pure () + action j ((lox,hix):more) marr = do + mwrite marr j (bulkInsert (n+1) arr lox hix) + action (j+1) more marr + +getBitmap :: Int -> PArray (Path,KeyMap v) -> Int -> Int -> (Int,[Segment],Bitmap) +getBitmap n arr lo hi = (size,segments,bitmap) + where accum bm (path,_) = setBit bm (path !! n) + bitmap = foldRange accum 0 arr lo hi + segments = bitmapToList bitmap + size = length segments + +-- | given starting row 'i' find the last row 'j', such that column 'n' has 'val' in all rows 'i' to 'j' +contiguous :: Int -> Int -> Int -> Int -> PArray ([Int], b) -> Int +contiguous _n _val i _maxi _arr | i < 0 = i +contiguous _n _val i _maxi arr | i >= isize arr = isize arr - 1 +contiguous _n _val i maxi _arr | i > maxi = i-1 -- Do not look outside the valid range for matching val +contiguous n val i maxi arr = if (fst(index arr i) !! n) == val then contiguous n val (i+1) maxi arr else (i-1) + +-- | compute the row range where the 'n' column has the same value 'val', we assume the rows are sorted +-- in ascending order, and so is the list of 'vals' + +ranges :: Int -> PArray ([Int], b) -> Int -> Int -> [Int] -> [(Int, Int)] +ranges _n _arr _i _hi [] = [] +ranges n arr i hi (val:vals) = (i,j) : ranges n arr (j+1) hi vals + where j = contiguous n val i hi arr + +foldRange :: (ans -> t -> ans) -> ans -> PArray t -> Int -> Int -> ans +foldRange _accum ans _arr lo hi | lo > hi = ans +foldRange accum ans arr lo hi = foldRange accum (accum ans (index arr lo)) arr (lo+1) hi -keysX :: [Key] -keysX = - [ Key 17900508425448557813 1425228445697650578 4096886001776694930 5607342842136005805 - , Key 6883900645186699936 13296170193789961158 4397314084330617059 8869821626379988209 - , Key 10500005495319713790 2912085157004832622 13426000237053837606 12059657784398898378 - , Key 3923906598994021794 12765791139276487287 816482653431531599 7003511147053802144 - , Key 5166915752834780615 7133194944084009196 13810062108841219641 296498671410031824 - , Key 18030165020800047584 18085286706182838302 16232822895209986816 17388829728381408048 - , Key 5142157305423936627 7231225143269744777 15250651091019539686 14241693248962825662 - , Key 13428671722466854389 16561117437870591512 11235927355594486308 16930552725399654134 - , Key 4838981082210206139 12557487373235351610 6348966276768033248 1499713340968517390 - , Key 336475062096603304 6399910448856822947 3425786324025245994 16363487473709422408 - , Key 16607855275778415913 15113927333656355571 16111805289570157530 7151802073429851699 - , Key 10517657211907470890 1089616862122803787 13992791218083853691 13236284657137382314 - , Key 1782840219730873272 5422922922394198551 6207884257158004626 16093772551099792787 - , Key 17216197724774766219 6382375034658581036 883871178158682222 6551207497782514085 - , Key 292346888039769756 7462467761555063764 6493768272219444322 7737867387963351907 - , Key 6067080276442487773 97011115971541225 17793222466254767399 16164726605331358005 - ] +-- ========================================== +-- test that incremental and bulk loading create the same KeyMap + +testbulk :: Int -> Int -> Bool +testbulk seed n = (bulk == incremental) + where keys = makeKeys seed n + pairsb = zip keys [0..] + paths:: [(Path,KeyMap Int)] + paths = sortBy cmp $ map f pairsb + f (k,v) = (keyPath k,Leaf k v) + cmp (p1,_) (p2,_) = compare p1 p2 + pathArr = fromlist paths + incremental = fromList pairsb + bulk = bulkInsert 0 pathArr 0 (isize pathArr - 1) + + +-- =================================================== +-- try and measure that bulk loading allocates less memory +-- Does not count the creation and sorting of the array +-- TODO can we do something kike this with a list rather than an array? +-- or sort the array in place? + + +keysbulk :: [Key] +keysbulk = makeKeys 199 50000 + +pairsbulk :: [(Key,Int)] +pairsbulk = zip keysbulk [0..] + +pathsbulk :: [(Path,KeyMap Int)] +pathsbulk = sortBy cmpbulk $ map fbulk pairsbulk + where fbulk (k,v) = (keyPath k,Leaf k v) + cmpbulk (p1,_) (p2,_) = compare p1 p2 + +-- use the ghci command :set +s to enable statistics +-- (2.30 secs, 1,159,454,816 bytes) size = 10000 +-- (13.16 secs, 6,829,808,992 bytes) size = 50000 +-- (1.74 secs, 1,623,304,184 bytes) size = 50000, no print +tryincremental :: Int +tryincremental = sizeKeyMap(fromList pairsbulk) + +-- (1.93 secs, 968,147,688 bytes) size = 10000 +-- (11.96 secs, 5,937,089,424 bytes) size = 50000 +-- (0.94 secs, 661,637,584 bytes) size = 50000, no print +trybulk :: Int +trybulk = sizeKeyMap(bulkInsert 0 pathArr 0 (isize pathArr - 1)) + where pathArr :: PArray (Path,KeyMap Int) + pathArr = fromlist pathsbulk + From 1eb88a6eb7a6f9eca3b3f3fdec530817a4b5f679 Mon Sep 17 00:00:00 2001 From: TimSheard Date: Thu, 21 Oct 2021 17:33:23 -0400 Subject: [PATCH 17/19] Cleaned up, debugged mylub, removed BtState. --- libs/compact-map/src/Data/Compact/HashMap.hs | 3 +- libs/compact-map/src/Data/Compact/KeyMap.hs | 246 +++++++++++++------ 2 files changed, 168 insertions(+), 81 deletions(-) diff --git a/libs/compact-map/src/Data/Compact/HashMap.hs b/libs/compact-map/src/Data/Compact/HashMap.hs index 4d0dfb125df..48bf3eb70bf 100644 --- a/libs/compact-map/src/Data/Compact/HashMap.hs +++ b/libs/compact-map/src/Data/Compact/HashMap.hs @@ -31,7 +31,8 @@ restrictKeys (HashMap m) set = HashMap(KM.domainRestrict m (Set.map toKey set)) splitLookup:: k -> HashMap k a -> (HashMap k a, Maybe a, HashMap k a) splitLookup k (HashMap m) = (HashMap a,b,HashMap c) - where (a,b,c) = KM.splitKeyMap (KM.initBitState (toKey k)) m + where (a,b,c) = KM.splitKeyMap (KM.keyPath key) key m + key = toKey k intersection:: HashMap k v -> HashMap k v -> HashMap k v intersection (HashMap m1) (HashMap m2) = HashMap(KM.intersect m1 m2) diff --git a/libs/compact-map/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs index f0e3f0e8179..aa33a028524 100644 --- a/libs/compact-map/src/Data/Compact/KeyMap.hs +++ b/libs/compact-map/src/Data/Compact/KeyMap.hs @@ -22,6 +22,7 @@ import Data.Bits complement, popCount, unsafeShiftL, + shiftR, setBit, testBit, clearBit, @@ -40,23 +41,16 @@ import qualified Prettyprinter.Internal as Pretty import Data.Set(Set) import qualified Data.Set as Set import Data.List(sortBy) +-- import Debug.Trace -- type PArray = PA.Array type PArray = Small.SmallArray +-- | Show 'n' as a binary number with most significant bits on the left. bin :: Integral n => n -> [n] bin x = reverse (binary x) -{- -myindex :: String -> PArray v -> Int -> v -myindex message arr i = - if i >= 0 && i < size - then index arr i - else error ("myindex error\n "++message++"\n index "++show i++" not in range (0 .. "++show (size-1)++").") - where size = isize arr --} - -- ========================================================================== -- bitsPerSegment, Segments, Paths. Breaking a Key into a sequence of small components @@ -76,7 +70,7 @@ type Segment = Int -- | Represents a list of 'Segment', which when combined is in 1-1 correspondance with a Key type Path = [Segment] --- | The maximum value of a segment, as an Int +-- | The maximum value of a Segment, as an Int intSize :: Int intSize = 2 ^ bitsPerSegment {-# INLINE intSize #-} @@ -86,21 +80,17 @@ wordSize :: Word64 wordSize = 2 ^ ((fromIntegral bitsPerSegment)::Word64) {-# INLINE wordSize #-} --- | The length of a list of segments representing a key. Need to be carefull if a Key isn't evenly divisible by bitsPerSegment +-- | The length of a list of segments representing a key. Need to be +-- carefull if a Key isn't evenly divisible by bitsPerSegment pathSize :: Word64 pathSize = (if (mod 64 wbits)==0 then (div 64 wbits) else (div 64 wbits) + 1) where wbits = fromIntegral bitsPerSegment :: Word64 --- | Break up a Word64 into a Path -getpath :: Word64 -> Path -getpath w64 = loop pathSize w64 [] - where loop :: Word64 -> Word64 -> [Int] -> [Int] - loop 0 _ ans = ans - loop cnt n ans = loop (cnt - 1) (div n wordSize) ((fromIntegral (mod n wordSize)):ans) -- ======================================================================== -- Keys +-- | Represents 32 Bytes, (wordsPerKey * 8) Bytes compactly data Key = Key {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64 @@ -122,27 +112,24 @@ genKey g = (Key w0 w1 w2 w3,g4) (w2,g3) = genWord64 g2 (w3,g4) = genWord64 g3 +-- | Note that (mod n wordSize) and (n .&. modMask) are the same +modMask :: Word64 +modMask = wordSize - 1 + +-- | Break up a Word64 into a Path . Equivalent to +-- loop 0 _ ans = ans +-- loop cnt n ans = loop (cnt - 1) (div n wordSize) ((fromIntegral (mod n wordSize)):ans) +-- But much faster. +getpath :: Word64 -> Path +getpath w64 = loop pathSize w64 [] + where loop :: Word64 -> Word64 -> [Int] -> [Int] + loop 0 _ ans = ans + loop cnt n ans = loop (cnt - 1) (shiftR n bitsPerSegment) ((fromIntegral(n .&. modMask)):ans) + -- | Break up a Key into a Path keyPath :: Key -> Path keyPath (Key w0 w1 w2 w3) = getpath w0 ++ getpath w1 ++ getpath w2 ++ getpath w3 --- | A pair of a Key and its equivalent Path -data BitState = BitState Path !Key - --- Initialize a BitState from a Key -initBitState :: Key -> BitState -initBitState key = BitState (keyPath key) key - --- | Obtain the Key from a BitState -getBytes :: BitState -> Key -getBytes (BitState _ bs) = bs - --- | Make a new BitState from a Key, using the old BitState to figure out --- how far down the path have we already gone. -next2 :: BitState -> Key -> BitState -next2 (BitState ps _) key = (BitState (drop n (keyPath key)) key) - where n = (fromIntegral keyPathSize) - length ps - showBM :: Bitmap -> String showBM bm = show(bitmapToList bm) @@ -154,9 +141,6 @@ bitmapToList bm = loop 63 [] instance HeapWords Key where heapWords (Key _ _ _ _) = 5 -instance Show BitState where - show (BitState p key) = "(BitState "++show p++" "++show key++")" - -- =============================================================== data KeyMap v @@ -285,37 +269,37 @@ toList km = foldWithDescKey accum [] km -- ================================================================= -- Deletion --- | Delete the Key encoded in the BitState from the KeyMap -delete' :: BitState -> KeyMap v -> KeyMap v -delete' (BitState [] _) hm = hm -- Removing a bogus key, leaves 'hm' unchanged -delete' _ Empty = Empty -delete' (BitState _ k) (hm@(Leaf k2 _)) = if k==k2 then Empty else hm -delete' (BitState(i:is) k) (hm@(One j x)) = if i==j then oneE j (delete' (BitState is k) x) else hm -delete' (BitState(i:is) k) (hm@(Two bmap x y)) = +-- | Delete the Key encoded in the Path from the KeyMap +delete' :: Path -> Key -> KeyMap v -> KeyMap v +delete' [] _key hm = hm -- Removing a bogus key, leaves 'hm' unchanged +delete' _ _key Empty = Empty +delete' _ k (hm@(Leaf k2 _)) = if k==k2 then Empty else hm +delete' (i:is) k (hm@(One j x)) = if i==j then oneE j (delete' is k x) else hm +delete' (i:is) k (hm@(Two bmap x y)) = if testBit bmap i - then twoE bmap (delete' (BitState is k) x) (delete' (BitState is k) y) + then twoE bmap (delete' is k x) (delete' is k y) else hm -delete' (BitState(i:is) k) (hm@(BitmapIndexed bmap arr)) = +delete' (i:is) k (hm@(BitmapIndexed bmap arr)) = if testBit bmap i then let m = setBit 0 i j = sparseIndex bmap m - result = delete' (BitState is k) (index arr j) + result = delete' is k (index arr j) -- Consume an upwards floating Empty by removing that element from the array in case result of Empty -> bitmapE (clearBit bmap i) (remove arr j) _ -> BitmapIndexed bmap (update arr j result) else hm -delete' (BitState(i:is) k) (Full arr) = +delete' (i:is) k (Full arr) = let m = setBit 0 i j = sparseIndex fullNodeMask m - result = delete' (BitState is k) (index arr j) + result = delete' is k (index arr j) -- Consume an upwards floating Empty by removing that element from the array in case result of Empty -> BitmapIndexed (clearBit fullNodeMask i) (remove arr j) _ -> Full(update arr j result) delete :: Key -> KeyMap v -> KeyMap v -delete k hm = delete' (initBitState k) hm +delete k hm = delete' (keyPath k) k hm -- One of the invariants is that no Empty ever appears in any of the other -- constructors of KeyMap. So we make "smart" constructors that remove Empty @@ -335,10 +319,6 @@ twoE bmap x Empty = oneE (ith bmap 0) x twoE bmap Empty x = oneE (ith bmap 1) x twoE bmap x y = Two bmap x y --- | The first (smallest) Segment in a BitMap -firstSeg :: Bitmap -> Segment -firstSeg bmap = head(bitmapToList bmap) - -- Float Empty's up over BitmpIndexed, Note that if the size of the arr -- becomes 2, then rebuild with Two rather than BitmapIndexed bitmapE :: Bitmap -> PArray (KeyMap v) -> KeyMap v @@ -425,16 +405,16 @@ instance Functor KeyMap where -- Split a KeyMap into 3 parts -- | return (smaller than 'key', has key?, greater than 'key') -splitKeyMap:: BitState -> KeyMap v -> (KeyMap v,Maybe v,KeyMap v) -splitKeyMap (BitState [] _) hm = (hm,Nothing,Empty) -splitKeyMap (BitState (i:is) key) hm = +splitKeyMap:: Path -> Key -> KeyMap v -> (KeyMap v,Maybe v,KeyMap v) +splitKeyMap [] _key hm = (hm,Nothing,Empty) +splitKeyMap (i:is) key hm = case splitBySegment i hm of (less,x,greater) -> case x of Empty -> (build less,Nothing,build greater) (Leaf k v) -> (build less,if key==k then (Just v) else Nothing,build greater) other -> (reconstruct i less less1,ans,reconstruct i greater greater1) - where (less1,ans,greater1) = splitKeyMap (BitState is key) other + where (less1,ans,greater1) = splitKeyMap is key other splitBySegment :: Segment -> KeyMap v -> ([(Segment,KeyMap v)],KeyMap v, [(Segment,KeyMap v)]) splitBySegment i _x | i < 0 = ([],Empty,[]) @@ -489,8 +469,8 @@ testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b ps = zip (take 12 keys) [0..] hm :: KeyMap Int hm = fromList ps - state@(BitState pathx _) = (initBitState (keys !! i)) - (a,b,c) = splitKeyMap state hm + pathx = (keyPath (keys !! i)) + (a,b,c) = splitKeyMap pathx (keys !! i) hm -- ========================================================= -- UnionWith @@ -590,34 +570,103 @@ getMax (Full arr) = getMax (index arr (isize arr - 1)) -- | The (key,value) pairs (i.e. a subset) of 'h1' where key is in the domain of both 'h1' and 'h2' intersect :: KeyMap v -> KeyMap v -> KeyMap v intersect map1 map2 = - case next map1 map2 of + case maxMinOf map1 map2 of Nothing -> Empty Just k -> leapfrog k map1 map2 Empty +-- | Accumulate a new Key map, by adding the key value pairs to 'ans', for +-- the Keys that appear in both maps 'x' and 'y'. The key 'k' should +-- be the smallest key in either 'x' or 'y', used to get started. leapfrog :: Key -> KeyMap v -> KeyMap v -> KeyMap v -> KeyMap v leapfrog k x y ans = case (lub k x,lub k y) of (Just(k1,v1,h1),Just(k2,_,h2)) -> - case next h1 h2 of + case maxMinOf h1 h2 of Just k3 -> leapfrog k3 h1 h2 (if k1==k2 then insert k1 v1 ans else ans) Nothing -> (if k1==k2 then insert k1 v1 ans else ans) _ -> ans + +-- | Get the larger of the two min keys of 'x' and 'y'. Nothing if either is Empty. +maxMinOf :: KeyMap v1 -> KeyMap v2 -> Maybe Key +maxMinOf x y = case (getMin x,getMin y) of + (Just (k1,_),Just (k2,_)) -> Just(max k1 k2) + _ -> Nothing + +-- ================================================================================== +-- Given a Key, Split a KeyMap into a least upper bound on the Key and everything else +-- greater than the key. Particularly usefull when computing things that involve the +-- intersection over the Key's in two KeyMaps. See eapfrog above for an example. + -- | Find the smallest key <= 'key', and a KeyMap of everything bigger than 'key' lub :: Key -> KeyMap v -> Maybe (Key, v, KeyMap v) lub key hm = - case splitKeyMap (initBitState key) hm of - (_,Just _,Empty) -> Nothing + case splitKeyMap (keyPath key) key hm of + (_,Just v,Empty) -> Just(key,v,Empty) (_,Just v,hm2) -> Just(key,v,hm2) (_,Nothing,hm1) -> case getMin hm1 of Just (k,v) -> Just(k,v,hm1) Nothing -> Nothing -next :: KeyMap v1 -> KeyMap v2 -> Maybe Key -next x y = case (getMin x,getMin y) of - (Just (k1,_),Just (k2,_)) -> Just(max k1 k2) - _ -> Nothing + +-- | The smallest (key and value) greater-or-equal to 'key', plus a new KeyMap +-- that includes everything greater than that lub key. +mylub :: Key -> KeyMap v -> Maybe (Key, v, KeyMap v) +mylub key mp = go (keyPath key) mp + where go [] _ = Nothing + go _ Empty = Nothing + go _ (Leaf k v) = if k >= key then Just(k,v,Empty) else Nothing + go (i:is) (One j x) = + case compare i j of + EQ -> go is x + LT -> go is x + GT -> Nothing + go path (Two bm x y) = mylubArray path bm (fromlist [x,y]) + go path (BitmapIndexed bm arr) = mylubArray path bm arr + go path (Full arr) = mylubArray path fullNodeMask arr + mylubArray [] _ _ = Nothing + mylubArray (i:is) bm arr = + case findFirstLargerSegment key arr i bm of + Nothing -> Nothing + Just (n,j,newbm) -> + case go is (index arr n) of + Nothing -> Nothing + Just (k,v,Empty) -> -- This case occurs only when (index arr n) is a (Leaf kk v) + if k==key -- And kk >= key, but the two cases: 1) kk=key and kk>key differ + then let arr2 = (slice (n+1) (isize arr - 1) arr) + in Just (k,v, bitmapIndexedOrFull (clearBit newbm j) arr2) + else Just (k,v, bitmapIndexedOrFull newbm (suffixAfterIndex n (Leaf k v) arr)) + Just (k,v,keymap) -> Just (k,v, bitmapIndexedOrFull newbm (suffixAfterIndex n keymap arr)) + + +-- | 'seg' is the current Segment in the Path of 'key'. 'bm' is the set of Segments that +-- are stored in 'arr'. We are looking for the index, 'i', of the first KeyMap in 'arr' where +-- there is some key that is greater than or equal to 'key'. Since 'j' is the first segment +-- of things stored at index 'i', we can skip any index whose first segment 'j' is less than 'seg'. +findFirstLargerSegment :: Key -> PArray (KeyMap v) -> Segment -> Bitmap -> Maybe (Int,Segment,Bitmap) +findFirstLargerSegment key arr seg bm + | not(isize arr == length segmentsFromArray) = error ("bitmp does not describe array") + | otherwise = loop 0 bm segmentsFromArray + where segmentsFromArray = (bitmapToList bm) + loop _ _ [] = Nothing + loop i b (j:js) = + if (j < seg) + then loop (i+1) (clearBit b j) js + else case getMax (index arr i) of + Nothing -> loop (i+1) (clearBit b j) js + Just (k,_) -> + if k < key + then loop (i+1) (clearBit b j) js + else Just(i,j,b) + +testlub :: [(Int, Bool)] +testlub = [ (i,mylub key kmap == lub key kmap) | i <- [0..55], key <- [bpairs !! i] ] + where kmap = fromList (take 50 pairs) + +kmap12 :: KeyMap Int +kmap12 = fromList (take 12 pairs) + testIntersect :: KeyMap Int testIntersect = intersect h1x h2x @@ -663,7 +712,7 @@ sparseIndex :: Bitmap -> Bitmap -> Int sparseIndex b m = popCount (b .&. (m - 1)) {-# INLINE sparseIndex #-} --- | Create a 'BitmapIndexed' or 'Full' node. +-- | Create a 'BitmapIndexed' or 'Full' or 'One' or 'Two' node depending on the size of 'arr' bitmapIndexedOrFull :: Bitmap -> PArray (KeyMap v) -> KeyMap v bitmapIndexedOrFull _ arr | isize arr == 0 = Empty bitmapIndexedOrFull b arr | isize arr == 1 = One (head (bitmapToList b)) (index arr 0) @@ -777,6 +826,15 @@ insertAt :: PArray e -> Int -> e -> PArray e insertAt arr idx b = runST(insertM arr idx b) {-# INLINE insertAt #-} +-- | /O(n)/ Make a new array which has a repacement 'v' for index 'n', and copies the values +-- at indices greater than 'n'. The values at indices before 'n' are thrown away. +-- The size of the output, is n smaller than the size of the input. +suffixAfterIndex :: Int -> v -> PArray v -> PArray v +suffixAfterIndex n v arr = fst(withMutArray size action) + where size = ((isize arr) - n) + action marr = mwrite marr 0 v >> mcopy marr 1 arr (n+1) (size - 1) +{-# INLINE suffixAfterIndex #-} + -- | Create a new Array of size 'n' filled with objects 'a' arrayOf :: Int -> a -> PArray a arrayOf n a = runST $ do @@ -1039,13 +1097,39 @@ instance PrettyA v => Show (KeyMap v) where -- ==================================================================== -- Bulk insert +{- The input to bulkInsert looks like this. Each row represents one key value pair. +On the left, each column represents the bits from one Segment of the key. +On the right is the Key and 'v' as a (Leaf KeyMap 'v'). The rows are sorted by the [Segment] + +([ 2,13,42,25, 3,48,19,53,21,34], (L 2551962348052371621 8)) +([ 4,54,49, 6,56,12,18,32,32,41], (L 5598286061563415157 1)) + +([ 6, 5,40,60,58,61,31,53, 5,27], (L 7019127953809495798 4)) +([ 6,51,10, 4,48,59,22,42, 2,59], (L 7839099055843585733 3)) + +([ 7,57, 1,12,57,50,61,15,47,47], (L 9097609470548048848 5)) + +([ 9,32,38,58,62,34,17,42, 0,28], (L 10963709726988699431 9)) +([ 9,63,16,12,42,50,36,46,46,35], (L 11515759964265834722 2)) + +([10,53, 8, 5,22,24,58,25, 8,61], (L 12486253495695216508 0)) + +([14,13,49, 4, 7,37, 4,23, 5,28], (L 16388898632001935134 6)) +([14,43,28,10,55,12,51,63,56,24], (L 16923449273545098794 7)) + + +For each column we break the code into groups where the Segment matches +on that column. Above we have grouped the 6's, 9's and 14's together by column 1 +-} + +-- | Make a (KeyMap v) out of the input. Works by focusing on a particular range of rows ('lo' .. 'hi') +-- It calls it self recursively, by chooing a smaller range, and increasing the column number 'n' by 1. bulkInsert :: Int -> PArray (Path,KeyMap v) -> Int -> Int -> KeyMap v bulkInsert _n arr lo hi | lo < 0 || lo > n || hi <0 || hi > n = error ("lo or hi out of bounds (0 .. "++show n++") lo="++show lo++" hi="++show hi) where n = isize arr - 1 bulkInsert _n arr lo hi | lo==hi = snd (index arr lo) -bulkInsert n arr lo hi = -- trace ("BulkInsert n="++show n++" lo="++show lo++" hi="++show hi++" segements="++show segmentRanges) $ - BitmapIndexed bmap (fst(withMutArray size (action 0 segmentRanges))) +bulkInsert n arr lo hi = bitmapIndexedOrFull bmap (fst(withMutArray size (action 0 segmentRanges))) where (size,segments,bmap) = getBitmap n arr lo hi segmentRanges = ranges n arr lo hi segments action _j [] _marr = pure () @@ -1053,6 +1137,8 @@ bulkInsert n arr lo hi = -- trace ("BulkInsert n="++show n++" lo="++show lo++" h mwrite marr j (bulkInsert (n+1) arr lox hix) action (j+1) more marr +-- | get the bitmap of column 'n' for the rows 'lo' to 'hi' of arr. +-- This is a set of all the segments present for that range. getBitmap :: Int -> PArray (Path,KeyMap v) -> Int -> Int -> (Int,[Segment],Bitmap) getBitmap n arr lo hi = (size,segments,bitmap) where accum bm (path,_) = setBit bm (path !! n) @@ -1060,21 +1146,23 @@ getBitmap n arr lo hi = (size,segments,bitmap) segments = bitmapToList bitmap size = length segments --- | given starting row 'i' find the last row 'j', such that column 'n' has 'val' in all rows 'i' to 'j' +-- | Given starting row 'i' find the last row 'j', such that column 'n' has 'val' in all rows 'i' to 'j' +-- Both 'i' and 'j' must be in the range (i .. maxi), which denote the beginning and end of the +-- of the data for the current segment. contiguous :: Int -> Int -> Int -> Int -> PArray ([Int], b) -> Int contiguous _n _val i _maxi _arr | i < 0 = i contiguous _n _val i _maxi arr | i >= isize arr = isize arr - 1 contiguous _n _val i maxi _arr | i > maxi = i-1 -- Do not look outside the valid range for matching val contiguous n val i maxi arr = if (fst(index arr i) !! n) == val then contiguous n val (i+1) maxi arr else (i-1) --- | compute the row range where the 'n' column has the same value 'val', we assume the rows are sorted +-- | compute the row ranges where the 'n' column has the same value 'val', we assume the rows are sorted -- in ascending order, and so is the list of 'vals' - ranges :: Int -> PArray ([Int], b) -> Int -> Int -> [Int] -> [(Int, Int)] ranges _n _arr _i _hi [] = [] ranges n arr i hi (val:vals) = (i,j) : ranges n arr (j+1) hi vals where j = contiguous n val i hi arr +-- like foldl, except we fold only a limited range ('lo' .. 'hi') of the indices of 'arr' foldRange :: (ans -> t -> ans) -> ans -> PArray t -> Int -> Int -> ans foldRange _accum ans _arr lo hi | lo > hi = ans foldRange accum ans arr lo hi = foldRange accum (accum ans (index arr lo)) arr (lo+1) hi @@ -1082,26 +1170,24 @@ foldRange accum ans arr lo hi = foldRange accum (accum ans (index arr lo)) arr ( -- ========================================== -- test that incremental and bulk loading create the same KeyMap -testbulk :: Int -> Int -> Bool -testbulk seed n = (bulk == incremental) +testbulk :: Int -> Int -> (KeyMap Int, Bool) +testbulk seed n = (bulk, bulk == incremental) where keys = makeKeys seed n + f (k,v) = (keyPath k,Leaf k v) + cmp (p1,_) (p2,_) = compare p1 p2 pairsb = zip keys [0..] paths:: [(Path,KeyMap Int)] paths = sortBy cmp $ map f pairsb - f (k,v) = (keyPath k,Leaf k v) - cmp (p1,_) (p2,_) = compare p1 p2 pathArr = fromlist paths incremental = fromList pairsb bulk = bulkInsert 0 pathArr 0 (isize pathArr - 1) - -- =================================================== -- try and measure that bulk loading allocates less memory -- Does not count the creation and sorting of the array -- TODO can we do something kike this with a list rather than an array? -- or sort the array in place? - keysbulk :: [Key] keysbulk = makeKeys 199 50000 From cebce4c188df4ac81cc82b7e0b7beda130156cdf Mon Sep 17 00:00:00 2001 From: Alexey Kuleshevich Date: Tue, 26 Oct 2021 18:38:15 +0300 Subject: [PATCH 18/19] Ormolize --- libs/compact-map/src/Data/Compact/Class.hs | 299 ++--- libs/compact-map/src/Data/Compact/HashMap.hs | 37 +- libs/compact-map/src/Data/Compact/KeyMap.hs | 1111 +++++++++--------- libs/compact-map/test/Test/Compact/KeyMap.hs | 7 +- 4 files changed, 764 insertions(+), 690 deletions(-) diff --git a/libs/compact-map/src/Data/Compact/Class.hs b/libs/compact-map/src/Data/Compact/Class.hs index 9edb774206b..26c219fcd60 100644 --- a/libs/compact-map/src/Data/Compact/Class.hs +++ b/libs/compact-map/src/Data/Compact/Class.hs @@ -1,31 +1,37 @@ -{-# LANGUAGE RankNTypes #-} {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} - -- HeapWords for Array and PrimArray {-# OPTIONS_GHC -Wno-orphans #-} module Data.Compact.Class where +import Cardano.Prelude (HeapWords (..)) +import Control.Monad.ST (ST, runST) import qualified Data.Array as A -import qualified Data.Primitive.Array as PA import qualified Data.Array.MArray as MutA +import qualified Data.Primitive.Array as PA import Data.Primitive.PrimArray - ( PrimArray, indexPrimArray, primArrayFromList, primArrayToList, sizeofPrimArray, copyPrimArray, - MutablePrimArray,unsafeFreezePrimArray , newPrimArray,sizeofMutablePrimArray, readPrimArray, writePrimArray, - ) - + ( MutablePrimArray, + PrimArray, + copyPrimArray, + indexPrimArray, + newPrimArray, + primArrayFromList, + primArrayToList, + readPrimArray, + sizeofMutablePrimArray, + sizeofPrimArray, + unsafeFreezePrimArray, + writePrimArray, + ) +import Data.Primitive.SmallArray (SmallArray, SmallMutableArray) import qualified Data.Primitive.SmallArray as Small -import Data.Primitive.SmallArray(SmallArray,SmallMutableArray) - import Data.Primitive.Types (Prim (..)) -import GHC.Arr(STArray(..),unsafeFreezeSTArray) -import Control.Monad.ST (ST, runST) -import Cardano.Prelude (HeapWords (..)) +import GHC.Arr (STArray (..), unsafeFreezeSTArray) - -- ============================================================================================ -- Array like objects which can access elements by their index @@ -51,7 +57,6 @@ binsearch lo hi k v = (if index v mid > k then binsearch lo mid k v else binsear where mid = lo + (div (hi - lo) 2) - -- | Find the index and the value at the least upper bound of 'target' alub :: (Ord t1, Indexable t2 t1) => (Int, Int) -> t2 t1 -> t1 -> Maybe (Int, t1) alub (lo, hi) arr target @@ -64,8 +69,8 @@ alub (lo, hi) arr target mid = lo + (div (hi - lo) 2) boundsCheck :: Indexable t1 a => (t1 a -> Int -> t2) -> t1 a -> Int -> t2 -boundsCheck indexf arr i | i>=0 && i < isize arr = indexf arr i -boundsCheck _ arr i = error ("boundscheck error, "++show i++", not in bounds (0.."++show (isize arr -1)++").") +boundsCheck indexf arr i | i >= 0 && i < isize arr = indexf arr i +boundsCheck _ arr i = error ("boundscheck error, " ++ show i ++ ", not in bounds (0.." ++ show (isize arr -1) ++ ").") -- Built in type Instances @@ -76,14 +81,14 @@ instance Indexable PA.Array x where tolist arr = foldr (:) [] arr catenate = catArray merge = mergeArray - + instance Prim a => Indexable PrimArray a where index = boundsCheck indexPrimArray isize = sizeofPrimArray fromlist = primArrayFromList tolist = primArrayToList catenate = catArray - merge = mergeArray + merge = mergeArray instance Indexable (A.Array Int) a where index = (A.!) @@ -95,7 +100,7 @@ instance Indexable (A.Array Int) a where instance Indexable SmallArray t where index = boundsCheck Small.indexSmallArray - isize = Small.sizeofSmallArray + isize = Small.sizeofSmallArray fromlist = Small.smallArrayFromList tolist arr = foldr (:) [] arr catenate = catArray @@ -105,12 +110,14 @@ instance Indexable SmallArray t where -- Pairs of Mutable Arrays and ImMutable Arrays that can be converted safely -- ======================================================================== - -mboundsCheck :: (ArrayPair arr marr a) => - (marr s a -> Int -> ST s a) -> marr s a -> Int -> ST s a -mboundsCheck indexf arr i | i>=0 && i < msize arr = indexf arr i -mboundsCheck _ arr i = error ("mboundscheck error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") - +mboundsCheck :: + (ArrayPair arr marr a) => + (marr s a -> Int -> ST s a) -> + marr s a -> + Int -> + ST s a +mboundsCheck indexf arr i | i >= 0 && i < msize arr = indexf arr i +mboundsCheck _ arr i = error ("mboundscheck error, " ++ show i ++ ", not in bounds (0.." ++ show (msize arr -1) ++ ").") class Indexable arr a => ArrayPair arr marr a | marr -> arr, arr -> marr where mindex :: marr s a -> Int -> ST s a @@ -126,49 +133,50 @@ instance ArrayPair SmallArray SmallMutableArray a where mindex = mboundsCheck Small.readSmallArray msize = Small.sizeofSmallMutableArray mnew size = Small.newSmallArray size undefined - mfreeze = Small.unsafeFreezeSmallArray - mwrite arr i a = if i>=0 && i<(msize arr) - then Small.writeSmallArray arr i a - else error ("mwrite error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + mfreeze = Small.unsafeFreezeSmallArray + mwrite arr i a = + if i >= 0 && i < (msize arr) + then Small.writeSmallArray arr i a + else error ("mwrite error, " ++ show i ++ ", not in bounds (0.." ++ show (msize arr -1) ++ ").") mcopy = Small.copySmallArray instance ArrayPair PA.Array PA.MutableArray a where - msize = PA.sizeofMutableArray + msize = PA.sizeofMutableArray mindex = mboundsCheck PA.readArray mnew n = PA.newArray n undefined mfreeze = PA.unsafeFreezeArray mwrite arr i a = - if i>=0 && i<(msize arr) - then PA.writeArray arr i a - else error ("mwrite error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + if i >= 0 && i < (msize arr) + then PA.writeArray arr i a + else error ("mwrite error, " ++ show i ++ ", not in bounds (0.." ++ show (msize arr -1) ++ ").") mcopy = PA.copyArray instance Prim a => ArrayPair PrimArray MutablePrimArray a where msize = sizeofMutablePrimArray mindex = mboundsCheck readPrimArray - mnew = newPrimArray + mnew = newPrimArray mfreeze = unsafeFreezePrimArray mwrite arr i a = - if i>=0 && i<(msize arr) - then writePrimArray arr i a - else error ("mwrite error, "++show i++", not in bounds (0.."++show (msize arr -1)++").") + if i >= 0 && i < (msize arr) + then writePrimArray arr i a + else error ("mwrite error, " ++ show i ++ ", not in bounds (0.." ++ show (msize arr -1) ++ ").") mcopy = copyPrimArray -- | MutArray fixes the index type to Int for the STArray type constructor newtype MutArray s t = MutArray (STArray s Int t) instance ArrayPair (A.Array Int) MutArray a where - msize (MutArray (STArray lo hi _ _)) = hi - lo + 1 + msize (MutArray (STArray lo hi _ _)) = hi - lo + 1 mindex (MutArray arr) i = MutA.readArray arr i - mnew n = MutArray <$> (MutA.newArray_ (0,n-1)) + mnew n = MutArray <$> (MutA.newArray_ (0, n -1)) mfreeze (MutArray arr) = unsafeFreezeSTArray arr mwrite (MutArray arr) i a = MutA.writeArray arr i a mcopy marr startm arr start count = go startm start count - where go _i _j 0 = pure () - go i j n = do - mwrite marr i (index arr j) - go (i+1) (j+1) (n-1) - + where + go _i _j 0 = pure () + go i j n = do + mwrite marr i (index arr j) + go (i + 1) (j + 1) (n -1) -- ======================================================= -- Usefull functions that use Mutable Arrays @@ -176,9 +184,9 @@ instance ArrayPair (A.Array Int) MutArray a where -- | Build a mutable array from a list mfromlist :: ArrayPair arr marr a => [a] -> ST s (marr s a) mfromlist xs = do - marr <- mnew (length xs) + marr <- mnew (length xs) let loop _i [] = pure () - loop i (y:ys) = mwrite marr i y >> loop (i+1) ys + loop i (y : ys) = mwrite marr i y >> loop (i + 1) ys loop 0 xs pure marr @@ -187,42 +195,45 @@ mfromlist xs = do -- catArray [[2,1],[14],[6,5,11]] --> [2,1,14,6,5,11] -- mergeArray [[1,2],[14],[5,6,11]] --> [1,2,5,6,11,14] catArray :: ArrayPair arr marr a => Int -> [arr a] -> arr a -catArray totalsize xs = fst(withMutArray totalsize (build 0 xs)) - where build _next [] _marr = pure () - build next (arr: arrs) marr = - do let size = isize arr - mcopy marr next arr 0 size - build (next+size) arrs marr +catArray totalsize xs = fst (withMutArray totalsize (build 0 xs)) + where + build _next [] _marr = pure () + build next (arr : arrs) marr = + do + let size = isize arr + mcopy marr next arr 0 size + build (next + size) arrs marr -- | Swap the values at indices 'i' and 'j' in mutable array 'marr' swap :: ArrayPair arr marr a => marr s a -> Int -> Int -> ST s () -swap _ i j | i==j = pure () +swap _ i j | i == j = pure () swap marr i j = do - ti <- mindex marr i; - tj <- mindex marr j; + ti <- mindex marr i + tj <- mindex marr j mwrite marr i tj mwrite marr j ti mToList :: ArrayPair arr marr a => Int -> marr s a -> ST s [a] mToList first marr = loop first [] - where hi = (msize marr - 1) - loop lo xs | lo > hi = pure(reverse xs) - loop lo xs = do {x <- mindex marr lo; loop (lo+1) (x:xs)} - + where + hi = (msize marr - 1) + loop lo xs | lo > hi = pure (reverse xs) + loop lo xs = do x <- mindex marr lo; loop (lo + 1) (x : xs) -- | Extract a slice from an array slice :: ArrayPair arr2 marr a => Int -> Int -> arr2 a -> arr2 a slice 0 hi arr | hi == (isize arr -1) = arr -slice lo hi arr = fst(withMutArray size action) - where size = max (hi - lo + 1) 0 - action marr = mcopy marr 0 arr lo size +slice lo hi arr = fst (withMutArray size action) + where + size = max (hi - lo + 1) 0 + action marr = mcopy marr 0 arr lo size {-# INLINE slice #-} -- ================================================================ -- Functions for using mutable initialization in a safe manner. -- Using these functions is the safe way to use the method 'mfreeze' -withMutArray:: ArrayPair arr marr a => Int -> (forall s. marr s a -> ST s x) -> (arr a,x) +withMutArray :: ArrayPair arr marr a => Int -> (forall s. marr s a -> ST s x) -> (arr a, x) withMutArray n process = runST $ do marr <- mnew n x <- process marr @@ -230,11 +241,11 @@ withMutArray n process = runST $ do pure (arr, x) with2MutArray :: - ( ArrayPair arr1 marr1 a, ArrayPair arr2 marr2 b) => + (ArrayPair arr1 marr1 a, ArrayPair arr2 marr2 b) => Int -> Int -> (forall s. marr1 s a -> marr2 s b -> ST s x) -> - (arr1 a, arr2 b,x) + (arr1 a, arr2 b, x) with2MutArray size1 size2 process = runST $ do arr1 <- mnew size1 arr2 <- mnew size2 @@ -252,30 +263,30 @@ with2MutArray size1 size2 process = runST $ do class Ord key => Search t key where search :: key -> t -> Maybe Int -instance Ord key => Search (PA.Array key) key - where search key v = binsearch 0 (isize v - 1) key v +instance Ord key => Search (PA.Array key) key where + search key v = binsearch 0 (isize v - 1) key v -instance (Prim key,Ord key) => Search (PrimArray key) key - where search key v = binsearch 0 (isize v - 1) key v +instance (Prim key, Ord key) => Search (PrimArray key) key where + search key v = binsearch 0 (isize v - 1) key v -instance Ord key => Search (A.Array Int key) key - where search key v = binsearch 0 (isize v - 1) key v +instance Ord key => Search (A.Array Int key) key where + search key v = binsearch 0 (isize v - 1) key v instance (Search t key) => Search [t] key where - search _ [] = Nothing - search key (x:xs) = - case search key x of - Nothing -> search key xs - Just i -> Just i + search _ [] = Nothing + search key (x : xs) = + case search key x of + Nothing -> search key xs + Just i -> Just i instance Search t key => Search (Node t) key where - search key (Node _ x) = search key x + search key (Node _ x) = search key x -- ============================================================== -- Overloaded operations on (Map k v) class Maplike m k v where - makemap :: [(k,v)] -> m k v + makemap :: [(k, v)] -> m k v lookupmap :: Ord k => k -> m k v -> Maybe v insertmap :: Ord k => k -> v -> m k v -> m k v @@ -283,7 +294,7 @@ class Maplike m k v where -- Overloaded operations on (Set k) class Setlike m k where - makeset :: [k] -> m k + makeset :: [k] -> m k elemset :: Ord k => k -> m k -> Bool insertset :: Ord k => k -> m k -> m k emptyset :: m k @@ -331,7 +342,6 @@ pieces xs = chop parts xs chop [] _zs = [] chop ((_, n) : ys) zs = (n, take n zs) : chop ys (drop n zs) - -- | When a list is represented with the structure of binary numbers, an important -- property is that every such list has a full prefix. This is a prefix which has -- contiguous powers of two. For example: @@ -340,22 +350,21 @@ pieces xs = chop parts xs -- (16, [node 1,node 1,node 2, node 4, node 8], [node 32,node 128]) -- because [1,2,4,8] is the longest contiguous prefix consisting of adjacent powers of 2. -- In the worst case the prefix has length 1. -splitAtFullPrefix :: (node -> Int) -> Int -> node -> [node] -> (Int,[node],[node]) -splitAtFullPrefix getsize _next node [] = (getsize node,[node],[]) -splitAtFullPrefix getsize next node1 (node2:more) = - let n = getsize node1 - m = getsize node2 - in if next==m - then case splitAtFullPrefix getsize (next*2) node2 more of - (count,prefix,rest) -> (count+n, node1:prefix, rest) - else (n,[node1],node2:more) - +splitAtFullPrefix :: (node -> Int) -> Int -> node -> [node] -> (Int, [node], [node]) +splitAtFullPrefix getsize _next node [] = (getsize node, [node], []) +splitAtFullPrefix getsize next node1 (node2 : more) = + let n = getsize node1 + m = getsize node2 + in if next == m + then case splitAtFullPrefix getsize (next * 2) node2 more of + (count, prefix, rest) -> (count + n, node1 : prefix, rest) + else (n, [node1], node2 : more) -- ============================================================================== -- | A node carries a 'size' and some array-like type 'arr' data Node arr = Node {-# UNPACK #-} !Int arr - deriving Show + deriving (Show) arrayPart :: Node arr -> arr arrayPart (Node _ arr) = arr @@ -370,86 +379,96 @@ nodesize (Node i _) = i -- The function 'smaller' compares two 't' for smallness. -- 'pair' is the smallest (index,t) we have seen so far. 'lo' and 'hi' limit -- the bounds of where to look. -smallestIndex ::(ArrayPair arr marr t) => (t -> t -> Bool) -> marr s t -> (Int,t) ->Int -> Int -> ST s (Int,t) -smallestIndex smaller marr initpair initlo hi = loop initpair initlo where - loop pair lo | lo > hi = pure pair - loop (pair@(_i,t)) lo = do - t2 <- mindex marr lo - if smaller t t2 - then loop pair (lo+1) - else loop (lo,t2) (lo+1) +smallestIndex :: (ArrayPair arr marr t) => (t -> t -> Bool) -> marr s t -> (Int, t) -> Int -> Int -> ST s (Int, t) +smallestIndex smaller marr initpair initlo hi = loop initpair initlo + where + loop pair lo | lo > hi = pure pair + loop (pair@(_i, t)) lo = do + t2 <- mindex marr lo + if smaller t t2 + then loop pair (lo + 1) + else loop (lo, t2) (lo + 1) -- | Apply 'action' to each 't' in 'marr' in ascending order as determined by 'smaller' -- 'state' is the current state, and 'lo' and 'hi' limit the bounds of where to look. -- 'markIfDone' might alter 'marr' and return a new 'lo' limit, if the 'lo' index in -- 'marr' has no more 't' objects to offer. -inOrder :: - (Int -> Int -> t -> PA.MutableArray s t -> ST s Int) -> - (t -> t -> Bool) -> - state -> - Int -> - Int -> - (state -> t -> ST s state) -> - PA.MutableArray s t -> -- array of items to be merged, This should be small. At most 20 or so. - ST s state -inOrder markIfDone smaller initstate initlo hi action marr = loop initlo initstate where - loop lo state | lo > hi = pure state - loop lo state = - do t <- mindex marr lo - (i,small) <- smallestIndex smaller marr (lo,t) (lo+1) hi - state' <- action state small - lo' <- markIfDone lo i small marr - loop lo' state' +inOrder :: + (Int -> Int -> t -> PA.MutableArray s t -> ST s Int) -> + (t -> t -> Bool) -> + state -> + Int -> + Int -> + (state -> t -> ST s state) -> + PA.MutableArray s t -> -- array of items to be merged, This should be small. At most 20 or so. + ST s state +inOrder markIfDone smaller initstate initlo hi action marr = loop initlo initstate + where + loop lo state | lo > hi = pure state + loop lo state = + do + t <- mindex marr lo + (i, small) <- smallestIndex smaller marr (lo, t) (lo + 1) hi + state' <- action state small + lo' <- markIfDone lo i small marr + loop lo' state' -- | A commonly used 'markIfDone' function. Test if 'next' is still in bounds for 'arr' -- If so, them mutate 'marr' to indicate that next time we should look at index 'next+1' in arr. -- If it is out of bounds, then swap the pairs in 'marr' at indexs 'i' and 'lo', and then -- increment lo, so the pair that has no more to offer, is no longer in an active position. -mark1:: (Indexable arr t) => - Int -> Int -> (Int,arr t) -> PA.MutableArray s (Int,arr t) -> ST s Int -mark1 lo i (next,arr) marr = - do let next' = next+1 - if next' < isize arr - then mwrite marr i (next',arr) >> pure lo - else swap marr lo i >> pure(lo+1) +mark1 :: + (Indexable arr t) => + Int -> + Int -> + (Int, arr t) -> + PA.MutableArray s (Int, arr t) -> + ST s Int +mark1 lo i (next, arr) marr = + do + let next' = next + 1 + if next' < isize arr + then mwrite marr i (next', arr) >> pure lo + else swap marr lo i >> pure (lo + 1) -- | A commonly used 'smaller' function smaller1 :: (Ord a, Indexable arr a) => (Int, arr a) -> (Int, arr a) -> Bool -smaller1 (i,arr1) (j,arr2) = index arr1 i < index arr2 j +smaller1 (i, arr1) (j, arr2) = index arr1 i < index arr2 j -- | A commonly used 'action' function. Appropriate when the 'arr' is simple with -- no bells or whistles. Good for PrimArray, PA.Array, A.Array, Any array with a ArrayPair instance. -- If we use an exotic array with no ArrayPair instance, we can stil merge, but we can't use this -- action function. -action1:: (ArrayPair arr marr a, Indexable t a) => marr s a -> Int -> (Int, t a) -> ST s Int -action1 marr i (j,arr) = (mwrite marr i (index arr j) >> pure(i+1)) - +action1 :: (ArrayPair arr marr a, Indexable t a) => marr s a -> Int -> (Int, t a) -> ST s Int +action1 marr i (j, arr) = (mwrite marr i (index arr j) >> pure (i + 1)) -- | Merge a list of array-like objects using 'action' The 'action' will differ depending on -- what kind of arrays are begin merged. -mergeWithAction :: forall a arr marr. +mergeWithAction :: + forall a arr marr. (ArrayPair arr marr a, Ord a) => Int -> [arr a] -> - (forall s. marr s a -> Int -> (Int,arr a) -> ST s Int) -> + (forall s. marr s a -> Int -> (Int, arr a) -> ST s Int) -> arr a -mergeWithAction size inputs action = fst $ withMutArray size build where - build:: forall s. marr s a -> ST s Int - build moutput = do - minputs <- mfromlist (map (\ x -> (0,x)) inputs) - inOrder mark1 smaller1 (0::Int) 0 (length inputs-1) (action moutput) minputs +mergeWithAction size inputs action = fst $ withMutArray size build + where + build :: forall s. marr s a -> ST s Int + build moutput = do + minputs <- mfromlist (map (\x -> (0, x)) inputs) + inOrder mark1 smaller1 (0 :: Int) 0 (length inputs -1) (action moutput) minputs -- | Merge a list of array like objects by allocating the target and then merging the sources. -- mergeArray maintains ascending order. But catArray maintains index order. -- mergeArray [[1,2],[14],[5,6,11]] --> [1,2,5,6,11,14] -- catArray [[2,1],[14],[6,5,11]] --> [2,1,14,6,5,11] - -mergeArray :: (Ord a,ArrayPair arr marr a) => Int -> [arr a] -> arr a +mergeArray :: (Ord a, ArrayPair arr marr a) => Int -> [arr a] -> arr a mergeArray size xs = mergeWithAction size xs action1 testmerge :: PrimArray Int -testmerge = mergeArray (sum(map isize xs)) xs - where xs = [fromlist[2,7], fromlist[1,6,19], fromlist[4,9], fromlist[3,8,12,17]] +testmerge = mergeArray (sum (map isize xs)) xs + where + xs = [fromlist [2, 7], fromlist [1, 6, 19], fromlist [4, 9], fromlist [3, 8, 12, 17]] {- -- | Merge 2 parallel arrays with 'action'. The order of merging depends only on @@ -482,5 +501,3 @@ mergeMapNode size nodes = mergeArray2 size inputs action where action mkeys mvals i (n,arrkeys) = undefined -} - - diff --git a/libs/compact-map/src/Data/Compact/HashMap.hs b/libs/compact-map/src/Data/Compact/HashMap.hs index 48bf3eb70bf..cec0d079002 100644 --- a/libs/compact-map/src/Data/Compact/HashMap.hs +++ b/libs/compact-map/src/Data/Compact/HashMap.hs @@ -2,9 +2,9 @@ module Data.Compact.HashMap where +import Data.Compact.KeyMap (Key, KeyMap) import qualified Data.Compact.KeyMap as KM -import Data.Compact.KeyMap(Key,KeyMap) -import Data.Set(Set) +import Data.Set (Set) import qualified Data.Set as Set -- ========================================================================== @@ -14,41 +14,44 @@ class Keyed t where fromKey :: Key -> t data HashMap k v where - HashMap :: Keyed k => KeyMap v -> HashMap k v + HashMap :: Keyed k => KeyMap v -> HashMap k v lookup :: k -> HashMap k v -> Maybe v lookup k (HashMap m) = KM.lookupHM (toKey k) m insert :: k -> v -> HashMap k v -> HashMap k v -insert k v (HashMap m) = HashMap(KM.insert (toKey k) v m) +insert k v (HashMap m) = HashMap (KM.insert (toKey k) v m) insertWithKey :: (k -> v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v -insertWithKey combine key v (HashMap m) = HashMap(KM.insertWithKey comb (toKey key) v m) - where comb k v1 v2 = combine (fromKey k) v1 v2 +insertWithKey combine key v (HashMap m) = HashMap (KM.insertWithKey comb (toKey key) v m) + where + comb k v1 v2 = combine (fromKey k) v1 v2 restrictKeys :: HashMap k v -> Set k -> HashMap k v -restrictKeys (HashMap m) set = HashMap(KM.domainRestrict m (Set.map toKey set)) +restrictKeys (HashMap m) set = HashMap (KM.domainRestrict m (Set.map toKey set)) -splitLookup:: k -> HashMap k a -> (HashMap k a, Maybe a, HashMap k a) -splitLookup k (HashMap m) = (HashMap a,b,HashMap c) - where (a,b,c) = KM.splitKeyMap (KM.keyPath key) key m - key = toKey k +splitLookup :: k -> HashMap k a -> (HashMap k a, Maybe a, HashMap k a) +splitLookup k (HashMap m) = (HashMap a, b, HashMap c) + where + (a, b, c) = KM.splitKeyMap (KM.keyPath key) key m + key = toKey k -intersection:: HashMap k v -> HashMap k v -> HashMap k v -intersection (HashMap m1) (HashMap m2) = HashMap(KM.intersect m1 m2) +intersection :: HashMap k v -> HashMap k v -> HashMap k v +intersection (HashMap m1) (HashMap m2) = HashMap (KM.intersect m1 m2) foldlWithKey' :: (ans -> k -> v -> ans) -> ans -> HashMap k v -> ans foldlWithKey' accum a (HashMap m) = KM.foldWithAscKey accum2 a m - where accum2 ans k v = accum ans (fromKey k) v + where + accum2 ans k v = accum ans (fromKey k) v size :: HashMap k v -> Int size (HashMap m) = KM.sizeKeyMap m fromList :: Keyed k => [(k, v)] -> HashMap k v -fromList xs = HashMap(KM.fromList (map (\ (k,v) -> (toKey k,v)) xs)) +fromList xs = HashMap (KM.fromList (map (\(k, v) -> (toKey k, v)) xs)) toList :: HashMap k v -> [(k, v)] -toList (HashMap m) = KM.foldWithAscKey (\ ans k v -> (fromKey k,v):ans) [] m +toList (HashMap m) = KM.foldWithAscKey (\ans k v -> (fromKey k, v) : ans) [] m mapWithKey :: (k -> v -> u) -> HashMap k v -> HashMap k u -mapWithKey f (HashMap m) = HashMap (KM.mapWithKey (\ key v -> f (fromKey key) v) m) +mapWithKey f (HashMap m) = HashMap (KM.mapWithKey (\key v -> f (fromKey key) v) m) diff --git a/libs/compact-map/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs index aa33a028524..8dedd86291d 100644 --- a/libs/compact-map/src/Data/Compact/KeyMap.hs +++ b/libs/compact-map/src/Data/Compact/KeyMap.hs @@ -1,48 +1,46 @@ {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE UnboxedTuples #-} -{-# LANGUAGE MagicHash #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleContexts #-} - - +{-# LANGUAGE UnboxedTuples #-} {-# OPTIONS_GHC -Wno-orphans #-} - module Data.Compact.KeyMap where -import Data.Foldable (foldl') -import Cardano.Prelude (HeapWords (..),Generic,runST,ST) -import Data.Word(Word64) -import qualified Data.Primitive.Array as PA +import Cardano.Prelude (Generic, HeapWords (..), ST, runST) +import Control.DeepSeq (NFData (..)) import Data.Bits - ( Bits,(.&.), - (.|.), + ( Bits, + clearBit, complement, popCount, - unsafeShiftL, - shiftR, setBit, + shiftR, testBit, - clearBit, + unsafeShiftL, zeroBits, + (.&.), + (.|.), ) import Data.Compact.Class -import GHC.Exts ((==#), reallyUnsafePtrEquality#, isTrue# ) +import Data.Foldable (foldl') +import Data.List (sortBy) import qualified Data.Map as Map -import Control.DeepSeq (NFData(..)) +import qualified Data.Primitive.Array as PA +import Data.Primitive.SmallArray () import qualified Data.Primitive.SmallArray as Small -import Data.Primitive.SmallArray() -import System.Random(RandomGen,genWord64,mkStdGen) +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Text (Text, pack) +import Data.Word (Word64) +import GHC.Exts (isTrue#, reallyUnsafePtrEquality#, (==#)) import Prettyprinter -import Data.Text(Text,pack) import qualified Prettyprinter.Internal as Pretty -import Data.Set(Set) -import qualified Data.Set as Set -import Data.List(sortBy) +import System.Random (RandomGen, genWord64, mkStdGen) + -- import Debug.Trace - -- type PArray = PA.Array type PArray = Small.SmallArray @@ -59,11 +57,10 @@ type Bitmap = Word64 -- | The number of bits in a segment. Can't be more than 6, because using Word64 -- as Bitmap can only accomodate 2^6 = 64 bits -bitsPerSegment :: Int +bitsPerSegment :: Int bitsPerSegment = 6 {-# INLINE bitsPerSegment #-} - -- | Ints in the range [0.. intSize], represents one 'bitsPerSegment' wide portion of a key type Segment = Int @@ -71,31 +68,33 @@ type Segment = Int type Path = [Segment] -- | The maximum value of a Segment, as an Int -intSize :: Int +intSize :: Int intSize = 2 ^ bitsPerSegment {-# INLINE intSize #-} -- | The maximum value of a segment, as a Word64 wordSize :: Word64 -wordSize = 2 ^ ((fromIntegral bitsPerSegment)::Word64) +wordSize = 2 ^ ((fromIntegral bitsPerSegment) :: Word64) {-# INLINE wordSize #-} -- | The length of a list of segments representing a key. Need to be -- carefull if a Key isn't evenly divisible by bitsPerSegment pathSize :: Word64 -pathSize = (if (mod 64 wbits)==0 then (div 64 wbits) else (div 64 wbits) + 1) - where wbits = fromIntegral bitsPerSegment :: Word64 - +pathSize = (if (mod 64 wbits) == 0 then (div 64 wbits) else (div 64 wbits) + 1) + where + wbits = fromIntegral bitsPerSegment :: Word64 -- ======================================================================== --- Keys +-- Keys -- | Represents 32 Bytes, (wordsPerKey * 8) Bytes compactly -data Key = Key {-# UNPACK #-} !Word64 - {-# UNPACK #-} !Word64 - {-# UNPACK #-} !Word64 - {-# UNPACK #-} !Word64 - deriving (Eq,Ord,Show,NFData,Generic) +data Key + = Key + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + deriving (Eq, Ord, Show, NFData, Generic) -- | The number of Word64 per key wordsPerKey :: Int @@ -106,11 +105,12 @@ keyPathSize :: Int keyPathSize = wordsPerKey * (fromIntegral pathSize) genKey :: RandomGen b => b -> (Key, b) -genKey g = (Key w0 w1 w2 w3,g4) - where (w0,g1) = genWord64 g - (w1,g2) = genWord64 g1 - (w2,g3) = genWord64 g2 - (w3,g4) = genWord64 g3 +genKey g = (Key w0 w1 w2 w3, g4) + where + (w0, g1) = genWord64 g + (w1, g2) = genWord64 g1 + (w2, g3) = genWord64 g2 + (w3, g4) = genWord64 g3 -- | Note that (mod n wordSize) and (n .&. modMask) are the same modMask :: Word64 @@ -122,21 +122,23 @@ modMask = wordSize - 1 -- But much faster. getpath :: Word64 -> Path getpath w64 = loop pathSize w64 [] - where loop :: Word64 -> Word64 -> [Int] -> [Int] - loop 0 _ ans = ans - loop cnt n ans = loop (cnt - 1) (shiftR n bitsPerSegment) ((fromIntegral(n .&. modMask)):ans) + where + loop :: Word64 -> Word64 -> [Int] -> [Int] + loop 0 _ ans = ans + loop cnt n ans = loop (cnt - 1) (shiftR n bitsPerSegment) ((fromIntegral (n .&. modMask)) : ans) -- | Break up a Key into a Path keyPath :: Key -> Path keyPath (Key w0 w1 w2 w3) = getpath w0 ++ getpath w1 ++ getpath w2 ++ getpath w3 showBM :: Bitmap -> String -showBM bm = show(bitmapToList bm) +showBM bm = show (bitmapToList bm) bitmapToList :: Bits a => a -> [Int] bitmapToList bm = loop 63 [] - where loop i ans | i < 0 = ans - loop i ans = if testBit bm i then loop (i-1) (i:ans) else loop (i-1) ans + where + loop i ans | i < 0 = ans + loop i ans = if testBit bm i then loop (i -1) (i : ans) else loop (i -1) ans instance HeapWords Key where heapWords (Key _ _ _ _) = 5 @@ -144,14 +146,15 @@ instance HeapWords Key where -- =============================================================== data KeyMap v - = Empty - | Leaf {-# UNPACK #-} !Key !v - | One {-# UNPACK #-} !Int !(KeyMap v) -- 1 subtree - | Two {-# UNPACK #-} !Bitmap !(KeyMap v) !(KeyMap v) -- 2 subtrees - | BitmapIndexed {-# UNPACK #-} !Bitmap -- 3 - (intSize - 1) subtrees - {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) - | Full {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) -- intSize subtrees - deriving (NFData,Generic) + = Empty + | Leaf {-# UNPACK #-} !Key !v + | One {-# UNPACK #-} !Int !(KeyMap v) -- 1 subtree + | Two {-# UNPACK #-} !Bitmap !(KeyMap v) !(KeyMap v) -- 2 subtrees + | BitmapIndexed + {-# UNPACK #-} !Bitmap -- 3 - (intSize - 1) subtrees + {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) + | Full {-# UNPACK #-} !(Small.SmallArray (KeyMap v)) -- intSize subtrees + deriving (NFData, Generic) instance Eq v => Eq (KeyMap v) where (==) x y = toList x == toList y @@ -159,19 +162,19 @@ instance Eq v => Eq (KeyMap v) where heapAdd :: HeapWords a => a -> Int -> Int heapAdd x ans = heapWords x + ans -heapPlus:: HeapWords a => Int -> a -> Int +heapPlus :: HeapWords a => Int -> a -> Int heapPlus ans x = heapWords x + ans instance HeapWords t => HeapWords (PA.Array t) where - heapWords arr = foldl' heapPlus (2 + isize arr) arr + heapWords arr = foldl' heapPlus (2 + isize arr) arr instance HeapWords v => HeapWords (KeyMap v) where heapWords Empty = 1 heapWords (One _ xs) = 3 + heapWords xs - heapWords (Leaf _ v) = 6 + heapWords v -- Change when Key changes + heapWords (Leaf _ v) = 6 + heapWords v -- Change when Key changes heapWords (BitmapIndexed _ arr) = foldl' heapPlus 2 arr heapWords (Full arr) = foldl' heapPlus 1 arr - heapWords (Two _ a b) = 4 + heapWords a + heapWords b + heapWords (Two _ a b) = 4 + heapWords a + heapWords b instance HeapWords () where heapWords () = 1 @@ -183,7 +186,7 @@ tag (Leaf _ _v) = "Leaf" tag (BitmapIndexed _ _arr) = "BitmapedIndexed" tag (Full _arr) = "Full" tag (Two _ _a _b) = "Two" - + -- ====================================================================== -- Insertion @@ -195,76 +198,85 @@ insertWithKey' combine path k x kmap = go 0 kmap where go _ Empty = Leaf k x go n (One j node) = - case compare j i of - EQ -> One j (go (n+1) node) - LT -> Two (setBits [i,j]) node (go (n+1) Empty) - GT -> Two (setBits [i,j]) (go (n+1) Empty) node - where i = path !! n + case compare j i of + EQ -> One j (go (n + 1) node) + LT -> Two (setBits [i, j]) node (go (n + 1) Empty) + GT -> Two (setBits [i, j]) (go (n + 1) Empty) node + where + i = path !! n go n t@(Leaf k2 y) - | k == k2 = if x `ptrEq` y - then t - else (Leaf k (combine k x y)) + | k == k2 = + if x `ptrEq` y + then t + else (Leaf k (combine k x y)) | otherwise = twoLeaf (drop n (keyPath k2)) t (drop n path) k x go n t@(BitmapIndexed bmap arr) - | not(testBit bmap j) = - let !arr' = insertAt arr i $! (Leaf k x) - in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' - | otherwise = - let !st = index arr i - !st' = go (n+1) st - in if st' `ptrEq` st - then t - else BitmapIndexed bmap (update arr i st') - where i = indexFromSegment bmap j - j = (path !! n) + | not (testBit bmap j) = + let !arr' = insertAt arr i $! (Leaf k x) + in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' + | otherwise = + let !st = index arr i + !st' = go (n + 1) st + in if st' `ptrEq` st + then t + else BitmapIndexed bmap (update arr i st') + where + i = indexFromSegment bmap j + j = (path !! n) go n t@(Two bmap x0 x1) - | not(testBit bmap j) = - let !arr' = insertAt (fromlist [x0,x1]) i $! (Leaf k x) - in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' - | otherwise = - let !st = if i==0 then x0 else x1 - !st' = go (n+1) st - in if st' `ptrEq` st - then t - else if i==0 - then Two bmap st' x1 - else Two bmap x0 st' - where i = indexFromSegment bmap j - j = path !! n - go n t@(Full arr) = - let !st = index arr i - !st' = go (n+1) st - in if st' `ptrEq` st + | not (testBit bmap j) = + let !arr' = insertAt (fromlist [x0, x1]) i $! (Leaf k x) + in bitmapIndexedOrFull (bmap .|. (setBit 0 j)) arr' + | otherwise = + let !st = if i == 0 then x0 else x1 + !st' = go (n + 1) st + in if st' `ptrEq` st then t - else Full (update arr i st') - where i = indexFromSegment fullNodeMask j - j = path !! n + else + if i == 0 + then Two bmap st' x1 + else Two bmap x0 st' + where + i = indexFromSegment bmap j + j = path !! n + go n t@(Full arr) = + let !st = index arr i + !st' = go (n + 1) st + in if st' `ptrEq` st + then t + else Full (update arr i st') + where + i = indexFromSegment fullNodeMask j + j = path !! n twoLeaf :: Path -> KeyMap v -> Path -> Key -> v -> KeyMap v twoLeaf [] _ _ _ _ = error ("the path ran out of segments in twoLeaf case 1.") twoLeaf _ _ [] _ _ = error ("the path ran out of segments in twoLeaf case 1.") -twoLeaf (i:is) leaf1 (j:js) k2 v2 - | i==j = One i (twoLeaf is leaf1 js k2 v2) - | otherwise = if i < j - then Two (setBits [i,j]) leaf1 (Leaf k2 v2) - else Two (setBits [i,j]) (Leaf k2 v2) leaf1 +twoLeaf (i : is) leaf1 (j : js) k2 v2 + | i == j = One i (twoLeaf is leaf1 js k2 v2) + | otherwise = + if i < j + then Two (setBits [i, j]) leaf1 (Leaf k2 v2) + else Two (setBits [i, j]) (Leaf k2 v2) leaf1 insertWithKey :: (Key -> v -> v -> v) -> Key -> v -> KeyMap v -> KeyMap v insertWithKey f k v m = insertWithKey' f (keyPath k) k v m insertWith :: (t -> t -> t) -> Key -> t -> KeyMap t -> KeyMap t -insertWith f k v m = insertWithKey' (\ _ key val -> f key val) (keyPath k) k v m +insertWith f k v m = insertWithKey' (\_ key val -> f key val) (keyPath k) k v m insert :: Key -> v -> KeyMap v -> KeyMap v -insert k v m = insertWithKey' (\ _key new _old -> new) (keyPath k) k v m +insert k v m = insertWithKey' (\_key new _old -> new) (keyPath k) k v m -fromList :: [(Key,v)] -> KeyMap v +fromList :: [(Key, v)] -> KeyMap v fromList ps = foldl' accum Empty ps - where accum ans (k,v) = insert k v ans + where + accum ans (k, v) = insert k v ans -toList :: KeyMap v -> [(Key,v)] +toList :: KeyMap v -> [(Key, v)] toList km = foldWithDescKey accum [] km - where accum k v ans = (k,v):ans + where + accum k v ans = (k, v) : ans -- ================================================================= -- Deletion @@ -272,31 +284,32 @@ toList km = foldWithDescKey accum [] km -- | Delete the Key encoded in the Path from the KeyMap delete' :: Path -> Key -> KeyMap v -> KeyMap v delete' [] _key hm = hm -- Removing a bogus key, leaves 'hm' unchanged -delete' _ _key Empty = Empty -delete' _ k (hm@(Leaf k2 _)) = if k==k2 then Empty else hm -delete' (i:is) k (hm@(One j x)) = if i==j then oneE j (delete' is k x) else hm -delete' (i:is) k (hm@(Two bmap x y)) = - if testBit bmap i - then twoE bmap (delete' is k x) (delete' is k y) - else hm -delete' (i:is) k (hm@(BitmapIndexed bmap arr)) = - if testBit bmap i - then let m = setBit 0 i - j = sparseIndex bmap m - result = delete' is k (index arr j) - -- Consume an upwards floating Empty by removing that element from the array - in case result of - Empty -> bitmapE (clearBit bmap i) (remove arr j) - _ -> BitmapIndexed bmap (update arr j result) - else hm -delete' (i:is) k (Full arr) = - let m = setBit 0 i - j = sparseIndex fullNodeMask m - result = delete' is k (index arr j) - -- Consume an upwards floating Empty by removing that element from the array - in case result of +delete' _ _key Empty = Empty +delete' _ k (hm@(Leaf k2 _)) = if k == k2 then Empty else hm +delete' (i : is) k (hm@(One j x)) = if i == j then oneE j (delete' is k x) else hm +delete' (i : is) k (hm@(Two bmap x y)) = + if testBit bmap i + then twoE bmap (delete' is k x) (delete' is k y) + else hm +delete' (i : is) k (hm@(BitmapIndexed bmap arr)) = + if testBit bmap i + then + let m = setBit 0 i + j = sparseIndex bmap m + result = delete' is k (index arr j) + in -- Consume an upwards floating Empty by removing that element from the array + case result of + Empty -> bitmapE (clearBit bmap i) (remove arr j) + _ -> BitmapIndexed bmap (update arr j result) + else hm +delete' (i : is) k (Full arr) = + let m = setBit 0 i + j = sparseIndex fullNodeMask m + result = delete' is k (index arr j) + in -- Consume an upwards floating Empty by removing that element from the array + case result of Empty -> BitmapIndexed (clearBit fullNodeMask i) (remove arr j) - _ -> Full(update arr j result) + _ -> Full (update arr j result) delete :: Key -> KeyMap v -> KeyMap v delete k hm = delete' (keyPath k) k hm @@ -328,69 +341,76 @@ bitmapE bmap arr = bitmapIndexedOrFull bmap arr -- ================================================================ -- aggregation in ascending order of keys -foldWithAscKey :: (ans -> Key -> v -> ans) -> ans -> KeyMap v -> ans +foldWithAscKey :: (ans -> Key -> v -> ans) -> ans -> KeyMap v -> ans foldWithAscKey _ !ans Empty = ans foldWithAscKey accum !ans (Leaf k v) = accum ans k v foldWithAscKey accum !ans (One _ x) = foldWithAscKey accum ans x foldWithAscKey accum !ans (Two _ x y) = foldWithAscKey accum (foldWithAscKey accum ans x) y foldWithAscKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 0 - where n = isize arr - loop !ans i | i >= n = ans - loop !ans i = loop (foldWithAscKey accum ans (index arr i)) (i+1) + where + n = isize arr + loop !ans i | i >= n = ans + loop !ans i = loop (foldWithAscKey accum ans (index arr i)) (i + 1) foldWithAscKey accum !ans0 (Full arr) = loop ans0 0 - where n = isize arr - loop !ans i | i >= n = ans - loop !ans i = loop (foldWithAscKey accum ans (index arr i)) (i+1) + where + n = isize arr + loop !ans i | i >= n = ans + loop !ans i = loop (foldWithAscKey accum ans (index arr i)) (i + 1) sizeKeyMap :: KeyMap v -> Int -sizeKeyMap x = foldWithAscKey (\ ans _k _v -> ans+1) 0 x +sizeKeyMap x = foldWithAscKey (\ans _k _v -> ans + 1) 0 x -- ================================================================ -- aggregation in descending order of keys -foldWithDescKey :: (Key -> v -> ans -> ans) -> ans -> KeyMap v -> ans +foldWithDescKey :: (Key -> v -> ans -> ans) -> ans -> KeyMap v -> ans foldWithDescKey _ !ans Empty = ans foldWithDescKey accum !ans (Leaf k v) = accum k v ans foldWithDescKey accum !ans (One _ x) = foldWithDescKey accum ans x foldWithDescKey accum !ans (Two _ x y) = foldWithDescKey accum (foldWithDescKey accum ans y) x -foldWithDescKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 (n-1) - where n = isize arr - loop !ans i | i < 0 = ans - loop !ans i = loop (foldWithDescKey accum ans (index arr i)) (i-1) -foldWithDescKey accum !ans0 (Full arr) = loop ans0 (n-1) - where n = isize arr - loop !ans i | i < 0 = ans - loop !ans i = loop (foldWithDescKey accum ans (index arr i)) (i-1) +foldWithDescKey accum !ans0 (BitmapIndexed _ arr) = loop ans0 (n -1) + where + n = isize arr + loop !ans i | i < 0 = ans + loop !ans i = loop (foldWithDescKey accum ans (index arr i)) (i -1) +foldWithDescKey accum !ans0 (Full arr) = loop ans0 (n -1) + where + n = isize arr + loop !ans i | i < 0 = ans + loop !ans i = loop (foldWithDescKey accum ans (index arr i)) (i -1) -- ================================================================== --- Lookup a key +-- Lookup a key -lookupHM :: Key-> KeyMap v -> Maybe v +lookupHM :: Key -> KeyMap v -> Maybe v lookupHM key km = go (keyPath key) km - where go _ Empty = Nothing - go _ (Leaf key2 v) = if key == key2 then Just v else Nothing - go [] _ = Nothing -- Path is empty, we will never find it. - go (j:js) (One i x) = if i==j then go js x else Nothing - go (j:js) (Two bm x0 x1) = - if testBit bm j - then (if i==0 then go js x0 else go js x1) - else Nothing - where i = indexFromSegment bm j - - go (j:js) (BitmapIndexed bm arr) = - if testBit bm j - then go js (index arr i) - else Nothing - where i = indexFromSegment bm j - - go (j:js) (Full arr) = -- Every possible bit is set, so no testBit call necessary - go js (index arr i) - where i = indexFromSegment fullNodeMask j - + where + go _ Empty = Nothing + go _ (Leaf key2 v) = if key == key2 then Just v else Nothing + go [] _ = Nothing -- Path is empty, we will never find it. + go (j : js) (One i x) = if i == j then go js x else Nothing + go (j : js) (Two bm x0 x1) = + if testBit bm j + then (if i == 0 then go js x0 else go js x1) + else Nothing + where + i = indexFromSegment bm j + go (j : js) (BitmapIndexed bm arr) = + if testBit bm j + then go js (index arr i) + else Nothing + where + i = indexFromSegment bm j + go (j : js) (Full arr) = + -- Every possible bit is set, so no testBit call necessary + go js (index arr i) + where + i = indexFromSegment fullNodeMask j + -- ========================================================= -- map -mapWithKey :: (Key -> a -> b) -> KeyMap a -> KeyMap b +mapWithKey :: (Key -> a -> b) -> KeyMap a -> KeyMap b mapWithKey _ Empty = Empty mapWithKey f (Leaf k2 v) = (Leaf k2 (f k2 v)) mapWithKey f (One i x) = One i (mapWithKey f x) @@ -399,136 +419,140 @@ mapWithKey f (BitmapIndexed bm arr) = BitmapIndexed bm (fmap (mapWithKey f) arr) mapWithKey f (Full arr) = Full (fmap (mapWithKey f) arr) instance Functor KeyMap where - fmap f x = mapWithKey (\ _ v -> f v) x + fmap f x = mapWithKey (\_ v -> f v) x -- ========================================================== -- Split a KeyMap into 3 parts -- | return (smaller than 'key', has key?, greater than 'key') -splitKeyMap:: Path -> Key -> KeyMap v -> (KeyMap v,Maybe v,KeyMap v) -splitKeyMap [] _key hm = (hm,Nothing,Empty) -splitKeyMap (i:is) key hm = +splitKeyMap :: Path -> Key -> KeyMap v -> (KeyMap v, Maybe v, KeyMap v) +splitKeyMap [] _key hm = (hm, Nothing, Empty) +splitKeyMap (i : is) key hm = case splitBySegment i hm of - (less,x,greater) -> + (less, x, greater) -> case x of - Empty -> (build less,Nothing,build greater) - (Leaf k v) -> (build less,if key==k then (Just v) else Nothing,build greater) - other -> (reconstruct i less less1,ans,reconstruct i greater greater1) - where (less1,ans,greater1) = splitKeyMap is key other - -splitBySegment :: Segment -> KeyMap v -> ([(Segment,KeyMap v)],KeyMap v, [(Segment,KeyMap v)]) -splitBySegment i _x | i < 0 = ([],Empty,[]) -splitBySegment i _x | i > intSize = ([],Empty,[]) -splitBySegment _ Empty = ([],Empty,[]) -splitBySegment _ (x@(Leaf _ _)) = ([],x,[]) + Empty -> (build less, Nothing, build greater) + (Leaf k v) -> (build less, if key == k then (Just v) else Nothing, build greater) + other -> (reconstruct i less less1, ans, reconstruct i greater greater1) + where + (less1, ans, greater1) = splitKeyMap is key other + +splitBySegment :: Segment -> KeyMap v -> ([(Segment, KeyMap v)], KeyMap v, [(Segment, KeyMap v)]) +splitBySegment i _x | i < 0 = ([], Empty, []) +splitBySegment i _x | i > intSize = ([], Empty, []) +splitBySegment _ Empty = ([], Empty, []) +splitBySegment _ (x@(Leaf _ _)) = ([], x, []) splitBySegment i (x@(One j y)) = - case compare i j of - LT -> ([],Empty,[(i,x)]) - EQ -> ([],y,[]) - GT -> ([(i,x)],Empty,[]) -splitBySegment i (Two bmap l h) = splitArrAtSeg i bmap (fromlist [l,h]) + case compare i j of + LT -> ([], Empty, [(i, x)]) + EQ -> ([], y, []) + GT -> ([(i, x)], Empty, []) +splitBySegment i (Two bmap l h) = splitArrAtSeg i bmap (fromlist [l, h]) splitBySegment i (BitmapIndexed bmap arr) = splitArrAtSeg i bmap arr -splitBySegment i (Full arr) = splitArrAtSeg i fullNodeMask arr +splitBySegment i (Full arr) = splitArrAtSeg i fullNodeMask arr -- | Split an PArray at a particular Segment. -splitArrAtSeg:: Segment -> Bitmap -> PArray (KeyMap v) -> ([(Int, KeyMap v)], KeyMap v, [(Int, KeyMap v)]) +splitArrAtSeg :: Segment -> Bitmap -> PArray (KeyMap v) -> ([(Int, KeyMap v)], KeyMap v, [(Int, KeyMap v)]) splitArrAtSeg i bmap arr = (takeWhile smaller ps, match, dropWhile tooSmall ps) - where ps = zip (bitmapToList bmap) (tolist arr) - smaller (j,_) = j < i - tooSmall (j,_) = j <= i - same (j,_) = i==j - match = case filter same ps of - [] -> Empty - ((_,x):_) -> x - --- | reconstruct a KeyMap from list of previous Segments, and a single KeyMap from the next Segment + where + ps = zip (bitmapToList bmap) (tolist arr) + smaller (j, _) = j < i + tooSmall (j, _) = j <= i + same (j, _) = i == j + match = case filter same ps of + [] -> Empty + ((_, x) : _) -> x + +-- | reconstruct a KeyMap from list of previous Segments, and a single KeyMap from the next Segment reconstruct :: Segment -> [(Segment, KeyMap v)] -> KeyMap v -> KeyMap v reconstruct _ xs Empty = build xs -reconstruct seg xs x = build (insertAscending (seg,x) xs) +reconstruct seg xs x = build (insertAscending (seg, x) xs) -- | insert a Segment pair in ascending order of Segments, Keep it sorted. -insertAscending:: (Segment, KeyMap v) -> [(Segment, KeyMap v)] -> [(Segment, KeyMap v)] -insertAscending (i,x) [] = [(i,x)] -insertAscending (i,x) (ws@((y@(j,_)):ys)) = +insertAscending :: (Segment, KeyMap v) -> [(Segment, KeyMap v)] -> [(Segment, KeyMap v)] +insertAscending (i, x) [] = [(i, x)] +insertAscending (i, x) (ws@((y@(j, _)) : ys)) = case compare i j of - LT -> (i,x):ws - GT -> y : insertAscending (i,x) ys - EQ -> (i,x):ys -- We know that the Segement i should never appear in the list + LT -> (i, x) : ws + GT -> y : insertAscending (i, x) ys + EQ -> (i, x) : ys -- We know that the Segement i should never appear in the list -- | Build a KeyMap out of a list of Segment pairs. build :: [(Segment, KeyMap v)] -> KeyMap v build [] = Empty -build [(_,x)] = x -build [(j,x),(k,y)] = Two (setBits [j,k]) x y +build [(_, x)] = x +build [(j, x), (k, y)] = Two (setBits [j, k]) x y build ps = bitmapIndexedOrFull (setBits (map fst ps)) (fromlist (map snd ps)) - testSplit2 :: Int -> IO () -testSplit2 i = putStrLn (unlines [show hm, " ",show pathx," ",show a, " ",show b, " ",show c]) - where keys = makeKeys 99 1000 - ps = zip (take 12 keys) [0..] - hm :: KeyMap Int - hm = fromList ps - pathx = (keyPath (keys !! i)) - (a,b,c) = splitKeyMap pathx (keys !! i) hm +testSplit2 i = putStrLn (unlines [show hm, " ", show pathx, " ", show a, " ", show b, " ", show c]) + where + keys = makeKeys 99 1000 + ps = zip (take 12 keys) [0 ..] + hm :: KeyMap Int + hm = fromList ps + pathx = (keyPath (keys !! i)) + (a, b, c) = splitKeyMap pathx (keys !! i) hm -- ========================================================= -- UnionWith -- | Make an array of size 1, with 'x' stored at index 0. array1 :: a -> PArray a -array1 x = fst(withMutArray 1 (\ marr -> mwrite marr 0 x)) +array1 x = fst (withMutArray 1 (\marr -> mwrite marr 0 x)) -- | Make an array of size 2, with 'x' stored at index 0. array2 :: a -> a -> PArray a -array2 x y = fst(withMutArray 2 (\ marr -> mwrite marr 0 x >> mwrite marr 1 y)) - +array2 x y = fst (withMutArray 2 (\marr -> mwrite marr 0 x >> mwrite marr 1 y)) -- | Turn a (KeyMap v) into a BitMap and an PArray (KeyMap v) -toSegArray :: Int -> KeyMap v -> (Bitmap,PArray (KeyMap v)) -toSegArray _ Empty = error ("not possible: Empty in toSegArray") -toSegArray n (l@(Leaf k _)) = (setBit 0 (keyPath k !! n),array1 l) -toSegArray _ (One i x) = (setBits [i],array1 x) +toSegArray :: Int -> KeyMap v -> (Bitmap, PArray (KeyMap v)) +toSegArray _ Empty = error ("not possible: Empty in toSegArray") +toSegArray n (l@(Leaf k _)) = (setBit 0 (keyPath k !! n), array1 l) +toSegArray _ (One i x) = (setBits [i], array1 x) toSegArray _ (Two bm x y) = (bm, array2 x y) -toSegArray _ (BitmapIndexed bm arr) = (bm,arr) -toSegArray _ (Full arr) = (fullNodeMask,arr) +toSegArray _ (BitmapIndexed bm arr) = (bm, arr) +toSegArray _ (Full arr) = (fullNodeMask, arr) union2 :: Int -> (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v union2 _ _ Empty Empty = Empty union2 _ _ x Empty = x union2 _ _ Empty y = y union2 n combine x y = bitmapIndexedOrFull bmap arrAll - where (bmx,arrx) = toSegArray n x - (bmy,arry) = toSegArray n y - (bmap,arrAll) = mergeArrayWithBitMaps union3 bmx arrx bmy arry - union3 (Leaf k1 v1) (Leaf k2 v2) | k1==k2 = Leaf k1 (combine k1 v1 v2) - union3 a b = union2 (n+1) combine a b - -mergeArrayWithBitMaps :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap,PArray v) -mergeArrayWithBitMaps combine bm1 arr1 bm2 arr2 = (bmBoth,fst (withMutArray size action)) - where bmBoth = bm1 .|. bm2 - size = popCount bmBoth - segments = bitmapToList bmBoth - action marr3 = (loop segments) - where loop [] = pure () - loop (i:is) = do - let j1 = (indexFromSegment bm1 i) - j2 = (indexFromSegment bm2 i) - j3 = indexFromSegment bmBoth i - case (testBit bm1 i, testBit bm2 i) of - (True, True) -> mwrite marr3 j3 (combine (index arr1 j1) (index arr2 j2)) - (True,False) -> mwrite marr3 j3 (index arr1 j1) - (False,True) -> mwrite marr3 j3 (index arr2 j2) - (False,False) -> pure () - loop is + where + (bmx, arrx) = toSegArray n x + (bmy, arry) = toSegArray n y + (bmap, arrAll) = mergeArrayWithBitMaps union3 bmx arrx bmy arry + union3 (Leaf k1 v1) (Leaf k2 v2) | k1 == k2 = Leaf k1 (combine k1 v1 v2) + union3 a b = union2 (n + 1) combine a b + +mergeArrayWithBitMaps :: (v -> v -> v) -> Bitmap -> PArray v -> Bitmap -> PArray v -> (Bitmap, PArray v) +mergeArrayWithBitMaps combine bm1 arr1 bm2 arr2 = (bmBoth, fst (withMutArray size action)) + where + bmBoth = bm1 .|. bm2 + size = popCount bmBoth + segments = bitmapToList bmBoth + action marr3 = (loop segments) + where + loop [] = pure () + loop (i : is) = do + let j1 = (indexFromSegment bm1 i) + j2 = (indexFromSegment bm2 i) + j3 = indexFromSegment bmBoth i + case (testBit bm1 i, testBit bm2 i) of + (True, True) -> mwrite marr3 j3 (combine (index arr1 j1) (index arr2 j2)) + (True, False) -> mwrite marr3 j3 (index arr1 j1) + (False, True) -> mwrite marr3 j3 (index arr2 j2) + (False, False) -> pure () + loop is bmapA, bmapB :: Bitmap -bmapA = setBits [0,3,6,11,15] -bmapB = setBits [1,3,5,9,11,14] +bmapA = setBits [0, 3, 6, 11, 15] +bmapB = setBits [1, 3, 5, 9, 11, 14] arrA, arrB :: PArray Int -arrA = fromlist [0,3,6,11,15] -arrB = fromlist [1,3,5,9,11,14] +arrA = fromlist [0, 3, 6, 11, 15] +arrB = fromlist [1, 3, 5, 9, 11, 14] testmergeBm :: (Bitmap, PArray Int) testmergeBm = mergeArrayWithBitMaps (+) bmapA arrA bmapB arrB @@ -537,9 +561,9 @@ unionWithKey :: (Key -> v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v unionWithKey comb x y = union2 0 comb x y unionWith :: (v -> v -> v) -> KeyMap v -> KeyMap v -> KeyMap v -unionWith comb x y = union2 0 (\ _k a b -> comb a b) x y +unionWith comb x y = union2 0 (\_k a b -> comb a b) x y -hm10, hm11, hm12:: KeyMap Int +hm10, hm11, hm12 :: KeyMap Int hm10 = fromList (take 5 pairs) hm11 = fromList (take 5 (drop 4 pairs)) hm12 = unionWith (+) hm10 hm11 @@ -548,18 +572,18 @@ hm12 = unionWith (+) hm10 hm11 -- Maximum and Minimum Key -- | Get the smallest key, NOT the smallest value -getMin :: KeyMap v -> Maybe (Key,v) +getMin :: KeyMap v -> Maybe (Key, v) getMin Empty = Nothing -getMin (Leaf k v) = Just (k,v) +getMin (Leaf k v) = Just (k, v) getMin (One _ x) = getMin x getMin (Two _ x _) = getMin x getMin (BitmapIndexed _ arr) = getMin (index arr 0) getMin (Full arr) = getMin (index arr 0) -- | Get the largest key, NOT the largest value -getMax :: KeyMap v -> Maybe (Key,v) +getMax :: KeyMap v -> Maybe (Key, v) getMax Empty = Nothing -getMax (Leaf k v) = Just (k,v) +getMax (Leaf k v) = Just (k, v) getMax (One _ x) = getMax x getMax (Two _ _ y) = getMax y getMax (BitmapIndexed _ arr) = getMax (index arr (isize arr - 1)) @@ -570,28 +594,27 @@ getMax (Full arr) = getMax (index arr (isize arr - 1)) -- | The (key,value) pairs (i.e. a subset) of 'h1' where key is in the domain of both 'h1' and 'h2' intersect :: KeyMap v -> KeyMap v -> KeyMap v intersect map1 map2 = - case maxMinOf map1 map2 of - Nothing -> Empty - Just k -> leapfrog k map1 map2 Empty + case maxMinOf map1 map2 of + Nothing -> Empty + Just k -> leapfrog k map1 map2 Empty --- | Accumulate a new Key map, by adding the key value pairs to 'ans', for +-- | Accumulate a new Key map, by adding the key value pairs to 'ans', for -- the Keys that appear in both maps 'x' and 'y'. The key 'k' should -- be the smallest key in either 'x' or 'y', used to get started. leapfrog :: Key -> KeyMap v -> KeyMap v -> KeyMap v -> KeyMap v leapfrog k x y ans = - case (lub k x,lub k y) of - (Just(k1,v1,h1),Just(k2,_,h2)) -> - case maxMinOf h1 h2 of - Just k3 -> leapfrog k3 h1 h2 (if k1==k2 then insert k1 v1 ans else ans) - Nothing -> (if k1==k2 then insert k1 v1 ans else ans) - _ -> ans - + case (lub k x, lub k y) of + (Just (k1, v1, h1), Just (k2, _, h2)) -> + case maxMinOf h1 h2 of + Just k3 -> leapfrog k3 h1 h2 (if k1 == k2 then insert k1 v1 ans else ans) + Nothing -> (if k1 == k2 then insert k1 v1 ans else ans) + _ -> ans -- | Get the larger of the two min keys of 'x' and 'y'. Nothing if either is Empty. maxMinOf :: KeyMap v1 -> KeyMap v2 -> Maybe Key -maxMinOf x y = case (getMin x,getMin y) of - (Just (k1,_),Just (k2,_)) -> Just(max k1 k2) - _ -> Nothing +maxMinOf x y = case (getMin x, getMin y) of + (Just (k1, _), Just (k2, _)) -> Just (max k1 k2) + _ -> Nothing -- ================================================================================== -- Given a Key, Split a KeyMap into a least upper bound on the Key and everything else @@ -602,94 +625,98 @@ maxMinOf x y = case (getMin x,getMin y) of lub :: Key -> KeyMap v -> Maybe (Key, v, KeyMap v) lub key hm = case splitKeyMap (keyPath key) key hm of - (_,Just v,Empty) -> Just(key,v,Empty) - (_,Just v,hm2) -> Just(key,v,hm2) - (_,Nothing,hm1) -> - case getMin hm1 of - Just (k,v) -> Just(k,v,hm1) - Nothing -> Nothing - + (_, Just v, Empty) -> Just (key, v, Empty) + (_, Just v, hm2) -> Just (key, v, hm2) + (_, Nothing, hm1) -> + case getMin hm1 of + Just (k, v) -> Just (k, v, hm1) + Nothing -> Nothing -- | The smallest (key and value) greater-or-equal to 'key', plus a new KeyMap -- that includes everything greater than that lub key. mylub :: Key -> KeyMap v -> Maybe (Key, v, KeyMap v) mylub key mp = go (keyPath key) mp - where go [] _ = Nothing - go _ Empty = Nothing - go _ (Leaf k v) = if k >= key then Just(k,v,Empty) else Nothing - go (i:is) (One j x) = - case compare i j of - EQ -> go is x - LT -> go is x - GT -> Nothing - go path (Two bm x y) = mylubArray path bm (fromlist [x,y]) - go path (BitmapIndexed bm arr) = mylubArray path bm arr - go path (Full arr) = mylubArray path fullNodeMask arr - mylubArray [] _ _ = Nothing - mylubArray (i:is) bm arr = - case findFirstLargerSegment key arr i bm of + where + go [] _ = Nothing + go _ Empty = Nothing + go _ (Leaf k v) = if k >= key then Just (k, v, Empty) else Nothing + go (i : is) (One j x) = + case compare i j of + EQ -> go is x + LT -> go is x + GT -> Nothing + go path (Two bm x y) = mylubArray path bm (fromlist [x, y]) + go path (BitmapIndexed bm arr) = mylubArray path bm arr + go path (Full arr) = mylubArray path fullNodeMask arr + mylubArray [] _ _ = Nothing + mylubArray (i : is) bm arr = + case findFirstLargerSegment key arr i bm of + Nothing -> Nothing + Just (n, j, newbm) -> + case go is (index arr n) of Nothing -> Nothing - Just (n,j,newbm) -> - case go is (index arr n) of - Nothing -> Nothing - Just (k,v,Empty) -> -- This case occurs only when (index arr n) is a (Leaf kk v) - if k==key -- And kk >= key, but the two cases: 1) kk=key and kk>key differ - then let arr2 = (slice (n+1) (isize arr - 1) arr) - in Just (k,v, bitmapIndexedOrFull (clearBit newbm j) arr2) - else Just (k,v, bitmapIndexedOrFull newbm (suffixAfterIndex n (Leaf k v) arr)) - Just (k,v,keymap) -> Just (k,v, bitmapIndexedOrFull newbm (suffixAfterIndex n keymap arr)) - + Just (k, v, Empty) -> + -- This case occurs only when (index arr n) is a (Leaf kk v) + if k == key -- And kk >= key, but the two cases: 1) kk=key and kk>key differ + then + let arr2 = (slice (n + 1) (isize arr - 1) arr) + in Just (k, v, bitmapIndexedOrFull (clearBit newbm j) arr2) + else Just (k, v, bitmapIndexedOrFull newbm (suffixAfterIndex n (Leaf k v) arr)) + Just (k, v, keymap) -> Just (k, v, bitmapIndexedOrFull newbm (suffixAfterIndex n keymap arr)) -- | 'seg' is the current Segment in the Path of 'key'. 'bm' is the set of Segments that -- are stored in 'arr'. We are looking for the index, 'i', of the first KeyMap in 'arr' where -- there is some key that is greater than or equal to 'key'. Since 'j' is the first segment -- of things stored at index 'i', we can skip any index whose first segment 'j' is less than 'seg'. -findFirstLargerSegment :: Key -> PArray (KeyMap v) -> Segment -> Bitmap -> Maybe (Int,Segment,Bitmap) +findFirstLargerSegment :: Key -> PArray (KeyMap v) -> Segment -> Bitmap -> Maybe (Int, Segment, Bitmap) findFirstLargerSegment key arr seg bm - | not(isize arr == length segmentsFromArray) = error ("bitmp does not describe array") - | otherwise = loop 0 bm segmentsFromArray - where segmentsFromArray = (bitmapToList bm) - loop _ _ [] = Nothing - loop i b (j:js) = - if (j < seg) - then loop (i+1) (clearBit b j) js - else case getMax (index arr i) of - Nothing -> loop (i+1) (clearBit b j) js - Just (k,_) -> - if k < key - then loop (i+1) (clearBit b j) js - else Just(i,j,b) + | not (isize arr == length segmentsFromArray) = error ("bitmp does not describe array") + | otherwise = loop 0 bm segmentsFromArray + where + segmentsFromArray = (bitmapToList bm) + loop _ _ [] = Nothing + loop i b (j : js) = + if (j < seg) + then loop (i + 1) (clearBit b j) js + else case getMax (index arr i) of + Nothing -> loop (i + 1) (clearBit b j) js + Just (k, _) -> + if k < key + then loop (i + 1) (clearBit b j) js + else Just (i, j, b) testlub :: [(Int, Bool)] -testlub = [ (i,mylub key kmap == lub key kmap) | i <- [0..55], key <- [bpairs !! i] ] - where kmap = fromList (take 50 pairs) +testlub = [(i, mylub key kmap == lub key kmap) | i <- [0 .. 55], key <- [bpairs !! i]] + where + kmap = fromList (take 50 pairs) kmap12 :: KeyMap Int kmap12 = fromList (take 12 pairs) - testIntersect :: KeyMap Int testIntersect = intersect h1x h2x h1x, h2x :: KeyMap Int -h1x = fromList [pairs !! 3,pairs !! 5, pairs !! 11, pairs !! 6, pairs !! 4] -h2x = fromList [pairs !! 3,pairs !! 7, pairs !! 4, pairs !! 6, pairs !! 8] +h1x = fromList [pairs !! 3, pairs !! 5, pairs !! 11, pairs !! 6, pairs !! 4] +h2x = fromList [pairs !! 3, pairs !! 7, pairs !! 4, pairs !! 6, pairs !! 8] + -- ========================================================= -- | Domain restrict 'hkm' to those Keys found in 's'. This algorithm -- assumes the set 's' is small compared to 'hm'. domainRestrict :: KeyMap v -> Set Key -> KeyMap v domainRestrict hm s = Set.foldl' accum Empty s - where accum ans key = - case lookupHM key hm of - Nothing -> ans - Just v -> insert key v ans + where + accum ans key = + case lookupHM key hm of + Nothing -> ans + Just v -> insert key v ans hmdr :: KeyMap Int hmdr = fromList (take 10 pairs) -set:: Set Key -set = Set.fromList [ bpairs !! 3, bpairs !! 8, bpairs !! 20] +set :: Set Key +set = Set.fromList [bpairs !! 3, bpairs !! 8, bpairs !! 20] -- ========================================== -- Operations on Bits and Bitmaps @@ -718,8 +745,8 @@ bitmapIndexedOrFull _ arr | isize arr == 0 = Empty bitmapIndexedOrFull b arr | isize arr == 1 = One (head (bitmapToList b)) (index arr 0) bitmapIndexedOrFull b arr | isize arr == 2 = Two b (index arr 0) (index arr 1) bitmapIndexedOrFull b arr - | b == fullNodeMask = Full arr - | otherwise = BitmapIndexed b arr + | b == fullNodeMask = Full arr + | otherwise = BitmapIndexed b arr {-# INLINE bitmapIndexedOrFull #-} -- | A bitmask with the 'bitsPerSegment' least significant bits set. @@ -728,7 +755,7 @@ fullNodeMask = complement (complement 0 `unsafeShiftL` maxChildren) {-# INLINE fullNodeMask #-} setBits :: [Int] -> Bitmap -setBits xs = foldl' setBit 0 xs +setBits xs = foldl' setBit 0 xs oneBits :: Bitmap oneBits = (complement (zeroBits :: Word64)) @@ -737,13 +764,12 @@ oneBits = (complement (zeroBits :: Word64)) ith :: Bitmap -> Int -> Int ith bmap i = (bitmapToList bmap !! i) - -- | A Bitmap represents a set. Split it into 3 parts (set1,present,set2) -- where 'set1' is all elements in 'bm' less than 'i' -- 'present' is if 'i' is in the set 'bm' -- 'set2' is all elements in 'bm' greater than 'i' -- We do this by using the precomputed masks: lessMasks, greaterMasks -splitBitmap :: Bitmap -> Int -> (Bitmap,Bool,Bitmap) +splitBitmap :: Bitmap -> Int -> (Bitmap, Bool, Bitmap) splitBitmap bm i = (bm .&. (index lessMasks i), testBit bm i, bm .&. (index greaterMasks i)) {- @@ -771,77 +797,80 @@ at position i=4 -} lessMasks, greaterMasks :: PArray Bitmap -lessMasks = fromlist [ setBits [0 .. i-1] | i <- [0..63]] -greaterMasks = fromlist [ setBits [i+1 .. 63] | i <- [0..63]] +lessMasks = fromlist [setBits [0 .. i -1] | i <- [0 .. 63]] +greaterMasks = fromlist [setBits [i + 1 .. 63] | i <- [0 .. 63]] testsplitBitmap :: Int -> ([Int], Bool, [Int]) -testsplitBitmap i = (bitmapToList l,b,bitmapToList g) - where (l,b,g) = splitBitmap (complement (zeroBits :: Word64)) i - +testsplitBitmap i = (bitmapToList l, b, bitmapToList g) + where + (l, b, g) = splitBitmap (complement (zeroBits :: Word64)) i -- ======================================================================= -- Operations to make new arrays out off old ones with small changes -- | /O(n)/ Make a copy of an Array that removes the 'i'th element. Decreasing the size by 1. remove :: PArray a -> Int -> PArray a -remove arr i = if i<0 || i > n - then error ("index out of bounds in 'remove' "++show i++" not in range (0,"++show (isize arr -1)++")") - else fst(withMutArray n action) - where n = (isize arr) - 1 - action marr = do - mcopy marr 0 arr 0 i - mcopy marr i arr (i+1) (n-i) - +remove arr i = + if i < 0 || i > n + then error ("index out of bounds in 'remove' " ++ show i ++ " not in range (0," ++ show (isize arr -1) ++ ")") + else fst (withMutArray n action) + where + n = (isize arr) - 1 + action marr = do + mcopy marr 0 arr 0 i + mcopy marr i arr (i + 1) (n - i) -- | /O(n)/ Overwrite the element at the given position in this array, update :: PArray t -> Int -> t -> PArray t update arr i _ - | i<0 || i >= (isize arr) - = error ("index out of bounds in 'update' "++show i++" not in range (0,"++show (isize arr -1)++")") -update arr i t = fst(withMutArray size1 action) - where size1 = isize arr - action marr = do - mcopy marr 0 arr 0 i - mwrite marr i t - mcopy marr (i+1) arr (i+1) (size1 - (i+1)) - + | i < 0 || i >= (isize arr) = + error ("index out of bounds in 'update' " ++ show i ++ " not in range (0," ++ show (isize arr -1) ++ ")") +update arr i t = fst (withMutArray size1 action) + where + size1 = isize arr + action marr = do + mcopy marr 0 arr 0 i + mwrite marr i t + mcopy marr (i + 1) arr (i + 1) (size1 - (i + 1)) -- | /O(n)/ Insert an element at the given position in this array, -- increasing its size by one. insertM :: PArray e -> Int -> e -> ST s (PArray e) insertM ary idx b - | idx < 0 || idx > counter = error ("Bounds check in 'insertAt' "++show idx++" not in range 0.."++show (counter)) - | otherwise = do - mary <- mnew (counter+1) - mcopy mary 0 ary 0 idx - mwrite mary idx b - mcopy mary (idx+1) ary idx (counter-idx) - mfreeze mary - where !counter = isize ary + | idx < 0 || idx > counter = error ("Bounds check in 'insertAt' " ++ show idx ++ " not in range 0.." ++ show (counter)) + | otherwise = do + mary <- mnew (counter + 1) + mcopy mary 0 ary 0 idx + mwrite mary idx b + mcopy mary (idx + 1) ary idx (counter - idx) + mfreeze mary + where + !counter = isize ary {-# INLINE insertM #-} -- | /O(n)/ Insert an element at the given position in this array, -- increasing its size by one. insertAt :: PArray e -> Int -> e -> PArray e -insertAt arr idx b = runST(insertM arr idx b) +insertAt arr idx b = runST (insertM arr idx b) {-# INLINE insertAt #-} -- | /O(n)/ Make a new array which has a repacement 'v' for index 'n', and copies the values -- at indices greater than 'n'. The values at indices before 'n' are thrown away. -- The size of the output, is n smaller than the size of the input. suffixAfterIndex :: Int -> v -> PArray v -> PArray v -suffixAfterIndex n v arr = fst(withMutArray size action) - where size = ((isize arr) - n) - action marr = mwrite marr 0 v >> mcopy marr 1 arr (n+1) (size - 1) +suffixAfterIndex n v arr = fst (withMutArray size action) + where + size = ((isize arr) - n) + action marr = mwrite marr 0 v >> mcopy marr 1 arr (n + 1) (size - 1) {-# INLINE suffixAfterIndex #-} -- | Create a new Array of size 'n' filled with objects 'a' arrayOf :: Int -> a -> PArray a -arrayOf n a = runST $ do +arrayOf n a = runST $ do marr <- mnew n let loop i - | i < n = mwrite marr i a >> loop (i+1) - | otherwise = pure () + | i < n = mwrite marr i a >> loop (i + 1) + | otherwise = pure () loop 0 arr <- mfreeze marr pure arr @@ -851,123 +880,135 @@ arrayOf n a = runST $ do makeKeys :: Int -> Int -> [Key] makeKeys seed cnt = loop (mkStdGen seed) cnt [] - where loop _g i ans | i <= 0 = ans - loop g i ans = case genKey g of - (key,g2) -> loop g2 (i-1) (key : ans) + where + loop _g i ans | i <= 0 = ans + loop g i ans = case genKey g of + (key, g2) -> loop g2 (i -1) (key : ans) testt :: Int -> IO () testt n = do - let (hmap,output) = tests n - histArr = histo hmap - putStrLn output - putStrLn ("histogram "++show(tolist histArr)) - + let (hmap, output) = tests n + histArr = histo hmap + putStrLn output + putStrLn ("histogram " ++ show (tolist histArr)) tests :: Int -> (KeyMap Int, String) -tests n = (hashmap,unlines - [ "bits per level = "++show bitsPerSegment - , "num levels = "++show keyPathSize - , "empty = "++show empty - , "leaf = "++show leaf - , "one = "++show one - , "two = "++show two - , "bits = "++show bit - , "full = "++show full - , "hwords = "++show hwords - , "mwords = "++show mwords - , "diff = "++show(hwords - mwords)++" %"++show((hwords*100) `div` mwords) - , "depth = "++show (hdepth hashmap) - ]) - where hashmap = fromList (take n pairs) - mapmap = Map.fromList (take n pairs) - (empty,one,two,leaf,bit,full) = count hashmap - hwords = heapWords hashmap - mwords = heapWords mapmap - -count :: KeyMap v -> (Int,Int,Int,Stat Int,Stat Int,Int) -count x = go 0 x (0,0,0,mempty,mempty,0) - where go _ Empty (e,o,t,l,b,f) = (e+1,o,t,l,b,f) - go d (One _ y) (e,o,t,l,b,f) = go (1 + d) y (e,1+o,t,l,b,f) - go d (Two _ z y) (e,o,t,l,b,f) = go (1+d) y (go (1+d) z (e,o,1+t,l,b,f)) - go d (Leaf _ _) (e,o,t,l,b,f) = (e,o,t,add d l,b,f) - go d (BitmapIndexed _ arr) (e,o,t,l,b,f) = - foldr (go (length arr + d)) (e,o,t,l,add (length arr) b,f) arr - go d (Full arr) (e,o,t,l,b,f) = foldr (go (length arr + d)) (e,o,t,l,b,f+1) arr - -countIO:: HeapWords a => KeyMap a -> IO () +tests n = + ( hashmap, + unlines + [ "bits per level = " ++ show bitsPerSegment, + "num levels = " ++ show keyPathSize, + "empty = " ++ show empty, + "leaf = " ++ show leaf, + "one = " ++ show one, + "two = " ++ show two, + "bits = " ++ show bit, + "full = " ++ show full, + "hwords = " ++ show hwords, + "mwords = " ++ show mwords, + "diff = " ++ show (hwords - mwords) ++ " %" ++ show ((hwords * 100) `div` mwords), + "depth = " ++ show (hdepth hashmap) + ] + ) + where + hashmap = fromList (take n pairs) + mapmap = Map.fromList (take n pairs) + (empty, one, two, leaf, bit, full) = count hashmap + hwords = heapWords hashmap + mwords = heapWords mapmap + +count :: KeyMap v -> (Int, Int, Int, Stat Int, Stat Int, Int) +count x = go 0 x (0, 0, 0, mempty, mempty, 0) + where + go _ Empty (e, o, t, l, b, f) = (e + 1, o, t, l, b, f) + go d (One _ y) (e, o, t, l, b, f) = go (1 + d) y (e, 1 + o, t, l, b, f) + go d (Two _ z y) (e, o, t, l, b, f) = go (1 + d) y (go (1 + d) z (e, o, 1 + t, l, b, f)) + go d (Leaf _ _) (e, o, t, l, b, f) = (e, o, t, add d l, b, f) + go d (BitmapIndexed _ arr) (e, o, t, l, b, f) = + foldr (go (length arr + d)) (e, o, t, l, add (length arr) b, f) arr + go d (Full arr) (e, o, t, l, b, f) = foldr (go (length arr + d)) (e, o, t, l, b, f + 1) arr + +countIO :: HeapWords a => KeyMap a -> IO () countIO hashmap = do - putStrLn $ unlines - [ "bits per level = "++show bitsPerSegment - , "num levels = "++show keyPathSize - , "empty = "++show empty - , "leaf = "++show leaf - , "one = "++show one - , "two = "++show two - , "bits = "++show bit - , "full = "++show full - , "hwords = "++show hwords - , "depth = "++show (hdepth hashmap) - , "histogram ="++show hist + putStrLn $ + unlines + [ "bits per level = " ++ show bitsPerSegment, + "num levels = " ++ show keyPathSize, + "empty = " ++ show empty, + "leaf = " ++ show leaf, + "one = " ++ show one, + "two = " ++ show two, + "bits = " ++ show bit, + "full = " ++ show full, + "hwords = " ++ show hwords, + "depth = " ++ show (hdepth hashmap), + "histogram =" ++ show hist ] - where (empty,one,two,leaf,bit,full) = count hashmap - hist = histo hashmap - hwords = heapWords hashmap - + where + (empty, one, two, leaf, bit, full) = count hashmap + hist = histo hashmap + hwords = heapWords hashmap + hdepth :: KeyMap v -> Int hdepth Empty = 0 hdepth (One _ x) = 1 + hdepth x hdepth (Leaf _ _) = 1 -hdepth (BitmapIndexed _ arr) = 1+ maximum(foldr (\ x ans -> hdepth x : ans) [] arr) -hdepth (Full arr) = 1+ maximum(foldr (\ x ans -> hdepth x : ans) [] arr) +hdepth (BitmapIndexed _ arr) = 1 + maximum (foldr (\x ans -> hdepth x : ans) [] arr) +hdepth (Full arr) = 1 + maximum (foldr (\x ans -> hdepth x : ans) [] arr) hdepth (Two _ x y) = 1 + max (hdepth x) (hdepth y) increment :: (ArrayPair arr marr a, Num a) => marr s a -> Int -> ST s () -increment marr i = do { n <- mindex marr i; mwrite marr i (n+1) } +increment marr i = do n <- mindex marr i; mwrite marr i (n + 1) histogram :: KeyMap v -> PA.MutableArray s Int -> ST s () histogram Empty _ = pure () -histogram (One _ x) marr = increment marr 1 >> histogram x marr +histogram (One _ x) marr = increment marr 1 >> histogram x marr histogram (Leaf _ _) _ = pure () -histogram (BitmapIndexed _ arr) marr = increment marr (isize arr-1) >> mapM_ (\ x -> histogram x marr) arr -histogram (Full arr) marr = increment marr (intSize-1) >> mapM_ (\ x -> histogram x marr) arr +histogram (BitmapIndexed _ arr) marr = increment marr (isize arr -1) >> mapM_ (\x -> histogram x marr) arr +histogram (Full arr) marr = increment marr (intSize -1) >> mapM_ (\x -> histogram x marr) arr histogram (Two _ x y) marr = increment marr 2 >> histogram x marr >> histogram y marr histo :: KeyMap v -> PA.Array Int -histo x = fst(withMutArray intSize process) - where process marr = do { initialize (intSize - 1) ; histogram x marr } - where initialize n | n <0 = pure () - initialize n = mwrite marr n 0 >> initialize (n-1) +histo x = fst (withMutArray intSize process) + where + process marr = do initialize (intSize - 1); histogram x marr + where + initialize n | n < 0 = pure () + initialize n = mwrite marr n 0 >> initialize (n -1) bpairs :: [Key] bpairs = makeKeys 99 1500000 - -- makeKeys 3 15 -pairs :: [ (Key,Int) ] -pairs = zip bpairs [0..] +-- makeKeys 3 15 + +pairs :: [(Key, Int)] +pairs = zip bpairs [0 ..] -- =================================================== data Stat n = Stat n n (Maybe n) (Maybe n) -liftM:: (t -> t -> t) -> Maybe t -> Maybe t -> Maybe t -liftM f (Just x) (Just y) = Just(f x y) +liftM :: (t -> t -> t) -> Maybe t -> Maybe t -> Maybe t +liftM f (Just x) (Just y) = Just (f x y) liftM _ Nothing (Just y) = Just y liftM _ (Just x) Nothing = Just x liftM _ Nothing Nothing = Nothing -instance (Ord n,Num n) => Semigroup (Stat n) where - (Stat c1 s1 mx1 mn1) <> (Stat c2 s2 mx2 mn2) = - Stat (c1+c2) (s1 + s2) (liftM max mx1 mx2) (liftM min mn1 mn2) +instance (Ord n, Num n) => Semigroup (Stat n) where + (Stat c1 s1 mx1 mn1) <> (Stat c2 s2 mx2 mn2) = + Stat (c1 + c2) (s1 + s2) (liftM max mx1 mx2) (liftM min mn1 mn2) -instance (Ord n,Num n) => Monoid (Stat n) where +instance (Ord n, Num n) => Monoid (Stat n) where mempty = Stat 0 0 Nothing Nothing -instance (Integral n,Show n) => Show (Stat n) where - show (Stat c s mx mn) = "{count= "++show c++", sum="++show s++", max="++show mx++ - ", min="++show mn++ - (if c==0 then "}" else ", avg="++show(div s c)++"}") +instance (Integral n, Show n) => Show (Stat n) where + show (Stat c s mx mn) = + "{count= " ++ show c ++ ", sum=" ++ show s ++ ", max=" ++ show mx + ++ ", min=" + ++ show mn + ++ (if c == 0 then "}" else ", avg=" ++ show (div s c) ++ "}") -add :: (Num n,Ord n) => n -> Stat n -> Stat n +add :: (Num n, Ord n) => n -> Stat n -> Stat n add n stat = (Stat 1 n (Just n) (Just n)) <> stat -- ==================== @@ -975,33 +1016,39 @@ add n stat = (Stat 1 n (Just n) (Just n)) <> stat bug :: Int -> IO (KeyMap Int) bug n = do - let ps = take n pairs -- zip (makeKeys 3 n) [0..] - hh (k@(Key m0 m1 _ _ ),v) = show m0++" "++show m1++" "++show (keyPath k)++" "++show v - putStrLn (unlines (map hh ps)) + let ps = take n pairs -- zip (makeKeys 3 n) [0..] + hh (k@(Key m0 m1 _ _), v) = show m0 ++ " " ++ show m1 ++ " " ++ show (keyPath k) ++ " " ++ show v + putStrLn (unlines (map hh ps)) - -- putStrLn (show (fromList ps)) - pure (fromList ps) + -- putStrLn (show (fromList ps)) + pure (fromList ps) -try :: [(Key,Int)] -> IO () +try :: [(Key, Int)] -> IO () try ps = do - let hh (k@(Key m0 m1 _ _),v) = show m0++" "++show m1++" "++show (keyPath k)++" "++show v - putStrLn (unlines (map hh ps)) - putStrLn (show (fromList ps)) - + let hh (k@(Key m0 m1 _ _), v) = show m0 ++ " " ++ show m1 ++ " " ++ show (keyPath k) ++ " " ++ show v + putStrLn (unlines (map hh ps)) + putStrLn (show (fromList ps)) testlookup :: Int -> Int -> Bool testlookup seed n = all ok results - where ps = zip (makeKeys seed n) [0..] - keymap :: KeyMap Int - keymap = fromList ps - results = [ (i,lookupHM (fst(ps !! i)) keymap) | i <- [0..(n-1)]] - ok (_,Just _) = True - ok (i,Nothing) = error ("testlookup failure: "++show i++" "++show pair++"\n"++ - show (keyPath (fst pair))++"\n "++show keymap) - where pair = (ps !! i) + where + ps = zip (makeKeys seed n) [0 ..] + keymap :: KeyMap Int + keymap = fromList ps + results = [(i, lookupHM (fst (ps !! i)) keymap) | i <- [0 .. (n -1)]] + ok (_, Just _) = True + ok (i, Nothing) = + error + ( "testlookup failure: " ++ show i ++ " " ++ show pair ++ "\n" + ++ show (keyPath (fst pair)) + ++ "\n " + ++ show keymap + ) + where + pair = (ps !! i) -- ====================================================================================== --- Helper functions for Pretty Printers +-- Helper functions for Pretty Printers newtype PrettyAnn = Width Int @@ -1020,7 +1067,7 @@ instance PrettyA Word64 where instance PrettyA v => PrettyA (KeyMap v) where prettyA km = ppKeyMap prettyA km - + ppWord64 :: Word64 -> Doc a ppWord64 = viaShow @@ -1078,21 +1125,23 @@ ppKey :: Key -> PDoc ppKey (Key w0 _ _ _) = ppWord64 w0 ppBitmap :: Word64 -> PDoc -ppBitmap x = text (pack(showBM x)) +ppBitmap x = text (pack (showBM x)) ppKeyMap :: (v -> PDoc) -> KeyMap v -> PDoc -ppKeyMap p (Leaf k v) = ppSexp "L" [ppKey k,p v] +ppKeyMap p (Leaf k v) = ppSexp "L" [ppKey k, p v] ppKeyMap _ Empty = text "E" -ppKeyMap p (One x mp) = ppSexp "O" [ppInt x,ppKeyMap p mp] -ppKeyMap p (Two x m1 m2) = ppSexp "T" [ppBitmap x ,ppKeyMap p m1, ppKeyMap p m2] +ppKeyMap p (One x mp) = ppSexp "O" [ppInt x, ppKeyMap p mp] +ppKeyMap p (Two x m1 m2) = ppSexp "T" [ppBitmap x, ppKeyMap p m1, ppKeyMap p m2] ppKeyMap p (BitmapIndexed x arr) = ppSexp "B" [ppList q (zip (bitmapToList x) (tolist arr))] - where q (i,a) = ppInt i <+> ppKeyMap p a + where + q (i, a) = ppInt i <+> ppKeyMap p a ppKeyMap p (Full arr) = ppSexp "F" [ppList q (zip (bitmapToList fullNodeMask) (tolist arr))] - where q (i,a) = ppInt i <+> ppKeyMap p a + where + q (i, a) = ppInt i <+> ppKeyMap p a instance PrettyA v => Show (KeyMap v) where - show x = show(ppKeyMap prettyA x) - showList xs x = unlines (map (\ y -> "\n"++ show(ppKeyMap prettyA y)) xs) ++ x + show x = show (ppKeyMap prettyA x) + showList xs x = unlines (map (\y -> "\n" ++ show (ppKeyMap prettyA y)) xs) ++ x -- ==================================================================== -- Bulk insert @@ -1117,34 +1166,37 @@ On the right is the Key and 'v' as a (Leaf KeyMap 'v'). The rows are sorted by t ([14,13,49, 4, 7,37, 4,23, 5,28], (L 16388898632001935134 6)) ([14,43,28,10,55,12,51,63,56,24], (L 16923449273545098794 7)) - For each column we break the code into groups where the Segment matches on that column. Above we have grouped the 6's, 9's and 14's together by column 1 -} -- | Make a (KeyMap v) out of the input. Works by focusing on a particular range of rows ('lo' .. 'hi') -- It calls it self recursively, by chooing a smaller range, and increasing the column number 'n' by 1. -bulkInsert :: Int -> PArray (Path,KeyMap v) -> Int -> Int -> KeyMap v -bulkInsert _n arr lo hi | lo < 0 || lo > n || hi <0 || hi > n = - error ("lo or hi out of bounds (0 .. "++show n++") lo="++show lo++" hi="++show hi) - where n = isize arr - 1 -bulkInsert _n arr lo hi | lo==hi = snd (index arr lo) -bulkInsert n arr lo hi = bitmapIndexedOrFull bmap (fst(withMutArray size (action 0 segmentRanges))) - where (size,segments,bmap) = getBitmap n arr lo hi - segmentRanges = ranges n arr lo hi segments - action _j [] _marr = pure () - action j ((lox,hix):more) marr = do - mwrite marr j (bulkInsert (n+1) arr lox hix) - action (j+1) more marr +bulkInsert :: Int -> PArray (Path, KeyMap v) -> Int -> Int -> KeyMap v +bulkInsert _n arr lo hi + | lo < 0 || lo > n || hi < 0 || hi > n = + error ("lo or hi out of bounds (0 .. " ++ show n ++ ") lo=" ++ show lo ++ " hi=" ++ show hi) + where + n = isize arr - 1 +bulkInsert _n arr lo hi | lo == hi = snd (index arr lo) +bulkInsert n arr lo hi = bitmapIndexedOrFull bmap (fst (withMutArray size (action 0 segmentRanges))) + where + (size, segments, bmap) = getBitmap n arr lo hi + segmentRanges = ranges n arr lo hi segments + action _j [] _marr = pure () + action j ((lox, hix) : more) marr = do + mwrite marr j (bulkInsert (n + 1) arr lox hix) + action (j + 1) more marr -- | get the bitmap of column 'n' for the rows 'lo' to 'hi' of arr. -- This is a set of all the segments present for that range. -getBitmap :: Int -> PArray (Path,KeyMap v) -> Int -> Int -> (Int,[Segment],Bitmap) -getBitmap n arr lo hi = (size,segments,bitmap) - where accum bm (path,_) = setBit bm (path !! n) - bitmap = foldRange accum 0 arr lo hi - segments = bitmapToList bitmap - size = length segments +getBitmap :: Int -> PArray (Path, KeyMap v) -> Int -> Int -> (Int, [Segment], Bitmap) +getBitmap n arr lo hi = (size, segments, bitmap) + where + accum bm (path, _) = setBit bm (path !! n) + bitmap = foldRange accum 0 arr lo hi + segments = bitmapToList bitmap + size = length segments -- | Given starting row 'i' find the last row 'j', such that column 'n' has 'val' in all rows 'i' to 'j' -- Both 'i' and 'j' must be in the range (i .. maxi), which denote the beginning and end of the @@ -1152,35 +1204,37 @@ getBitmap n arr lo hi = (size,segments,bitmap) contiguous :: Int -> Int -> Int -> Int -> PArray ([Int], b) -> Int contiguous _n _val i _maxi _arr | i < 0 = i contiguous _n _val i _maxi arr | i >= isize arr = isize arr - 1 -contiguous _n _val i maxi _arr | i > maxi = i-1 -- Do not look outside the valid range for matching val -contiguous n val i maxi arr = if (fst(index arr i) !! n) == val then contiguous n val (i+1) maxi arr else (i-1) +contiguous _n _val i maxi _arr | i > maxi = i -1 -- Do not look outside the valid range for matching val +contiguous n val i maxi arr = if (fst (index arr i) !! n) == val then contiguous n val (i + 1) maxi arr else (i -1) -- | compute the row ranges where the 'n' column has the same value 'val', we assume the rows are sorted -- in ascending order, and so is the list of 'vals' ranges :: Int -> PArray ([Int], b) -> Int -> Int -> [Int] -> [(Int, Int)] ranges _n _arr _i _hi [] = [] -ranges n arr i hi (val:vals) = (i,j) : ranges n arr (j+1) hi vals - where j = contiguous n val i hi arr +ranges n arr i hi (val : vals) = (i, j) : ranges n arr (j + 1) hi vals + where + j = contiguous n val i hi arr -- like foldl, except we fold only a limited range ('lo' .. 'hi') of the indices of 'arr' -foldRange :: (ans -> t -> ans) -> ans -> PArray t -> Int -> Int -> ans +foldRange :: (ans -> t -> ans) -> ans -> PArray t -> Int -> Int -> ans foldRange _accum ans _arr lo hi | lo > hi = ans -foldRange accum ans arr lo hi = foldRange accum (accum ans (index arr lo)) arr (lo+1) hi +foldRange accum ans arr lo hi = foldRange accum (accum ans (index arr lo)) arr (lo + 1) hi -- ========================================== -- test that incremental and bulk loading create the same KeyMap testbulk :: Int -> Int -> (KeyMap Int, Bool) testbulk seed n = (bulk, bulk == incremental) - where keys = makeKeys seed n - f (k,v) = (keyPath k,Leaf k v) - cmp (p1,_) (p2,_) = compare p1 p2 - pairsb = zip keys [0..] - paths:: [(Path,KeyMap Int)] - paths = sortBy cmp $ map f pairsb - pathArr = fromlist paths - incremental = fromList pairsb - bulk = bulkInsert 0 pathArr 0 (isize pathArr - 1) + where + keys = makeKeys seed n + f (k, v) = (keyPath k, Leaf k v) + cmp (p1, _) (p2, _) = compare p1 p2 + pairsb = zip keys [0 ..] + paths :: [(Path, KeyMap Int)] + paths = sortBy cmp $ map f pairsb + pathArr = fromlist paths + incremental = fromList pairsb + bulk = bulkInsert 0 pathArr 0 (isize pathArr - 1) -- =================================================== -- try and measure that bulk loading allocates less memory @@ -1191,26 +1245,27 @@ testbulk seed n = (bulk, bulk == incremental) keysbulk :: [Key] keysbulk = makeKeys 199 50000 -pairsbulk :: [(Key,Int)] -pairsbulk = zip keysbulk [0..] +pairsbulk :: [(Key, Int)] +pairsbulk = zip keysbulk [0 ..] -pathsbulk :: [(Path,KeyMap Int)] +pathsbulk :: [(Path, KeyMap Int)] pathsbulk = sortBy cmpbulk $ map fbulk pairsbulk - where fbulk (k,v) = (keyPath k,Leaf k v) - cmpbulk (p1,_) (p2,_) = compare p1 p2 + where + fbulk (k, v) = (keyPath k, Leaf k v) + cmpbulk (p1, _) (p2, _) = compare p1 p2 -- use the ghci command :set +s to enable statistics -- (2.30 secs, 1,159,454,816 bytes) size = 10000 -- (13.16 secs, 6,829,808,992 bytes) size = 50000 -- (1.74 secs, 1,623,304,184 bytes) size = 50000, no print tryincremental :: Int -tryincremental = sizeKeyMap(fromList pairsbulk) +tryincremental = sizeKeyMap (fromList pairsbulk) -- (1.93 secs, 968,147,688 bytes) size = 10000 -- (11.96 secs, 5,937,089,424 bytes) size = 50000 -- (0.94 secs, 661,637,584 bytes) size = 50000, no print trybulk :: Int -trybulk = sizeKeyMap(bulkInsert 0 pathArr 0 (isize pathArr - 1)) - where pathArr :: PArray (Path,KeyMap Int) - pathArr = fromlist pathsbulk - +trybulk = sizeKeyMap (bulkInsert 0 pathArr 0 (isize pathArr - 1)) + where + pathArr :: PArray (Path, KeyMap Int) + pathArr = fromlist pathsbulk diff --git a/libs/compact-map/test/Test/Compact/KeyMap.hs b/libs/compact-map/test/Test/Compact/KeyMap.hs index e86be500f4d..6bc553a6cdb 100644 --- a/libs/compact-map/test/Test/Compact/KeyMap.hs +++ b/libs/compact-map/test/Test/Compact/KeyMap.hs @@ -1,16 +1,15 @@ module Test.Compact.KeyMap where import Data.Compact.KeyMap as KeyMap +import Test.QuickCheck import Test.Tasty import Test.Tasty.QuickCheck -import Test.QuickCheck - instance Arbitrary Key where arbitrary = oneof - [ Key <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary - , Key <$> chooseAny <*> chooseAny <*> chooseAny <*> chooseAny + [ Key <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary, + Key <$> chooseAny <*> chooseAny <*> chooseAny <*> chooseAny ] instance Arbitrary a => Arbitrary (KeyMap a) where From 02f85c101a85acac37144fd9122e1fb176e53509 Mon Sep 17 00:00:00 2001 From: Alexey Kuleshevich Date: Tue, 26 Oct 2021 18:44:37 +0300 Subject: [PATCH 19/19] Take care of comilation warnings --- cabal.project | 3 +++ libs/compact-map/compact-map.cabal | 6 +++--- libs/compact-map/src/Data/Compact/KeyMap.hs | 3 --- libs/compact-map/test/Test/Compact/KeyMap.hs | 2 ++ libs/small-steps/small-steps.cabal | 5 ----- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/cabal.project b/cabal.project index d13637ab159..074cd924ea5 100644 --- a/cabal.project +++ b/cabal.project @@ -173,6 +173,9 @@ package cardano-ledger-test package cardano-protocol-tpraos ghc-options: -Werror +package compact-map + ghc-options: -Werror + package non-integral ghc-options: -Werror diff --git a/libs/compact-map/compact-map.cabal b/libs/compact-map/compact-map.cabal index dd3e1c6d01d..7c403a0d7ad 100644 --- a/libs/compact-map/compact-map.cabal +++ b/libs/compact-map/compact-map.cabal @@ -52,11 +52,11 @@ test-suite tests type: exitcode-stdio-1.0 default-language: Haskell2010 build-depends: base - , containers + -- , containers , tasty - , tasty-expected-failure + -- , tasty-expected-failure , tasty-quickcheck - , tasty-hunit + -- , tasty-hunit , compact-map , QuickCheck ghc-options: -threaded diff --git a/libs/compact-map/src/Data/Compact/KeyMap.hs b/libs/compact-map/src/Data/Compact/KeyMap.hs index 8dedd86291d..377a72e0f55 100644 --- a/libs/compact-map/src/Data/Compact/KeyMap.hs +++ b/libs/compact-map/src/Data/Compact/KeyMap.hs @@ -40,9 +40,6 @@ import Prettyprinter import qualified Prettyprinter.Internal as Pretty import System.Random (RandomGen, genWord64, mkStdGen) --- import Debug.Trace - --- type PArray = PA.Array type PArray = Small.SmallArray -- | Show 'n' as a binary number with most significant bits on the left. diff --git a/libs/compact-map/test/Test/Compact/KeyMap.hs b/libs/compact-map/test/Test/Compact/KeyMap.hs index 6bc553a6cdb..3f3d5e8fd00 100644 --- a/libs/compact-map/test/Test/Compact/KeyMap.hs +++ b/libs/compact-map/test/Test/Compact/KeyMap.hs @@ -1,3 +1,5 @@ +{-# OPTIONS_GHC -Wno-orphans #-} + module Test.Compact.KeyMap where import Data.Compact.KeyMap as KeyMap diff --git a/libs/small-steps/small-steps.cabal b/libs/small-steps/small-steps.cabal index 5e79194580c..9bcacbc07a8 100644 --- a/libs/small-steps/small-steps.cabal +++ b/libs/small-steps/small-steps.cabal @@ -52,10 +52,8 @@ library , Control.SetAlgebra build-depends: aeson , ansi-wl-pprint - , array , base >=4.11 && <5 , bytestring - , cardano-prelude , cborg , containers , cryptonite @@ -65,9 +63,6 @@ library , free , mtl , nothunks - , prettyprinter - , primitive - , random , strict-containers , text , transformers >= 0.5