Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve {Set,Map}.fromDistinct{Asc,Desc}List #950

Merged
merged 5 commits into from
Jun 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions containers-tests/benchmarks/Map.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ main = do
m_even = M.fromAscList elems_even :: M.Map Int Int
m_odd = M.fromAscList elems_odd :: M.Map Int Int
evaluate $ rnf [m, m_even, m_odd]
evaluate $ rnf elems_rev
defaultMain
[ bench "lookup absent" $ whnf (lookup evens) m_odd
, bench "lookup present" $ whnf (lookup evens) m_even
Expand Down Expand Up @@ -90,13 +91,17 @@ main = do
, bench "fromList-desc" $ whnf M.fromList (reverse elems)
, bench "fromAscList" $ whnf M.fromAscList elems
, bench "fromDistinctAscList" $ whnf M.fromDistinctAscList elems
, bench "fromDistinctAscList:fusion" $ whnf (\n -> M.fromDistinctAscList [(i,i) | i <- [1..n]]) bound
, bench "fromDistinctDescList" $ whnf M.fromDistinctDescList elems_rev
, bench "fromDistinctDescList:fusion" $ whnf (\n -> M.fromDistinctDescList [(i,i) | i <- [n,n-1..1]]) bound
, bench "minView" $ whnf (\m' -> case M.minViewWithKey m' of {Nothing -> 0; Just ((k,v),m'') -> k+v+M.size m''}) (M.fromAscList $ zip [1..10::Int] [100..110::Int])
]
where
bound = 2^12
elems = zip keys values
elems_even = zip evens evens
elems_odd = zip odds odds
elems_rev = reverse elems
keys = [1..bound]
evens = [2,4..bound]
odds = [1,3..bound]
Expand Down
12 changes: 9 additions & 3 deletions containers-tests/benchmarks/Set.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ main = do
s_odd = S.fromAscList elems_odd :: S.Set Int
strings_s = S.fromList strings
evaluate $ rnf [s, s_even, s_odd]
evaluate $ rnf elems_rev
defaultMain
[ bench "member" $ whnf (member elems) s
, bench "insert" $ whnf (ins elems) S.empty
Expand All @@ -34,6 +35,9 @@ main = do
, bench "fromList-desc" $ whnf S.fromList (reverse elems)
, bench "fromAscList" $ whnf S.fromAscList elems
, bench "fromDistinctAscList" $ whnf S.fromDistinctAscList elems
, bench "fromDistinctAscList:fusion" $ whnf (\n -> S.fromDistinctAscList [1..n]) bound
, bench "fromDistinctDescList" $ whnf S.fromDistinctDescList elems_rev
, bench "fromDistinctDescList:fusion" $ whnf (\n -> S.fromDistinctDescList [n,n-1..1]) bound
, bench "disjoint:false" $ whnf (S.disjoint s) s_even
, bench "disjoint:true" $ whnf (S.disjoint s_odd) s_even
, bench "null.intersection:false" $ whnf (S.null. S.intersection s) s_even
Expand All @@ -53,9 +57,11 @@ main = do
, bench "member.powerSet (18)" $ whnf (\ s -> all (flip S.member s) s) (S.powerSet (S.fromList [1..18]))
]
where
elems = [1..2^12]
elems_even = [2,4..2^12]
elems_odd = [1,3..2^12]
bound = 2^12
elems = [1..bound]
elems_even = [2,4..bound]
elems_odd = [1,3..bound]
elems_rev = reverse elems
strings = map show elems

member :: [Int] -> S.Set Int -> Int
Expand Down
22 changes: 16 additions & 6 deletions containers-tests/tests/map-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ main = defaultMain $ testGroup "map-properties"
, testProperty "mergeWithKey model" prop_mergeWithKeyModel
, testProperty "mergeA effects" prop_mergeA_effects
, testProperty "fromAscList" prop_ordered
, testProperty "fromDistinctAscList" prop_fromDistinctAscList
, testProperty "fromDescList" prop_rev_ordered
, testProperty "fromDistinctDescList" prop_fromDistinctDescList
, testProperty "fromList then toList" prop_list
Expand Down Expand Up @@ -1243,10 +1244,13 @@ prop_list xs = (sort (nub xs) == [x | (x,()) <- toList (fromList [(x,()) | x <-
prop_descList :: [Int] -> Bool
prop_descList xs = (reverse (sort (nub xs)) == [x | (x,()) <- toDescList (fromList [(x,()) | x <- xs])])

prop_fromDistinctDescList :: Int -> [A] -> Property
prop_fromDistinctDescList top lst = valid converted .&&. (toList converted === reverse original) where
original = zip [top, (top-1)..0] lst
converted = fromDistinctDescList original
prop_fromDistinctDescList :: [(Int, A)] -> Property
prop_fromDistinctDescList xs =
valid t .&&.
toList t === nub_sort_xs
where
t = fromDistinctDescList (reverse nub_sort_xs)
nub_sort_xs = List.map List.head $ List.groupBy ((==) `on` fst) $ List.sortBy (comparing fst) xs

prop_ascDescList :: [Int] -> Bool
prop_ascDescList xs = toAscList m == reverse (toDescList m)
Expand All @@ -1256,10 +1260,16 @@ prop_fromList :: [Int] -> Bool
prop_fromList xs
= case fromList (zip xs xs) of
t -> t == fromAscList (zip sort_xs sort_xs) &&
t == fromDistinctAscList (zip nub_sort_xs nub_sort_xs) &&
t == List.foldr (uncurry insert) empty (zip xs xs)
where sort_xs = sort xs
nub_sort_xs = List.map List.head $ List.group sort_xs

prop_fromDistinctAscList :: [(Int, A)] -> Property
prop_fromDistinctAscList xs =
valid t .&&.
toList t === nub_sort_xs
where
t = fromDistinctAscList nub_sort_xs
nub_sort_xs = List.map List.head $ List.groupBy ((==) `on` fst) $ List.sortBy (comparing fst) xs

----------------------------------------------------------------

Expand Down
20 changes: 18 additions & 2 deletions containers-tests/tests/set-properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ main = defaultMain $ testGroup "set-properties"
, testProperty "prop_DescList" prop_DescList
, testProperty "prop_AscDescList" prop_AscDescList
, testProperty "prop_fromList" prop_fromList
, testProperty "prop_fromDistinctAscList" prop_fromDistinctAscList
, testProperty "prop_fromListDesc" prop_fromListDesc
, testProperty "prop_fromDistinctDescList" prop_fromDistinctDescList
, testProperty "prop_isProperSubsetOf" prop_isProperSubsetOf
, testProperty "prop_isProperSubsetOf2" prop_isProperSubsetOf2
, testProperty "prop_isSubsetOf" prop_isSubsetOf
Expand Down Expand Up @@ -514,11 +516,17 @@ prop_AscDescList xs = toAscList s == reverse (toDescList s)
prop_fromList :: [Int] -> Property
prop_fromList xs =
t === fromAscList sort_xs .&&.
t === fromDistinctAscList nub_sort_xs .&&.
t === List.foldr insert empty xs
where t = fromList xs
sort_xs = sort xs
nub_sort_xs = List.map List.head $ List.group sort_xs

prop_fromDistinctAscList :: [Int] -> Property
prop_fromDistinctAscList xs =
valid t .&&.
toList t === nub_sort_xs
where
t = fromDistinctAscList nub_sort_xs
nub_sort_xs = List.map List.head $ List.group $ sort xs

prop_fromListDesc :: [Int] -> Property
prop_fromListDesc xs =
Expand All @@ -529,6 +537,14 @@ prop_fromListDesc xs =
sort_xs = reverse (sort xs)
nub_sort_xs = List.map List.head $ List.group sort_xs

prop_fromDistinctDescList :: [Int] -> Property
prop_fromDistinctDescList xs =
valid t .&&.
toList t === nub_sort_xs
where
t = fromDistinctDescList (reverse nub_sort_xs)
nub_sort_xs = List.map List.head $ List.group $ sort xs

{--------------------------------------------------------------------
Set operations are like IntSet operations
--------------------------------------------------------------------}
Expand Down
87 changes: 57 additions & 30 deletions containers/src/Data/Map/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -355,8 +355,15 @@ module Data.Map.Internal (
, link
, link2
, glue
, fromDistinctAscList_linkTop
, fromDistinctAscList_linkAll
, fromDistinctDescList_linkTop
, fromDistinctDescList_linkAll
, MaybeS(..)
, Identity(..)
, FromDistinctMonoState(..)
, Stack(..)
, foldl'Stack

-- Used by Map.Merge.Lazy
, mapWhenMissing
Expand Down Expand Up @@ -3410,8 +3417,7 @@ instance (Ord k) => GHCExts.IsList (Map k v) where
-- If the list contains more than one value for the same key, the last value
-- for the key is retained.
--
-- If the keys of the list are ordered, linear-time implementation is used,
-- with the performance equal to 'fromDistinctAscList'.
-- If the keys of the list are ordered, a linear-time implementation is used.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use the same approach (and share code) here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a rough attempt but it performed worse for the not-sorted case. I'll try to see if it can be improved, but it'd probably be best in a separate PR.

--
-- > fromList [] == empty
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
Expand Down Expand Up @@ -3701,22 +3707,26 @@ fromDescListWithKey f xs

-- For some reason, when 'singleton' is used in fromDistinctAscList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctAscList :: [(k,a)] -> Map k a
fromDistinctAscList [] = Tip
fromDistinctAscList ((kx0, x0) : xs0) = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
where
go !_ t [] = t
go s l ((kx, x) : xs) = case create s xs of
(r :*: ys) -> let !t' = link kx x l r
in go (s `shiftL` 1) t' ys

create !_ [] = (Tip :*: [])
create s xs@(x' : xs')
| s == 1 = case x' of (kx, x) -> (Bin 1 kx x Tip Tip :*: xs')
| otherwise = case create (s `shiftR` 1) xs of
res@(_ :*: []) -> res
(l :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
(r :*: zs) -> (link ky y l r :*: zs)
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 l stk) (kx, x) = State0 (Push kx x l stk)
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion

fromDistinctAscList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
fromDistinctAscList_linkTop r@(Bin rsz _ _ _ _) (Push kx x l@(Bin lsz _ _ _ _) stk)
| rsz == lsz = fromDistinctAscList_linkTop (bin kx x l r) stk
fromDistinctAscList_linkTop l stk = State1 l stk
{-# INLINABLE fromDistinctAscList_linkTop #-}

fromDistinctAscList_linkAll :: FromDistinctMonoState k a -> Map k a
fromDistinctAscList_linkAll (State0 stk) = foldl'Stack (\r kx x l -> link kx x l r) Tip stk
fromDistinctAscList_linkAll (State1 r0 stk) = foldl'Stack (\r kx x l -> link kx x l r) r0 stk
{-# INLINABLE fromDistinctAscList_linkAll #-}

-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
-- /The precondition is not checked./
Expand All @@ -3729,22 +3739,39 @@ fromDistinctAscList ((kx0, x0) : xs0) = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0

-- For some reason, when 'singleton' is used in fromDistinctDescList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctDescList :: [(k,a)] -> Map k a
fromDistinctDescList [] = Tip
fromDistinctDescList ((kx0, x0) : xs0) = go (1 :: Int) (Bin 1 kx0 x0 Tip Tip) xs0
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
where
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 r stk) (kx, x) = State0 (Push kx x r stk)
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion

fromDistinctDescList_linkTop :: Map k a -> Stack k a -> FromDistinctMonoState k a
fromDistinctDescList_linkTop l@(Bin lsz _ _ _ _) (Push kx x r@(Bin rsz _ _ _ _) stk)
| lsz == rsz = fromDistinctDescList_linkTop (bin kx x l r) stk
fromDistinctDescList_linkTop r stk = State1 r stk
{-# INLINABLE fromDistinctDescList_linkTop #-}

fromDistinctDescList_linkAll :: FromDistinctMonoState k a -> Map k a
fromDistinctDescList_linkAll (State0 stk) = foldl'Stack (\l kx x r -> link kx x l r) Tip stk
fromDistinctDescList_linkAll (State1 l0 stk) = foldl'Stack (\l kx x r -> link kx x l r) l0 stk
{-# INLINABLE fromDistinctDescList_linkAll #-}

data FromDistinctMonoState k a
= State0 !(Stack k a)
| State1 !(Map k a) !(Stack k a)

data Stack k a = Push !k a !(Map k a) !(Stack k a) | Nada

foldl'Stack :: (b -> k -> a -> Map k a -> b) -> b -> Stack k a -> b
foldl'Stack f = go
where
go !_ t [] = t
go s r ((kx, x) : xs) = case create s xs of
(l :*: ys) -> let !t' = link kx x l r
in go (s `shiftL` 1) t' ys

create !_ [] = (Tip :*: [])
create s xs@(x' : xs')
| s == 1 = case x' of (kx, x) -> (Bin 1 kx x Tip Tip :*: xs')
| otherwise = case create (s `shiftR` 1) xs of
res@(_ :*: []) -> res
(r :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
(l :*: zs) -> (link ky y l r :*: zs)
go !z Nada = z
go z (Push kx x t stk) = go (f z kx x t) stk
{-# INLINE foldl'Stack #-}

{-
-- Functions very similar to these were used to implement
Expand Down
53 changes: 21 additions & 32 deletions containers/src/Data/Map/Strict/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,12 @@ import Data.Map.Internal
, filterAMissing
, merge
, mergeA
, fromDistinctAscList_linkTop
, fromDistinctAscList_linkAll
, fromDistinctDescList_linkTop
, fromDistinctDescList_linkAll
, FromDistinctMonoState (..)
, Stack (..)
, (!)
, (!?)
, (\\)
Expand Down Expand Up @@ -1489,8 +1495,7 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA
-- If the list contains more than one value for the same key, the last value
-- for the key is retained.
--
-- If the keys of the list are ordered, linear-time implementation is used,
-- with the performance equal to 'fromDistinctAscList'.
-- If the keys of the list are ordered, a linear-time implementation is used.
--
-- > fromList [] == empty
-- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")]
Expand Down Expand Up @@ -1697,23 +1702,15 @@ fromDescListWithKey f xs

-- For some reason, when 'singleton' is used in fromDistinctAscList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctAscList :: [(k,a)] -> Map k a
fromDistinctAscList [] = Tip
fromDistinctAscList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
fromDistinctAscList = fromDistinctAscList_linkAll . Foldable.foldl' next (State0 Nada)
where
go !_ t [] = t
go s l ((kx, x) : xs) =
case create s xs of
(r :*: ys) -> x `seq` let !t' = link kx x l r
in go (s `shiftL` 1) t' ys

create !_ [] = (Tip :*: [])
create s xs@(x' : xs')
| s == 1 = case x' of (kx, x) -> x `seq` (Bin 1 kx x Tip Tip :*: xs')
| otherwise = case create (s `shiftR` 1) xs of
res@(_ :*: []) -> res
(l :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
(r :*: zs) -> y `seq` (link ky y l r :*: zs)
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, !x) = fromDistinctAscList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 l stk) (kx, x) = State0 (Push kx x l stk)
{-# INLINE fromDistinctAscList #-} -- INLINE for fusion

-- | \(O(n)\). Build a map from a descending list of distinct elements in linear time.
-- /The precondition is not checked./
Expand All @@ -1724,20 +1721,12 @@ fromDistinctAscList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip T

-- For some reason, when 'singleton' is used in fromDistinctDescList or in
-- create, it is not inlined, so we inline it manually.

-- See Note [fromDistinctAscList implementation] in Data.Set.Internal.
fromDistinctDescList :: [(k,a)] -> Map k a
fromDistinctDescList [] = Tip
fromDistinctDescList ((kx0, x0) : xs0) = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0
fromDistinctDescList = fromDistinctDescList_linkAll . Foldable.foldl' next (State0 Nada)
where
go !_ t [] = t
go s r ((kx, x) : xs) =
case create s xs of
(l :*: ys) -> x `seq` let !t' = link kx x l r
in go (s `shiftL` 1) t' ys

create !_ [] = (Tip :*: [])
create s xs@(x' : xs')
| s == 1 = case x' of (kx, x) -> x `seq` (Bin 1 kx x Tip Tip :*: xs')
| otherwise = case create (s `shiftR` 1) xs of
res@(_ :*: []) -> res
(r :*: (ky, y):ys) -> case create (s `shiftR` 1) ys of
(l :*: zs) -> y `seq` (link ky y l r :*: zs)
next :: FromDistinctMonoState k a -> (k,a) -> FromDistinctMonoState k a
next (State0 stk) (!kx, !x) = fromDistinctDescList_linkTop (Bin 1 kx x Tip Tip) stk
next (State1 r stk) (kx, x) = State0 (Push kx x r stk)
{-# INLINE fromDistinctDescList #-} -- INLINE for fusion
Loading