Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tagless representation for Term #15

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 54 additions & 44 deletions src/Kanren/Core.hs
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilyDependencies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilyDependencies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -Wno-redundant-constraints #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ViewPatterns #-}

-- | The very core of miniKanren. So core that it basically deals with
-- unification only. For writing relational programs, you will need @"Goal"@ as
Expand All @@ -24,7 +26,9 @@ module Kanren.Core (
-- * Values and terms
Logical (..),
VarId,
Term (..),
Term,
pattern Var,
pattern Value,
Atomic (..),

-- ** Operations on terms
Expand All @@ -46,17 +50,17 @@ module Kanren.Core (
makeVariable,
) where

import Control.DeepSeq
import Control.Monad (ap)
import Data.Bifunctor (first)
import Data.Coerce (coerce)
import Data.Foldable (foldrM)
import Data.IntMap.Strict (IntMap)
import Control.DeepSeq
import Control.Monad (ap)
import Data.Bifunctor (first)
import Data.Coerce (coerce)
import Data.Foldable (foldrM)
import Data.IntMap.Strict (IntMap)
import qualified Data.IntMap.Strict as IntMap
import Data.Maybe (fromMaybe)
import GHC.Exts (IsList (..))
import GHC.Generics (Generic)
import Unsafe.Coerce (unsafeCoerce)
import Data.Maybe (fromMaybe)
import GHC.Exts (IsList (..))
import Kanren.Tagless (Tagless, extra, inspectTagless, tagless)
import Unsafe.Coerce (unsafeCoerce)

-- $setup
-- >>> :set -package typedKanren
Expand Down Expand Up @@ -220,20 +224,26 @@ instance Show (VarId a) where
-- Int))@ will correctly use @LogicList Char@ and @LogicTree Int@ deep inside.
-- This way, you do not need to know what the logic representation for a type is
-- named, and deriving the logical representation for a type is trivial.
data Term a
= Var !(VarId a)
| Value !(Logic a)
deriving (Generic)
newtype Term a = Term { _getTerm :: Tagless (VarId a) (Logic a) }

deriving instance (NFData (Logic a)) => NFData (Term a)
deriving newtype instance Eq (Logic a) => Eq (Term a)
deriving newtype instance NFData (Logic a) => NFData (Term a)

deriving instance (Eq (Logic a)) => Eq (Term a)
pattern Var :: VarId a -> Term a
pattern Var varId <- Term (inspectTagless -> Left varId)
where Var varId = Term (extra varId)

pattern Value :: Logic a -> Term a
pattern Value value <- Term (inspectTagless -> Right value)
where Value value = Term (tagless value)

{-# COMPLETE Var, Value #-}

-- | This instance doesn't print the 'Var' and 'Value' tags. While this reduces
-- noise in the output, this may also be confusing since fully instantiated
-- terms may look indistinguishable from regular values.
instance (Show (Logic a)) => Show (Term a) where
showsPrec d (Var var) = showsPrec d var
showsPrec d (Var var) = showsPrec d var
showsPrec d (Value value) = showsPrec d value

instance (IsList (Logic a)) => IsList (Term a) where
Expand Down Expand Up @@ -291,14 +301,14 @@ unify' l r state =
occursCheck' :: (Logical a) => VarId b -> Term a -> State -> Bool
occursCheck' x t state =
case shallowWalk state t of
Var y -> coerce x == y
Var y -> coerce x == y
Value v -> occursCheck x v state

-- | 'walk', but on 'Term's instead of 'Logic' values. The actual substitution
-- of variables with values happens here.
walk' :: (Logical a) => State -> Term a -> Term a
walk' state x = case shallowWalk state x of
Var i -> Var i
Var i -> Var i
Value v -> Value (walk state v)

-- | 'inject', but to a 'Term' instead of a 'Logic' value. You will use this
Expand All @@ -318,7 +328,7 @@ inject' = Value . inject
-- >>> extract' <$> run (\x -> x === inject' (Left 42 :: Either Int Bool))
-- [Just (Left 42)]
extract' :: (Logical a) => Term a -> Maybe a
extract' Var{} = Nothing
extract' Var{} = Nothing
extract' (Value x) = extract x

data Normalization = Normalization (IntMap Int) Int
Expand All @@ -342,7 +352,7 @@ class (Logical a) => Normalizable a where

normalize' :: (Normalizable a) => (forall i. VarId i -> Normalizer (VarId i)) -> Term a -> Normalizer (Term a)
normalize' f (Var varId) = Var <$> f varId
normalize' f (Value x) = Value <$> normalize f x
normalize' f (Value x) = Value <$> normalize f x

runNormalize :: (Normalizable a) => Term a -> Term a
runNormalize x = normalized
Expand All @@ -360,7 +370,7 @@ runNormalize x = normalized
-- | Add a constraint that two terms must not be equal.
disequality :: (Logical a) => Term a -> Term a -> State -> Maybe State
disequality left right state = case addedSubst left right state of
Nothing -> Just state
Nothing -> Just state
Just added -> stateInsertDisequality added state

-- | Since 'Term's are polymorphic, we cannot easily store them in the
Expand All @@ -371,7 +381,7 @@ data ErasedTerm where

instance Show ErasedTerm where
show (ErasedTerm (Var varId)) = "Var " ++ show varId
show (ErasedTerm (Value _)) = "Value _"
show (ErasedTerm (Value _)) = "Value _"

-- | Cast an 'ErasedTerm' back to a 'Term a'. Hopefully, you will cast it to the
-- correct type, or bad things will happen.
Expand Down Expand Up @@ -478,17 +488,17 @@ updateComponents state subst = case substExtractArbitrary subst of
Just ((varId, ErasedTerm value), subst') ->
case substLookupArbitraryId subst'' of
Just varId' | varId == varId' -> subst''
_ -> updateComponents state subst''
_ -> updateComponents state subst''
where
added = fromMaybe substEmpty (addedSubst (Var (VarId varId)) value state)
subst'' = subst' <> added

-- | One branch in the search tree. Keeps track of known substitutions and
-- variables.
data State = State
{ knownSubst :: !Subst
{ knownSubst :: !Subst
, disequalities :: !Disequalities
, maxVarId :: !Int
, maxVarId :: !Int
}
deriving (Show)

Expand All @@ -513,7 +523,7 @@ shallowWalk _ (Value v) = Value v
shallowWalk state@State{knownSubst = Subst m} (Var (VarId i)) =
case IntMap.lookup i m of
Nothing -> Var (VarId i)
Just v -> shallowWalk state (unsafeReconstructTerm v)
Just v -> shallowWalk state (unsafeReconstructTerm v)

addSubst :: (Logical a) => VarId a -> Term a -> State -> Maybe State
addSubst (VarId i) value State{knownSubst = Subst m, ..} =
Expand Down
4 changes: 3 additions & 1 deletion src/Kanren/GenericLogical.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-redundant-constraints #-}

-- | 'Generic' implementations of 'Logical' methods.
--
Expand Down Expand Up @@ -53,6 +54,7 @@ import Data.Proxy (Proxy (..))
import GHC.Generics

import Kanren.Core
import Unsafe.Coerce (unsafeCoerce)

class GLogical f f' where
gunify :: Proxy f -> f' p -> f' p -> State -> Maybe State
Expand Down Expand Up @@ -153,7 +155,7 @@ genericInject
:: (Generic a, Generic (Logic a), GLogical (Rep a) (Rep (Logic a)))
=> a
-> Logic a
genericInject x = to (ginject (from x))
genericInject = unsafeCoerce -- to (ginject (from x))

-- | The generic implementation of 'extract'.
genericExtract
Expand Down
119 changes: 119 additions & 0 deletions src/Kanren/Tagless.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
-- | "Tagless" sum types allow to skip explicit tag for at least one of the alternatives in a sum type,
-- assuming that we can differentiate between the alternatives at runtime by other means.
--
-- The benefit of this representation is pronounced for recursive data types:
--
-- * In the fastest miniKanren implementations, this approach is used
-- to avoid expensive recursive injection of a regular (non-logical) value into a miniKanren program.
-- See Section 6 of [1] for more details about typed implementation of tagless approach in OCanren.
-- Note that faster-minikanren implementation (in Scheme/Racket) is also using a similar technique in an untyped setting.
--
-- * An efficient bignum representation can be done by "unboxing" the (small) integer constructor.
-- See the details of this example in [2].
--
-- This "tagless" representation approach is a variation of
-- * niche filling optimization [3] (which can be found in Rust, for instance)
-- * unboxed constructors [2]
--
-- See also "flat Maybe" implementation by Andras Kovacs: <https://github.com/AndrasKovacs/flat-maybe>
--
-- Note that this approach is NOT the same as unboxed sum typed in Haskell (at least not in their current form).
--
-- [1] Dmitrii Kosarev, Dmitry Boulytchev. Typed Embedding of a Relational Language in OCaml <https://arxiv.org/abs/1805.11006>
-- [2] Nicolas Chataing, Stephen Dolan, Gabriel Scherer, and Jeremy Yallop. 2024. Unboxed Data Constructors: Or, How cpp Decides a Halting Problem. Proc. ACM Program. Lang. 8, POPL, Article 51 (January 2024), 31 pages. https://doi.org/10.1145/3632893
-- [3] Bartell-Mangel, N. L. (2022). Filling a Niche: Using Spare Bits to Optimize Data Representations.
module Kanren.Tagless (Tagless, tagless, extra, inspectTagless, inspectAsTagless) where

import Control.DeepSeq (NFData (rnf))
import qualified Foreign.Ptr as Foreign
import qualified Foreign.StablePtr as Foreign
import GHC.Exts (Int (I#), closureSize#)
import System.IO.Unsafe (unsafePerformIO)
import Unsafe.Coerce (unsafeCoerce)

-- | A value of type 'Anchor' is used
-- as a runtime tag for the extra data in the 'Tagless' sum type.
type Anchor = Int

-- | The anchor
theAnchor :: Anchor
{-# NOINLINE theAnchor #-}
theAnchor = unsafePerformIO $ do
sptr <- Foreign.newStablePtr theAnchor
let ptr = Foreign.castStablePtrToPtr sptr
Foreign.IntPtr n = Foreign.ptrToIntPtr ptr
return $! (n - 2837641298) -- scrambled integer value of a stable pointer to the anchor itself
-- the hope is that it is sufficiently unique and will not interfere with any user data

-- | Extra data packed with an 'Anchor' (a runtime tag, specifying that these are indeed extra data).
data Extra extra = Extra
{ _extraAnchor :: {-# UNPACK #-} !Anchor
, _getExtra :: !extra
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should also have this unboxed and unpacked.

}

-- | Make a sum type, but do not use any runtime tag for @a@.
-- This effectively allows injecting (tagged) values of type @extra@
-- into the type of @a@, meaning that coercion from @a@ to @'Tagless' extra a@ is zero-cost.
newtype Tagless extra a = Tagless a

instance (Eq extra, Eq a) => Eq (Tagless extra a) where
x == y = inspectTagless x == inspectTagless y

instance (NFData extra, NFData a) => NFData (Tagless extra a) where
rnf x = case inspectTagless x of
Left l -> rnf l
Right r -> rnf r

-- | Returns the size of the given closure in machine words.
--
-- A wrapper around 'closureSize#'.
--
-- Note that in many situations it will return the size of a thunk.
-- This might be especially surprising/inconsistent when dealing with polymorphic constants:
--
-- >>> closureSize ([] :: [Int])
-- 4
-- >>> closureSize $! ([] :: [Int])
-- 2
-- >>> closureSize (Nothing :: Maybe Int)
-- 2
-- >>> >>> closureSize $! (Nothing :: Maybe Int)
-- 2
closureSize :: a -> Int
closureSize x = I# (closureSize# x)

-- | Inspect a value of type @a@ to see if it contains @extra@ data
-- via 'Tagless' representation.
inspectAsTagless :: a -> Either extra a
{-# INLINE inspectAsTagless #-}
inspectAsTagless = inspectTagless . tagless

-- | Check if a given 'Tagless' sum type is @extra@ or not.
--
-- >>> inspectTagless (extra 123 :: Tagless Int [Int])
-- Left 123
-- >>> inspectTagless (tagless [] :: Tagless Int [Int])
-- Right []
-- >>> inspectTagless (tagless [123] :: Tagless Int [Int])
-- Right [123]
inspectTagless :: Tagless extra a -> Either extra a
{-# NOINLINE inspectTagless #-}
inspectTagless (Tagless !x)
| closureSize x < 3 = Right x
| otherwise =
case unsafeCoerce x of
Extra anchor extra_ | anchor == theAnchor ->
Left extra_
_ -> Right x

-- | Wrap a value into a 'Tagless' sum type.
-- This operation is zero-cost and amounts to a simple coercion.
tagless :: a -> Tagless extra a
tagless = Tagless

-- | Wrap @extra@ data into a 'Tagless' sum type.
-- This operation adds a runtime tag which is inspectable via 'inspectTagless'.
extra :: extra -> Tagless extra a
extra x = unsafeCoerce (Extra theAnchor x)
1 change: 1 addition & 0 deletions typedKanren.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ library
Kanren.LogicalBase
Kanren.Match
Kanren.Stream
Kanren.Tagless
Kanren.TH
other-modules:
Paths_typedKanren
Expand Down
Loading