snejugal · fizruk · Oct 9, 2024 · Oct 9, 2024 · Oct 9, 2024 · fizruk
diff --git a/src/Kanren/Core.hs b/src/Kanren/Core.hs
@@ -1,21 +1,23 @@
-{-# LANGUAGE DefaultSignatures #-}
-{-# LANGUAGE DeriveAnyClass #-}
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE DerivingStrategies #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE GADTs #-}
+{-# LANGUAGE DefaultSignatures          #-}
+{-# LANGUAGE DeriveAnyClass             #-}
+{-# LANGUAGE DeriveFunctor              #-}
+{-# LANGUAGE DeriveGeneric              #-}
+{-# LANGUAGE DerivingStrategies         #-}
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE GADTs                      #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE KindSignatures #-}
-{-# LANGUAGE NamedFieldPuns #-}
-{-# LANGUAGE RankNTypes #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE TypeFamilyDependencies #-}
-{-# LANGUAGE TypeOperators #-}
-{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE KindSignatures             #-}
+{-# LANGUAGE NamedFieldPuns             #-}
+{-# LANGUAGE RankNTypes                 #-}
+{-# LANGUAGE RecordWildCards            #-}
+{-# LANGUAGE StandaloneDeriving         #-}
+{-# LANGUAGE TupleSections              #-}
+{-# LANGUAGE TypeFamilyDependencies     #-}
+{-# LANGUAGE TypeOperators              #-}
+{-# LANGUAGE UndecidableInstances       #-}
 {-# OPTIONS_GHC -Wno-redundant-constraints #-}
+{-# LANGUAGE PatternSynonyms            #-}
+{-# LANGUAGE ViewPatterns               #-}
 
 -- | The very core of miniKanren. So core that it basically deals with
 -- unification only. For writing relational programs, you will need @"Goal"@ as
@@ -24,7 +26,9 @@ module Kanren.Core (
   -- * Values and terms
   Logical (..),
   VarId,
-  Term (..),
+  Term,
+  pattern Var,
+  pattern Value,
   Atomic (..),
 
   -- ** Operations on terms
@@ -46,17 +50,17 @@ module Kanren.Core (
   makeVariable,
 ) where
 
-import Control.DeepSeq
-import Control.Monad (ap)
-import Data.Bifunctor (first)
-import Data.Coerce (coerce)
-import Data.Foldable (foldrM)
-import Data.IntMap.Strict (IntMap)
+import           Control.DeepSeq
+import           Control.Monad      (ap)
+import           Data.Bifunctor     (first)
+import           Data.Coerce        (coerce)
+import           Data.Foldable      (foldrM)
+import           Data.IntMap.Strict (IntMap)
 import qualified Data.IntMap.Strict as IntMap
-import Data.Maybe (fromMaybe)
-import GHC.Exts (IsList (..))
-import GHC.Generics (Generic)
-import Unsafe.Coerce (unsafeCoerce)
+import           Data.Maybe         (fromMaybe)
+import           GHC.Exts           (IsList (..))
+import           Kanren.Tagless     (Tagless, extra, inspectTagless, tagless)
+import           Unsafe.Coerce      (unsafeCoerce)
 
 -- $setup
 -- >>> :set -package typedKanren
@@ -220,20 +224,26 @@ instance Show (VarId a) where
 -- Int))@ will correctly use @LogicList Char@ and @LogicTree Int@ deep inside.
 -- This way, you do not need to know what the logic representation for a type is
 -- named, and deriving the logical representation for a type is trivial.
-data Term a
-  = Var !(VarId a)
-  | Value !(Logic a)
-  deriving (Generic)
+newtype Term a = Term { _getTerm :: Tagless (VarId a) (Logic a) }
 
-deriving instance (NFData (Logic a)) => NFData (Term a)
+deriving newtype instance Eq (Logic a) => Eq (Term a)
+deriving newtype instance NFData (Logic a) => NFData (Term a)
 
-deriving instance (Eq (Logic a)) => Eq (Term a)
+pattern Var :: VarId a -> Term a
+pattern Var varId <- Term (inspectTagless -> Left varId)
+  where Var varId = Term (extra varId)
+
+pattern Value :: Logic a -> Term a
+pattern Value value <- Term (inspectTagless -> Right value)
+  where Value value = Term (tagless value)
+
+{-# COMPLETE Var, Value #-}
 
 -- | This instance doesn't print the 'Var' and 'Value' tags. While this reduces
 -- noise in the output, this may also be confusing since fully instantiated
 -- terms may look indistinguishable from regular values.
 instance (Show (Logic a)) => Show (Term a) where
-  showsPrec d (Var var) = showsPrec d var
+  showsPrec d (Var var)     = showsPrec d var
   showsPrec d (Value value) = showsPrec d value
 
 instance (IsList (Logic a)) => IsList (Term a) where
@@ -291,14 +301,14 @@ unify' l r state =
 occursCheck' :: (Logical a) => VarId b -> Term a -> State -> Bool
 occursCheck' x t state =
   case shallowWalk state t of
-    Var y -> coerce x == y
+    Var y   -> coerce x == y
     Value v -> occursCheck x v state
 
 -- | 'walk', but on 'Term's instead of 'Logic' values. The actual substitution
 -- of variables with values happens here.
 walk' :: (Logical a) => State -> Term a -> Term a
 walk' state x = case shallowWalk state x of
-  Var i -> Var i
+  Var i   -> Var i
   Value v -> Value (walk state v)
 
 -- | 'inject', but to a 'Term' instead of a 'Logic' value. You will use this
@@ -318,7 +328,7 @@ inject' = Value . inject
 -- >>> extract' <$> run (\x -> x === inject' (Left 42 :: Either Int Bool))
 -- [Just (Left 42)]
 extract' :: (Logical a) => Term a -> Maybe a
-extract' Var{} = Nothing
+extract' Var{}     = Nothing
 extract' (Value x) = extract x
 
 data Normalization = Normalization (IntMap Int) Int
@@ -342,7 +352,7 @@ class (Logical a) => Normalizable a where
 
 normalize' :: (Normalizable a) => (forall i. VarId i -> Normalizer (VarId i)) -> Term a -> Normalizer (Term a)
 normalize' f (Var varId) = Var <$> f varId
-normalize' f (Value x) = Value <$> normalize f x
+normalize' f (Value x)   = Value <$> normalize f x
 
 runNormalize :: (Normalizable a) => Term a -> Term a
 runNormalize x = normalized
@@ -360,7 +370,7 @@ runNormalize x = normalized
 -- | Add a constraint that two terms must not be equal.
 disequality :: (Logical a) => Term a -> Term a -> State -> Maybe State
 disequality left right state = case addedSubst left right state of
-  Nothing -> Just state
+  Nothing    -> Just state
   Just added -> stateInsertDisequality added state
 
 -- | Since 'Term's are polymorphic, we cannot easily store them in the
@@ -371,7 +381,7 @@ data ErasedTerm where
 
 instance Show ErasedTerm where
   show (ErasedTerm (Var varId)) = "Var " ++ show varId
-  show (ErasedTerm (Value _)) = "Value _"
+  show (ErasedTerm (Value _))   = "Value _"
 
 -- | Cast an 'ErasedTerm' back to a 'Term a'. Hopefully, you will cast it to the
 -- correct type, or bad things will happen.
@@ -478,17 +488,17 @@ updateComponents state subst = case substExtractArbitrary subst of
   Just ((varId, ErasedTerm value), subst') ->
     case substLookupArbitraryId subst'' of
       Just varId' | varId == varId' -> subst''
-      _ -> updateComponents state subst''
+      _                             -> updateComponents state subst''
    where
     added = fromMaybe substEmpty (addedSubst (Var (VarId varId)) value state)
     subst'' = subst' <> added
 
 -- | One branch in the search tree. Keeps track of known substitutions and
 -- variables.
 data State = State
-  { knownSubst :: !Subst
+  { knownSubst    :: !Subst
   , disequalities :: !Disequalities
-  , maxVarId :: !Int
+  , maxVarId      :: !Int
   }
   deriving (Show)
 
@@ -513,7 +523,7 @@ shallowWalk _ (Value v) = Value v
 shallowWalk state@State{knownSubst = Subst m} (Var (VarId i)) =
   case IntMap.lookup i m of
     Nothing -> Var (VarId i)
-    Just v -> shallowWalk state (unsafeReconstructTerm v)
+    Just v  -> shallowWalk state (unsafeReconstructTerm v)
 
 addSubst :: (Logical a) => VarId a -> Term a -> State -> Maybe State
 addSubst (VarId i) value State{knownSubst = Subst m, ..} =

diff --git a/src/Kanren/GenericLogical.hs b/src/Kanren/GenericLogical.hs
@@ -5,6 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables   #-}
 {-# LANGUAGE TypeApplications      #-}
 {-# LANGUAGE TypeOperators         #-}
+{-# OPTIONS_GHC -Wno-redundant-constraints #-}
 
 -- | 'Generic' implementations of 'Logical' methods.
 --
@@ -53,6 +54,7 @@ import           Data.Proxy    (Proxy (..))
 import           GHC.Generics
 
 import           Kanren.Core
+import           Unsafe.Coerce (unsafeCoerce)
 
 class GLogical f f' where
   gunify :: Proxy f -> f' p -> f' p -> State -> Maybe State
@@ -153,7 +155,7 @@ genericInject
   :: (Generic a, Generic (Logic a), GLogical (Rep a) (Rep (Logic a)))
   => a
   -> Logic a
-genericInject x = to (ginject (from x))
+genericInject = unsafeCoerce -- to (ginject (from x))
 
 -- | The generic implementation of 'extract'.
 genericExtract

diff --git a/src/Kanren/Tagless.hs b/src/Kanren/Tagless.hs
@@ -0,0 +1,119 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash    #-}
+-- | "Tagless" sum types allow to skip explicit tag for at least one of the alternatives in a sum type,
+-- assuming that we can differentiate between the alternatives at runtime by other means.
+--
+-- The benefit of this representation is pronounced for recursive data types:
+--
+-- * In the fastest miniKanren implementations, this approach is used
+--   to avoid expensive recursive injection of a regular (non-logical) value into a miniKanren program.
+--   See Section 6 of [1] for more details about typed implementation of tagless approach in OCanren.
+--   Note that faster-minikanren implementation (in Scheme/Racket) is also using a similar technique in an untyped setting.
+--
+-- * An efficient bignum representation can be done by "unboxing" the (small) integer constructor.
+--   See the details of this example in [2].
+--
+-- This "tagless" representation approach is a variation of
+-- * niche filling optimization [3] (which can be found in Rust, for instance)
+-- * unboxed constructors [2]
+--
+-- See also "flat Maybe" implementation by Andras Kovacs: <https://github.com/AndrasKovacs/flat-maybe>
+--
+-- Note that this approach is NOT the same as unboxed sum typed in Haskell (at least not in their current form).
+--
+-- [1] Dmitrii Kosarev, Dmitry Boulytchev. Typed Embedding of a Relational Language in OCaml <https://arxiv.org/abs/1805.11006>
+-- [2] Nicolas Chataing, Stephen Dolan, Gabriel Scherer, and Jeremy Yallop. 2024. Unboxed Data Constructors: Or, How cpp Decides a Halting Problem. Proc. ACM Program. Lang. 8, POPL, Article 51 (January 2024), 31 pages. https://doi.org/10.1145/3632893
+-- [3] Bartell-Mangel, N. L. (2022). Filling a Niche: Using Spare Bits to Optimize Data Representations.
+module Kanren.Tagless (Tagless, tagless, extra, inspectTagless, inspectAsTagless) where
+
+import           Control.DeepSeq   (NFData (rnf))
+import qualified Foreign.Ptr       as Foreign
+import qualified Foreign.StablePtr as Foreign
+import           GHC.Exts          (Int (I#), closureSize#)
+import           System.IO.Unsafe  (unsafePerformIO)
+import           Unsafe.Coerce     (unsafeCoerce)
+
+-- | A value of type 'Anchor' is used
+-- as a runtime tag for the extra data in the 'Tagless' sum type.
+type Anchor = Int
+
+-- | The anchor
+theAnchor :: Anchor
+{-# NOINLINE theAnchor #-}
+theAnchor = unsafePerformIO $ do
+  sptr <- Foreign.newStablePtr theAnchor
+  let ptr = Foreign.castStablePtrToPtr sptr
+      Foreign.IntPtr n = Foreign.ptrToIntPtr ptr
+  return $! (n - 2837641298) -- scrambled integer value of a stable pointer to the anchor itself
+                             -- the hope is that it is sufficiently unique and will not interfere with any user data
+
+-- | Extra data packed with an 'Anchor' (a runtime tag, specifying that these are indeed extra data).
+data Extra extra = Extra
+  { _extraAnchor :: {-# UNPACK #-} !Anchor
+  , _getExtra    :: !extra
+  }
+
+-- | Make a sum type, but do not use any runtime tag for @a@.
+-- This effectively allows injecting (tagged) values of type @extra@
+-- into the type of @a@, meaning that coercion from @a@ to @'Tagless' extra a@ is zero-cost.
+newtype Tagless extra a = Tagless a
+
+instance (Eq extra, Eq a) => Eq (Tagless extra a) where
+  x == y = inspectTagless x == inspectTagless y
+
+instance (NFData extra, NFData a) => NFData (Tagless extra a) where
+  rnf x = case inspectTagless x of
+    Left l  -> rnf l
+    Right r -> rnf r
+
+-- | Returns the size of the given closure in machine words.
+--
+-- A wrapper around 'closureSize#'.
+--
+-- Note that in many situations it will return the size of a thunk.
+-- This might be especially surprising/inconsistent when dealing with polymorphic constants:
+--
+-- >>> closureSize ([] :: [Int])
+-- 4
+-- >>> closureSize $! ([] :: [Int])
+-- 2
+-- >>> closureSize (Nothing :: Maybe Int)
+-- 2
+-- >>> >>> closureSize $! (Nothing :: Maybe Int)
+-- 2
+closureSize :: a -> Int
+closureSize x = I# (closureSize# x)
+
+-- | Inspect a value of type @a@ to see if it contains @extra@ data
+-- via 'Tagless' representation.
+inspectAsTagless :: a -> Either extra a
+{-# INLINE inspectAsTagless #-}
+inspectAsTagless = inspectTagless . tagless
+
+-- | Check if a given 'Tagless' sum type is @extra@ or not.
+--
+-- >>> inspectTagless (extra 123 :: Tagless Int [Int])
+-- Left 123
+-- >>> inspectTagless (tagless [] :: Tagless Int [Int])
+-- Right []
+-- >>> inspectTagless (tagless [123] :: Tagless Int [Int])
+-- Right [123]
+inspectTagless :: Tagless extra a -> Either extra a
+{-# NOINLINE inspectTagless #-}
+inspectTagless (Tagless !x)
+  | closureSize x < 3 = Right x
+  | otherwise =
+      case unsafeCoerce x of
+        Extra anchor extra_ | anchor == theAnchor ->
+          Left extra_
+        _ -> Right x
+
+-- | Wrap a value into a 'Tagless' sum type.
+-- This operation is zero-cost and amounts to a simple coercion.
+tagless :: a -> Tagless extra a
+tagless = Tagless
+
+-- | Wrap @extra@ data into a 'Tagless' sum type.
+-- This operation adds a runtime tag which is inspectable via 'inspectTagless'.
+extra :: extra -> Tagless extra a
+extra x = unsafeCoerce (Extra theAnchor x)
diff --git a/typedKanren.cabal b/typedKanren.cabal
@@ -36,6 +36,7 @@ library
       Kanren.LogicalBase
       Kanren.Match
       Kanren.Stream
+      Kanren.Tagless
       Kanren.TH
   other-modules:
       Paths_typedKanren