Creating unique labels in Haskell

You need a "unique supply". The usual way to do this in Haskell is by threading a counter through the State monad, which automates the plumbing problem you describe.


I suppose it is tempting, if the only tool you have is a hammer, to treat everything as if it were a nail.

Abraham Maslow.

How about something different - a unique-supply that isn't a member of the Monad class. As it happens, you were almost there with your original type signature:

compileStatement :: Statement -> UniqueIDState -> [AbstractInstruction]

If the only requirement is that each label is unique - no need to count how many were used, providing the same identifiers given the same circumstances, etc - there's a less-invasive technique you can use.

From pages 39-40 of State in Haskell by John Launchbury and Simon Peyton Jones:

newUniqueSupply :: IO UniqueSupply
splitUniqueSupply :: UniqueSupply -> (UniqueSupply, UniqueSupply)
getUnique  :: UniqueSupply -> Unique

instance Eq Unique
instance Ord Unique
instance Text Unique

--    interface     --
-- ================ --
--  implementation  --

data UniqueSupply = US Unique UniqueSupply UniqueSupply

type Unique = Int

 -- where all the action happens!
newUniqueSupply :: IO UniqueSupply
newUniqueSupply
  = newVar 0             `thenST` \ uvar ->
    let
      next :: IO Unique
      next = interleaveST (
                readVar uvar         `thenST` \ u ->
                writeVar uvar (u+1)  `thenST_`
                returnStrictlyST u
              )

      supply :: IO UniqueSupply
      supply = interleaveST (
                 next         `thenST` \ u ->
                 supply       `thenST` \ s1 ->
                 supply       `thenST` \ s2 ->
                 returnST (US u s1 s2)
                )

    in
    supply

 -- bits so boring they're not even in the paper...
splitUniqueSupply (US _ s1 s2) = (s1, s2)
getUnique (US u _ _) = u

Yes...that sample of Haskell's from 1996 - let's freshen it up:

module UniqueSupply(
    Unique, UniqueSupply,
    newUniqueSupply, splitUniqueSupply, getUnique
) where

import Control.Monad    (liftM3)
import Data.IORef       (IORef, newIORef, atomicModifyIORef)
import System.IO.Unsafe (unsafeInterleaveIO)

newtype Unique    =  U Int deriving (Eq, Ord, Read, Show)

data UniqueSupply =  US Unique UniqueSupply UniqueSupply

newUniqueSupply   :: IO UniqueSupply
newUniqueSupply
  = do uvar <- newIORef 0
       let next   :: IO Unique
           next   =  unsafeInterleaveIO (atomicModifyIORef uvar (\u -> (u+1, U u)))

           supply :: IO UniqueSupply
           supply =  unsafeInterleaveIO (liftM3 US next supply supply)
       supply

splitUniqueSupply :: UniqueSupply -> (UniqueSupply, UniqueSupply)
splitUniqueSupply (US _ s1 s2) =  (s1, s2)

getUnique :: UniqueSupply -> Unique
getUnique (US u _ _) =  u

Now that it's working again, there are some annoyances to deal with:

  • the use of two types;

  • the lack of polymorphism;

  • the fixed mode of generation;

  • the possibility of errant reuse.

The last point is particularly interesting. Assuming:

data Statement =
    ... | If Statement Statement Statement | ...

then if:

compileStatement (If c t e) s =
    case splitUniqueSupply s of
      (s1, s2) -> case splitUniqueSupply s2 of
                    (s3, s4) -> buildCondJump (compileStatement c s1)
                                              (compileStatement t s3)
                                              (compileStatement e s4)

is mistakenly changed to:

compileStatement (If c t e) s =
    case splitUniqueSupply s of
      (s1, s2) -> case splitUniqueSupply s2 of
                    (s3, s4) -> buildCondJump (compileStatement c s)
                                              (compileStatement t s)
                                              (compileStatement e s)

not only are UniqueSupply and Unique values being erroneously reused, there's the potential for a space leak if any of the recursive calls to compileStatement uses the supply intensively.

We now consider the second point: the lack of polymorphism. Let's assume a suitable type exists:

data Fresh a =  Fresh a (Fresh a) (Fresh a)

freshNew     :: ... -> IO (Fresh a)

splitFresh   :: Fresh a -> (Fresh a, Fresh a)

pluckFresh   :: Fresh a -> a

That implies:

instance Functor Fresh where
    fmap h (Fresh u s1 s2) =  Fresh (h u) (fmap h s1) (fmap h s2)

which then inspires:

freshNew     :: (Int -> a) -> IO (Fresh a)
freshNew g   =  fmap (fmap g) freshInts

splitFresh   :: Fresh a -> (Fresh a, Fresh a)
splitFresh (Fresh _ s1 s2) =  (s1, s2)

pluckFresh   :: Fresh a -> a
pluckFresh (Fresh u _ _) =  u

We can then keep freshInts private:

freshInts    :: IO (Fresh Int)
freshInts    =  do uvar <- newIORef 0 
                   let incr u =  (u+1, u)
                       next   =  unsafeInterleaveIO $
                                 atomicModifyIORef uvar incr
                       supply =  unsafeInterleaveIO $
                                 liftM3 Fresh next supply supply
                   supply

If a user only needs Int values:

do            .
              .
              .
   int_supply <- freshNew id  {- id x = x -}
              .
              .
              .

As a bonus, this also remediates the use of two types and the fixed mode of generation (the first and third points). Time for a Fresh new module:

module Fresh(
    Fresh,
    freshNew, splitFresh, pluckFresh
) where

import Control.Monad    (liftM3)
import Data.IORef       (IORef, newIORef, atomicModifyIORef)
import System.IO.Unsafe (unsafeInterleaveIO)

data Fresh a =  Fresh a (Fresh a) (Fresh a)

instance Functor Fresh where
    fmap h (Fresh u s1 s2) =  Fresh (h u) (fmap h s1) (fmap h s2)

freshNew     :: (Int -> a) -> IO (Fresh a)
freshNew g   =  fmap (fmap g) freshInts

splitFresh   :: Fresh a -> (Fresh a, Fresh a)
splitFresh (Fresh _ s1 s2) =  (s1, s2)

pluckFresh   :: Fresh a -> a
pluckFresh (Fresh u _ _) =  u

-- local definition
freshInts    :: IO (Fresh Int)
freshInts    =  do uvar <- newIORef 0 
                   let incr u =  (u+1, u)
                       next   =  unsafeInterleaveIO $
                                 atomicModifyIORef uvar incr
                       supply =  unsafeInterleaveIO $
                                 liftM3 Fresh next supply supply
                   supply

Now for the riddle of reuse, and an initial attempt at an answer:

freshNew     :: (Int -> a) -> IO (Fresh a)
freshNew g   =  do z <- newIORef ()
                   fmap (fmap g) (freshInts z)

freshInts    :: IORef () -> IO (Fresh Int)
freshInts z  =  do let using   :: () -> (a, ())
                       using x =  (error "already used!", x)
                   () <- atomicModifyIORef z using
                   z1 <- newIORef ()
                   z2 <- newIORef ()
                   let incr u =  (u+1, u)
                       next   =  unsafeInterleaveIO $
                                 atomicModifyIORef uvar incr
                       supply =  unsafeInterleaveIO $
                                 liftM3 Fresh next (freshInts z1) (freshInts z2)
                   supply

...yeah, that's a prototype alright - can we do better?

Up to this point, splitFresh and pluckFresh have been trivial:

splitFresh   :: Fresh a -> (Fresh a, Fresh a)
splitFresh (Fresh _ s1 s2) =  (s1, s2)

pluckFresh   :: Fresh a -> a
pluckFresh (Fresh u _ _) =  u

Can some of the work freshInts and freshNew now do be transferred to them:

  • if splitFresh could generate the pair of subtrees directly, Fresh values would be simpler:

     data Fresh a =  Fresh ... {- no subtrees -}
    
     splitFresh :: Fresh a -> (Fresh a, Fresh a)
     splitFresh (Fresh g ...) =  (Fresh ..., Fresh ...)
    
  • if pluckFresh had access to the generator function - g in freshNew - it could then directly provide the required unique value:

     data Fresh a =  Fresh (... -> a) ...
    
     pluckFresh :: Fresh a -> a
     pluckFresh (Fresh g ...) =  (g ...)
    

What about something like:

data Fresh a =  Fresh (Int -> a) U

splitFresh   :: Fresh a -> (Fresh a, Fresh a)
splitFresh (Fresh g n) =  (Fresh g n1, Fresh g n2) where 
                          (n1, n2) =  splitU n
   
pluckFresh   :: Fresh a -> a
pluckFresh (Fresh g n) =  g (intOfU n)

where:

splitU :: U -> (U, U)
intOfU :: U -> Int

That could make freshInts as simple as:

freshInts    :: IO (Fresh Int)
freshInts    =  do n <- initialU
                   return (Fresh (\x -> x) n)

assuming:

initialU :: IO U

Hmm - something's not quite right about that definition of freshInts. Then there's intOfU - it has a curious resemblance to something seen elsewhere...

[...] In an imperative program one might simply call GenSym() for each identifier, to allocate a unique name from a global supply, and to side-effect the supply so that subsequent calls to GenSym() will deliver a new value.

(from page 39 of Launchbury's and Peyton-Jones's paper.)

Let's give this some more thought:

  • because of its outside (of Haskell) interaction with the global (and mutable) supply, genSym would have the type:

    genSym :: IO Int
    

    to prevent its use in pure contexts.

  • the earlier prototype has its own outside interactions - it uses mutable references:

    freshInts :: IORef () -> IO (Fresh Int)
    

    to prevent the reuse of Fresh values.

...with the abstract IO type indicating the presence of those outside interactions by its appearance in both type signatures.

If we were to (cautiously!) assume:

  • intOfU is genSym in disguise;

  • that U type also serves as an indicator of outside interactions;

i.e:

type U =  OI
genSym :: OI -> Int

intOfU :: U -> Int
intOfU =  ... $ genSym

...that would imply:

data Fresh a =  Fresh (Int -> a) OI

splitU       :: OI -> (OI, OI)
   

This looks promising - we can now relocate genSym to freshInts:

data Fresh a =  Fresh (OI -> a) OI

pluckFresh   :: Fresh a -> a
pluckFresh (Fresh g n) =  g n

freshInts    :: IO (Fresh Int)
freshInts    =  do n <- initialU
                   uvar <- newIORef 0
                   let incr n =  (n + 1, n)

                       genSym :: IO Int
                       genSym =  atomicModifyIORef uvar incr
                            
                       intOfU :: OI -> Int
                       intOfU =  ... $ genSym

                   return (Fresh intOfU n)

That looks more sensible - what about everything else?

instance Functor Fresh where
    fmap f (Fresh g n) =  Fresh (f . g) n

freshNew     :: (Int -> a) -> IO (Fresh a)
freshNew g   =  do n <- initialU
                   uvar <- newIORef 0
                   let incr n =  (n + 1, n)

                       genSym :: IO Int
                       genSym =  atomicModifyIORef uvar incr
                            
                       intOfU :: OI -> Int
                       intOfU =  ... $ genSym

                   return (Fresh (g . intOfU) n)

This is looking very promising - we no longer need the local definition freshInts! We just need to define U OI, initialU and splitU- in doing so, there are some matters to consider:

  • remember the problem with Fresh values being mistakenly reused in compileStatement? Well, the same problem exists with those OI values:

    pourFresh  :: Fresh a -> [a]
    pourFresh (Fresh g n) = map g (pourU n)
    
    pourU      :: OI -> [OI]
    pourU n    =  n1 : pourU n1 where (n1, n2) = splitU n
    

    This problem would be aggravated by the ready availability of constructors for the OI type.

  • we're still assuming this OI type indicates the presence of outside interactions - much like that abstruse type simply known as IO...

This suggests the OI type should be abstract. As we're dealing with prototypes and you're probably already using it, perhaps the easiest option is just to use Glasgol GHC's extensions as necessary.

Time for a deep breath, and a few name changes:

  -- for GHC 8.6.5
{-# LANGUAGE MagicHash, UnboxedTuples #-}
import Data.Char  (isSpace)
import Data.IORef (IORef, newIORef, atomicModifyIORef)
import Prelude    (Int, String, Eq(..), Functor(..), Num(..))
import Prelude    ((.), ($), (++), error, all)
import GHC.Base   (IO(..), State#, MutVar#, RealWorld)
import GHC.Base   (seq#, newMutVar#, atomicModifyMutVar#, noDuplicate#)

data OI                 =  OI OI#

type OI#                =  String -> State# RealWorld

type IO# a              =  State# RealWorld -> (# State# RealWorld, a #)


part#                   :: OI# -> (# OI#, OI# #)
part# h                 =  case h "partOI" of
                             s -> case dispense# s of
                                    (# s', h1 #) ->
                                      case dispense# s' of
                                        (# _, h2 #) -> (# h1, h2 #)

dispense#               :: IO# OI#
dispense# s             =  case newMutVar# () s of
                             (# s', r #) -> (# s', expire# s' r #)

expire#                 :: State# s -> MutVar# s () -> String -> State# s
expire# s r name        =  case atomicModifyMutVar# r use s of
                             (# s', () #) -> s'
                           where
                               use x   =  (error nowUsed, x)
                               nowUsed =  name' ++ ": already expired"
                               name'   =  if all isSpace name then "(unknown)"
                                          else name

invokes#                :: Monomo a => String -> IO# a -> OI# -> a
(name `invokes#` act) h =  case act (noDuplicate# (h name)) of (# _, t #) -> t

class Monomo a

You can now start breathing again; there's some name changes here too:

partFresh               :: Fresh a -> (Fresh a, Fresh a)
partFresh (Fresh g u)   =  case partOI u of
                             (u1, u2) -> (Fresh g u1, Fresh g u2)
   
pluckFresh              :: Fresh a -> a
pluckFresh (Fresh g u)  =  g u

freshNew                :: (Int -> a) -> IO (Fresh a)
freshNew g              =  do uvar <- newIORef 0
                              let incr n =  (n + 1, n)

                                  genSym :: IO Int
                                  genSym =  atomicModifyIORef uvar incr
                            
                                  gensym :: OI -> Int
                                  gensym =  "gensym" `invokes` genSym

                              runOI (Fresh (g . gensym))

instance Monomo Int

So there you have it: a simple unique-supply which (apart from one definition) is monad-free:

  • you just need to define e.g:

    nextID :: Int -> ID
    
  • the resulting change to your type signature is modest:

    compileStatement :: Statement -> Fresh ID -> [AbstractInstruction]
    

But, if you really do need to, you can use Fresh as the basis for a monadic type e.g:

type Supply i a = Fresh i -> a

unit     :: a -> Supply i a
unit x   =  \u -> partFresh u `seq` x

bind     :: Supply i a -> (a -> Supply i b) -> Supply i b
bind m k =  \u -> case partFresh u of (u1, u2) -> (\x -> x `seq` k x u2) (m u1)

where:

 -- for GHC 8.6.5
{-# LANGUAGE CPP #-}
#define during seq
import qualified Prelude(during)

{-# NOINLINE seq #-}
infixr 0 `seq`
seq     :: a -> b -> b
seq x y = Prelude.during x (case x of _ -> y)

or:

 -- for GHC 8.6.5
{-# LANGUAGE CPP #-}
#define during seq
import qualified Prelude(during)
import GHC.Base(lazy)

infixr 0 `seq`
seq     :: a -> b -> b
seq x y = Prelude.during x (lazy y)

...because Prelude.seq isn't actually sequential.

(Yes: those definitions are GHC-specific; for other Haskell implementations, the simplest option may well be to add a new primitive. As for the extensions themselves, they stay with each definition.)

Hmm...that's interesting:

  -- for GHC 8.6.5
{-# LANGUAGE MagicHash, UnboxedTuples #-}
module Fresh(
    Fresh,
    freshNew, partFresh, pluckFresh
) where
import Data.Char  (isSpace)
import Data.IORef (IORef, newIORef, atomicModifyIORef)
import Prelude    (Int, String, Eq(..), Functor(..), Num(..))
import Prelude    ((.), ($), (++), error, all)
import GHC.Base   (IO(..), State#, MutVar#, RealWorld)
import GHC.Base   (seq#, newMutVar#, atomicModifyMutVar#, noDuplicate#)

partFresh               :: Fresh a -> (Fresh a, Fresh a)
partFresh (Fresh g u)   =  case partOI u of
                             (u1, u2) -> (Fresh g u1, Fresh g u2)
   
pluckFresh              :: Fresh a -> a
pluckFresh (Fresh g u)  =  g u

freshNew                :: (Int -> a) -> IO (Fresh a)
freshNew g              =  do uvar <- newIORef 0
                              let incr n =  (n + 1, n)

                                  genSym :: IO Int
                                  genSym =  atomicModifyIORef uvar incr
                            
                                  gensym :: OI -> Int
                                  gensym =  "gensym" `invokes` genSym

                              runOI (Fresh (g . gensym))

instance Functor Fresh where
    fmap f (Fresh g n) =  Fresh (f . g) n

-- local definitions --
data Fresh a            =  Fresh (OI -> a) OI

partOI                  :: OI -> (OI, OI)
partOI (OI h)           =  case part# h of (# h1, h2 #) -> (OI h1, OI h2)

runOI                   :: (OI -> a) -> IO a
runOI g                 =  IO $ \s -> case dispense# s of
                                        (# s', h #) -> seq# (g (OI h)) s'

invokes                 :: Monomo a => String -> IO a -> OI -> a
(name `invokes` IO act) (OI h)
                        =  (name `invokes#` act) h

class Monomo a

 -- extended definitions --
data OI                 =  OI OI#

type OI#                =  String -> State# RealWorld

type IO# a              =  State# RealWorld -> (# State# RealWorld, a #)


part#                   :: OI# -> (# OI#, OI# #)
part# h                 =  case h "partOI" of
                             s -> case dispense# s of
                                    (# s', h1 #) ->
                                      case dispense# s' of
                                        (# _, h2 #) -> (# h1, h2 #)

dispense#               :: IO# OI#
dispense# s             =  case newMutVar# () s of
                             (# s', r #) -> (# s', expire# s' r #)

expire#                 :: State# s -> MutVar# s () -> String -> State# s
expire# s r name        =  case atomicModifyMutVar# r use s of
                             (# s', () #) -> s'
                           where
                               use x   =  (error nowUsed, x)
                               nowUsed =  name' ++ ": already expired"
                               name'   =  if all isSpace name then "(unknown)"
                                          else name

invokes#                :: Monomo a => String -> IO# a -> OI# -> a
(name `invokes#` act) h =  case act (noDuplicate# (h name)) of (# _, t #) -> t

 -- supplemental instances --
instance Monomo Int

...we've even managed to get rid of that unsafe... definition - nice!

P.S: If you're wondering about that peculiar Monomo class, you can find clues in the history of Standard ML...