Aelve Codesearch

grep over package repositories
ADPfusion-0.5.2.2
ADP/Fusion/SynVar/Fill.hs

module ADP.Fusion.SynVar.Fill where

import           Control.Monad
import           Control.Monad.Morph (hoist, MFunctor (..))
import           Control.Monad.Primitive (PrimMonad (..))
import           Control.Monad.ST
import           Control.Monad.Trans.Class (lift, MonadTrans (..))
import           Data.Vector.Fusion.Util (Id(..))
import           GHC.Exts (inline)
import qualified Data.Vector.Fusion.Stream.Monadic as SM
import           System.IO.Unsafe
import           Control.Monad (when,forM_)
import           Data.List (nub,sort,group)
import qualified Data.Vector.Unboxed as VU
import           Data.Proxy
import qualified GHC.Generics as G
import qualified Data.Typeable as T
import qualified Data.Data as D
import           Data.Dynamic
import           Data.Type.Equality
import qualified Data.List as L

import           Data.PrimitiveArray

import           ADP.Fusion.SynVar.Array -- TODO we want to keep only classes in here, move instances to the corresponding modules
import           ADP.Fusion.SynVar.Recursive.Type
import           ADP.Fusion.SynVar.TableWrap

import           Debug.Trace



-- | A vanilla context-free grammar

data CFG

-- | This grammar is a multi-cfg in a monotone setting

data MonotoneMCFG


-- * Unsafely mutate 'ITbls' and similar tables in the forward phase.

-- | Mutate a cell in a stack of syntactic variables.
--
-- TODO generalize to monad morphism via @mmorph@ package. This will allow
-- more interesting @mrph@ functions that can, for example, track some
-- state in the forward phase. (Note that this can be dangerous, we do
-- /not/ want to have this state influence forward results, unless that can
-- be made deterministic, or we'll break Bellman)

class MutateCell (h :: *) (s :: *) (im :: * -> *) i where
  mutateCell :: (Monad om, PrimMonad om) => Proxy h -> Int -> Int -> (forall a . im a -> om a) -> s -> i -> i -> om ()

-- |

class MutateTables (h :: *) (s :: *) (im :: * -> *) where
  mutateTables :: (Monad om, PrimMonad om) => Proxy h -> (forall a . im a -> om a) -> s -> om s

class TableOrder (s :: *) where
  tableLittleOrder :: s -> [Int]
  tableBigOrder :: s -> [Int]

instance TableOrder Z where
  tableLittleOrder Z = []
  tableBigOrder Z = []
  {-# Inline tableLittleOrder #-}
  {-# Inline tableBigOrder #-}

instance (TableOrder ts) => TableOrder (ts:.TwITbl im arr c i x) where
  tableLittleOrder (ts:.TW (ITbl _ tlo _ _) _) = tlo : tableLittleOrder ts
  tableBigOrder    (ts:.TW (ITbl tbo _ _ _) _) = tbo : tableBigOrder ts
  {-# Inline tableLittleOrder #-}
  {-# Inline tableBigOrder #-}

-- | @IRec@s do not need an order, given that they do not memoize.

instance (TableOrder ts) => TableOrder (ts:.TwIRec im c i x) where
  tableLittleOrder (ts:._) = tableLittleOrder ts
  tableBigOrder    (ts:._) = tableBigOrder ts
  {-# Inline tableLittleOrder #-}
  {-# Inline tableBigOrder #-}

-- ** individual instances for filling a *single cell*

instance
  (
  ) => MutateCell p Z im i where
  mutateCell _ _ _ _ Z _ _ = return ()
  {-# INLINE mutateCell #-}

instance
  ( MutateCell CFG ts im i
  ) => MutateCell CFG (ts:.TwIRec im c i x) im i where
  mutateCell h bo lo mrph (ts:._) lu i = do
    mutateCell h bo lo mrph ts lu i
  {-# Inline mutateCell #-}

instance
  ( PrimArrayOps  arr i x
  , MPrimArrayOps arr i x
  , MutateCell CFG ts im i
  ) => MutateCell CFG (ts:.TwITbl im arr c i x) im i where
  mutateCell h bo lo mrph (ts:.TW (ITbl tbo tlo c arr) f) lu i = do
    mutateCell h bo lo mrph ts lu i
    when (bo==tbo && lo==tlo) $ do
      marr <- unsafeThaw arr
      z <- (inline mrph) $ f lu i
      writeM marr i z
  {-# INLINE mutateCell #-}

type ZS2 = Z:.Subword I:.Subword I

instance
  ( PrimArrayOps  arr ZS2 x
  , MPrimArrayOps arr ZS2 x
  , MutateCell MonotoneMCFG ts im ZS2
  ) => MutateCell MonotoneMCFG (ts:.TwITbl im arr c ZS2 x) im ZS2 where
  mutateCell h bo lo mrph (ts:.TW (ITbl tbo tlo c arr) f) lu iklj@(Z:.Subword (i:.k):.Subword(l:.j)) = do
    mutateCell h bo lo mrph ts lu iklj
    when (bo==tbo && lo==tlo && k<=l) $ do
      marr <- unsafeThaw arr
      z <- (inline mrph) $ f lu iklj
      writeM marr iklj z
  {-# INLINE mutateCell #-}

instance
  ( PrimArrayOps arr (Subword I) x
  , MPrimArrayOps arr (Subword I) x
  , MutateCell h ts im (Z:.Subword I:.Subword I)
  ) => MutateCell h (ts:.TwITbl im arr c (Subword I) x) im (Z:.Subword I:.Subword I) where
  mutateCell h bo lo mrph (ts:.TW (ITbl tbo tlo c arr) f) lu@(Z:.Subword (l:._):.Subword(_:.u)) ix@(Z:.Subword (i1:.j1):.Subword (i2:.j2)) = do
    mutateCell h bo lo mrph ts lu ix
    when (bo==tbo && lo==tlo && i1==i2 && j1==j2) $ do
      let i = i1
      let j = j1
      marr <- unsafeThaw arr
      z <- (inline mrph) $ f (subword l u) (subword i j)
      writeM marr (subword i j) z
  {-# Inline mutateCell #-}



-- ** individual instances for filling a complete table and extracting the
-- bounds

instance
  ( MutateCell h (ts:.TwITbl im arr c i x) im i
  , PrimArrayOps arr i x
  , Show i
  , IndexStream i
  , TableOrder (ts:.TwITbl im arr c i x)
  ) => MutateTables h (ts:.TwITbl im arr c i x) im where
  mutateTables h mrph tt@(_:.TW (ITbl _ _ _ arr) _) = do
    let (from,to) = bounds arr
    -- TODO (1) find the set of orders for the synvars
    let !tbos = VU.fromList . nub . sort $ tableBigOrder tt
    let !tlos = VU.fromList . nub . sort $ tableLittleOrder tt
    VU.forM_ tbos $ \bo ->
      case (VU.length tlos) of
        1 -> let lo = VU.head tlos
             in  flip SM.mapM_ (streamUp from to) $ \k ->
                  mutateCell h bo lo (inline mrph) tt to k
        -- TODO each big-order group should be allowed to have its own sets
        -- of bounds. within a group, it doesn't make a lot of sense to
        -- have different bounds? Is there a use case for that even?
        _ -> flip SM.mapM_ (streamUp from to) $ \k ->
              VU.forM_ tlos $ \lo ->
                mutateCell h bo lo (inline mrph) tt to k
    return tt
  {-# INLINE mutateTables #-}

-- | Default table filling, assuming that the forward monad is just @IO@.
--
-- TODO generalize to @MonadIO@ or @MonadPrim@.

mutateTablesDefault :: MutateTables CFG t Id => t -> t
mutateTablesDefault t = unsafePerformIO $ mutateTables (Proxy :: Proxy CFG) (return . unId) t
{-# INLINE mutateTablesDefault #-}

-- | Mutate tables, but observe certain hints. We use this for monotone
-- mcfgs for now.

mutateTablesWithHints :: MutateTables h t Id => Proxy h -> t -> t
mutateTablesWithHints h t = unsafePerformIO $ mutateTables h (return . unId) t






mutateTablesST t = runST $ mutateTablesNew t
{-# Inline mutateTablesST #-}

-- | 
--
-- TODO new way how to do table filling. Because we now have heterogeneous
-- tables (i) group tables by @big order@ into different bins; (ii) check
-- that each bin has the same bounds (needed? -- could we have
-- smaller-sized tables once in a while); (iii) run each bin one after the
-- other
--
-- TODO measure performance penalty, if any. We might need liberal
-- INLINEABLE, and specialization. On the other hand, we can do the
-- freeze/unfreeze outside of table filling.

mutateTablesNew
  :: forall t m .
     ( TableOrder t
     , TSBO t
     , Monad m
     , PrimMonad m
     )
  => t
  -> m t
mutateTablesNew ts = do
  -- sort the tables according to [bigorder,type,littleorder]. For each
  -- @bigorder@, we should have only one @type@ and can therefor do the
  -- following (i) get subset of the @ts@, (ii) use outermost of @ts@ to
  -- get bounds, (iii) fill these tables
  let !tbos = VU.fromList . nub . sort $ tableBigOrder ts
  let ds = L.sort $ asDyn ts
  let goM :: (Monad m, PrimMonad m) => [Q] -> m ()
      goM [] = return ()
      goM xs = do
        ys <- fillWithDyn xs ts
        if null ys
          then return ()
          else goM ys
      {-# Inlinable goM #-}
  goM ds
  return ts
{-# Inline mutateTablesNew #-}

data Q = Q
  { qBigOrder     :: Int
  , qLittleOrder  :: Int
  , qTypeRep      :: T.TypeRep
  , qObject       :: Dynamic
  }
  deriving (Show)

instance Eq Q where
  Q bo1 lo1 tr1 _ == Q bo2 lo2 tr2 _ = (bo1,tr1,lo1) == (bo2,tr2,lo2)

instance Ord Q where
  Q bo1 lo1 tr1 _ `compare` Q bo2 lo2 tr2 _ = (bo1,tr1,lo1) `compare` (bo2,tr2,lo2)

-- | Find the outermost table that has a certain big order and then fill
-- from there.

class TSBO t where
  asDyn :: t -> [Q]
  fillWithDyn :: (Monad m, PrimMonad m) => [Q] -> t -> m [Q]

instance TSBO Z where
  asDyn Z = []
  fillWithDyn qs Z = return qs
  {-# Inlinable asDyn #-}
  {-# Inline fillWithDyn #-}

instance
 ( TSBO ts
 , Typeable arr
 , Typeable c
 , Typeable i
 , Typeable x
 , PrimArrayOps arr i x
 , MPrimArrayOps arr i x
 , IndexStream i
 ) => TSBO (ts:.TwITbl Id arr c i x) where
  asDyn (ts:.t@(TW (ITbl bo lo _ _) _)) = Q bo lo (T.typeOf t) (toDyn t) : asDyn ts
  fillWithDyn qs (ts:.t@(TW (ITbl bo lo _ arr) f)) = do
    let (from,to) = bounds arr
    -- @hs@ are all tables that can be filled here
    -- @ns@ are all tables we can't fill and need to process further down
    -- the line
    let (hs,ns) = L.span (\Q{..} -> qBigOrder == bo && qTypeRep == T.typeOf t) qs
    if null hs
      then fillWithDyn qs ts
      else do
        let ms = Prelude.map concrete hs
            concrete  = (maybe (error "fromDynamic should not fail!")
                         (\x -> x `asTypeOf` t)
                        . fromDynamic . qObject)
        -- We have a single table and should short-circuit here
        --
        -- TODO we should specialize for tables of lengh @1..k@ for some
        -- small k. For @1@ and Needleman-Wunsch, we have a very nice @1.8@
        -- seconds down to @1.25@ seconds. :-)
        case (length ms) of
          1 -> do marr <- unsafeThaw arr
                  flip SM.mapM_ (streamUp from to) $ \k -> do
                    -- TODO @inline mrph@ ...
                    z <- (return . unId) $ f to k
                    writeM marr k z
        -- We have more than one table in will work over the list of tables
          _ -> do marrfs <- Prelude.mapM (\(TW (ITbl _ _ _ arr) f) -> unsafeThaw arr >>= \marr -> return (marr,f)) ms
                  flip SM.mapM_ (streamUp from to) $ \k ->
                    forM_ marrfs $ \(marr,f) -> do
                      z <- (return . unId) $ f to k
                      writeM marr k z
        -- traceShow (hs,length ms) $
        return ns
  {-# Inline fillWithDyn #-}

-- We don't need to capture @IRec@ tables as no table-filling takes place
-- for those tables. @asDyn@ therefore just collects on the remaining @ts@,
-- while @fillWithDyn@ hands of to the next possible table.

instance
  ( TSBO ts
  ) => TSBO (ts:.TwIRec Id c i x) where
  asDyn (ts:.t@(TW (IRec _ _ _) _)) = asDyn ts
  fillWithDyn qs (ts:._) = fillWithDyn qs ts
  {-# Inlinable asDyn #-}
  {-# Inline fillWithDyn #-}