Aelve Codesearch

grep over package repositories
arbtt-0.10.2
src/Categorize.hs
{-# LANGUAGE Rank2Types, CPP, FlexibleContexts #-}
module Categorize where

import Data

import qualified Text.Regex.PCRE.Light.Text as RE
import qualified Data.MyText as T
import Data.MyText (Text)
import Control.Applicative (empty, (<*))
import Control.Monad
import Control.Monad.Instances()
import Control.Monad.Trans.Reader hiding (local)
import Control.Monad.Reader.Class (local)
import Control.Monad.Trans.Class
import Data.Functor.Identity

import Control.Applicative ((<$>))
import Control.DeepSeq
import Data.Char
import Data.List
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Time.Calendar (toGregorian, fromGregorian)
import Data.Time.Calendar.WeekDate (toWeekDate)
import Data.Time.Clock
import Data.Time.Format (formatTime)
import Data.Time.LocalTime
import System.Exit
import System.IO
import Text.Show.Functions
import Text.Parsec
import Text.Parsec.ExprFail
import Text.Parsec.Token
#if MIN_VERSION_time(1,5,0)
import Data.Time.Format(defaultTimeLocale, iso8601DateFormat)
#else
import System.Locale (defaultTimeLocale, iso8601DateFormat)
#endif
import Debug.Trace
import Text.Printf

type Categorizer = TimeLog CaptureData -> TimeLog (Ctx, ActivityData)
type Rule = Ctx -> ActivityData
type Environment = Map String Cond

type Parser = ParsecT String () (ReaderT (TimeZone, Environment) Identity)

data Ctx = Ctx
        { cNow :: TimeLogEntry CaptureData
        , cCurrentWindow :: Maybe (Bool, Text, Text)
        , cWindowInScope :: Maybe (Bool, Text, Text)
        , cSubsts :: [Text]
        , cCurrentTime :: UTCTime
        , cTimeZone :: TimeZone
        , conditionBindings :: Map String Cond
        } deriving Show

instance NFData Ctx where
    rnf (Ctx a b c d e f g) = a `deepseq` b `deepseq` c `deepseq` d `deepseq` e `deepseq` f `deepseq` g `deepseq` ()

type Cond = CtxFun [Text]

type CtxFun a = Ctx -> Maybe a

data CondPrim
        = CondString (CtxFun Text)
        | CondRegex (CtxFun RE.Regex)
        | CondInteger (CtxFun Integer)
        | CondTime (CtxFun NominalDiffTime)
        | CondDate (CtxFun UTCTime)
        | CondCond (CtxFun [Text])
        | CondStringList (CtxFun [Text])
        | CondRegexList (CtxFun [RE.Regex])

newtype Cmp = Cmp (forall a. Ord a => a -> a -> Bool)

data DateVar = DvDate | DvNow

data TimeVar = TvTime | TvSampleAge

data NumVar = NvIdle

runParserStack :: Stream s (ReaderT r Identity) t
               => r
               -> ParsecT s () (ReaderT r Identity) a
               -> SourceName
               -> s
               -> Either ParseError a
runParserStack env p filename =
  runIdentity . flip runReaderT env . runParserT p () filename

readCategorizer :: FilePath -> IO Categorizer
readCategorizer filename = withFile filename ReadMode $ \h -> do
        hSetEncoding h utf8
        content <- hGetContents h
        time <- getCurrentTime
        tz <- getCurrentTimeZone
        case runParserStack (tz, Map.empty) (between (return ()) eof parseRules) filename content of
          Left err -> do
                putStrLn "Parser error:"
                print err
                exitFailure
          Right cat -> return
                (map (fmap (mkSecond (postpare . cat))) . prepare time tz)

applyCond :: String -> TimeZone -> Environment -> TimeLogEntry (Ctx, ActivityData) -> Bool
applyCond s tz env =
        case runParserStack (tz, env) (parseCond <* eof) "command line parameter" s of
          Left err -> error (show err)
          Right c  -> isJust . c . fst . tlData

prepare :: UTCTime -> TimeZone -> TimeLog CaptureData -> TimeLog Ctx
prepare time tz = map go
  where go now  = now {tlData = Ctx now (findActive (cWindows (tlData now))) Nothing [] time tz Map.empty }

-- | Here, we filter out tags appearing twice, and make sure that only one of
--   each category survives
postpare :: ActivityData -> ActivityData
postpare = nubBy go
  where go (Activity (Just c1) _) (Activity (Just c2) _) = c1 == c2
        go a1                     a2                     = a1 == a2

lang :: GenTokenParser String () (ReaderT (TimeZone, Environment) Identity)
lang = makeTokenParser LanguageDef
                { commentStart   = "{-"
                , commentEnd     = "-}"
                , commentLine    = "--"
                , nestedComments = True
                , identStart     = letter
                , identLetter    = alphaNum <|> oneOf "_'"
                , opStart        = oneOf ":!#$%&*+./<=>?@\\^|-~"
                , opLetter       = oneOf ":!#$%&*+./<=>?@\\^|-~"
                , reservedOpNames= []
                , reservedNames  = [ "title"
                                   , "program"
                                   , "active"
                                   , "idle"
                                   , "time"
                                   , "sampleage"
                                   , "date"
                                   , "now"
                                   , "desktop"
                                   ]
                , caseSensitive  = True
                }

parseRules :: Parser Rule
parseRules = do
        whiteSpace lang
        a <- option id (reserved lang "aliases" >> parens lang parseAliasSpecs)
        rb <- parseRulesBody
        return (a . rb)

parseAliasSpecs :: Parser (ActivityData -> ActivityData)
parseAliasSpecs = do as <- sepEndBy1 parseAliasSpec (comma lang)
                     return $ \ad -> foldr doAlias ad as

doAlias :: (Text, Text) -> ActivityData -> ActivityData
doAlias (s1,s2) = map go
  where go (Activity cat tag) = Activity (if cat == Just s1 then Just s2 else cat)
                                         (if tag == s1 then s2 else tag)

parseAliasSpec :: Parser (Text, Text)
parseAliasSpec = do s1 <- T.pack <$> stringLiteral lang
                    reservedOp lang "->"
                    s2 <- T.pack <$> stringLiteral lang
                    return (s1,s2)

parseRulesBody :: Parser Rule
parseRulesBody = do
        x <- parseRule
        choice [ do _ <- comma lang
                    xs <- parseRule `sepEndBy1` comma lang
                    return (matchAny (x:xs))
               , do _ <- semi lang
                    xs <- parseRule `sepEndBy1` semi lang
                    return (matchFirst (x:xs))
               ,    return x
               ]

withBinding :: String -> Cond -> Parser a -> Parser a
withBinding k v = local (\(tz,env) -> (tz, Map.insert k v env))

parseConditionBinding :: Parser Rule
parseConditionBinding = do
  _ <- reserved lang "condition"
  varname <- identifier lang
  _ <- reservedOp lang "="
  cond <- parseCond
  _ <- reserved lang "in"
  withBinding varname cond parseRule

parseRule :: Parser Rule
parseRule = choice
        [    braces lang parseRules
        , do cond <- parseCond
             reservedOp lang "==>"
             rule <- parseRule
             return (ifThenElse cond rule matchNone)
        , do reserved lang "if"
             cond <- parseCond
             reserved lang "then"
             rule1 <- parseRule
             reserved lang "else"
             rule2 <- parseRule
             return (ifThenElse cond rule1 rule2)
        , do reserved lang "tag"
             parseSetTag
        , parseConditionBinding
        ]

parseCond :: Parser Cond
parseCond = do cp <- parseCondExpr
               case cp of
                CondCond c -> return c
                _          -> fail $ printf "Expression of type %s" (cpType cp)

parseCondExpr :: Parser CondPrim
parseCondExpr = buildExpressionParser [
                [ Prefix (reservedOp lang "!" >> return checkNot) ],
                [ Prefix (reserved lang "day of week" >> return evalDayOfWeek)
                , Prefix (reserved lang "day of month" >> return evalDayOfMonth)
                , Prefix (reserved lang "month" >> return evalMonth)
                , Prefix (reserved lang "year" >> return evalYear)
                , Prefix (reserved lang "format" >> return formatDate) ],
                [ Infix (reservedOp lang "=~" >> return checkRegex) AssocNone
                , Infix (checkCmp <$> parseCmp) AssocNone
                ],
                [ Prefix (reserved lang "current window" >> return checkCurrentwindow)
                , Prefix (reserved lang "any window" >> return checkAnyWindow)
                ],
                [ Infix (reservedOp lang "&&" >> return checkAnd) AssocRight ],
                [ Infix (reservedOp lang "||" >> return checkOr) AssocRight ]
            ] parseCondPrim

cpType :: CondPrim -> String
cpType (CondString _) = "String"
cpType (CondRegex _) = "Regex"
cpType (CondInteger _) = "Integer"
cpType (CondTime _) = "Time"
cpType (CondDate _) = "Date"
cpType (CondCond _) = "Condition"
cpType (CondStringList _) = "List of Strings"
cpType (CondRegexList _) = "List of regular expressions"

checkRegex :: CondPrim -> CondPrim -> Erring CondPrim
checkRegex (CondString getStr) (CondRegex getRegex) = Right $ CondCond $ \ctx -> do
        str <- getStr ctx
        regex <- getRegex ctx
        tail <$> RE.match regex str [RE.exec_no_utf8_check]
checkRegex (CondString getStr) (CondRegexList getRegexList) = Right $ CondCond $ \ctx -> do
        str <- getStr ctx
        regexes <- getRegexList ctx
        tail <$> msum (map (\regex -> RE.match regex str [RE.exec_no_utf8_check]) regexes)
checkRegex cp1 cp2 = Left $
        printf "Cannot apply =~ to an expression of type %s and type %s"
               (cpType cp1) (cpType cp2)

checkAnd :: CondPrim-> CondPrim -> Erring CondPrim
checkAnd (CondCond c1) (CondCond c2) = Right $ CondCond $ do
        res1 <- c1
        res2 <- c2
        return $ res1 >> res2
checkAnd cp1 cp2 = Left $
        printf "Cannot apply && to an expression of type %s and type %s"
               (cpType cp1) (cpType cp2)

checkOr :: CondPrim-> CondPrim -> Erring CondPrim
checkOr (CondCond c1) (CondCond c2) = Right $ CondCond $ do
        res1 <- c1
        res2 <- c2
        return $ res1 `mplus` res2
checkOr cp1 cp2 = Left $
        printf "Cannot apply && to an expression of type %s and type %s"
               (cpType cp1) (cpType cp2)

checkNot :: CondPrim -> Erring CondPrim
checkNot (CondCond getCnd) = Right . CondCond $ fmap (maybe (Just []) (const Nothing)) getCnd
checkNot cp = Left $
        printf "Cannot apply ! to an expression of type %s"
               (cpType cp)

checkCmp :: Cmp -> CondPrim -> CondPrim -> Erring CondPrim
checkCmp (Cmp (?)) (CondInteger getN1) (CondInteger getN2) = Right $ CondCond $ \ctx -> do
        n1 <- getN1 ctx
        n2 <- getN2 ctx
        guard (n1 ? n2)
        return []
checkCmp (Cmp (?)) (CondTime getT1) (CondTime getT2) = Right $ CondCond $ \ctx -> do
        t1 <- getT1 ctx
        t2 <- getT2 ctx
        guard (t1 ? t2)
        return []
checkCmp (Cmp (?)) (CondDate getT1) (CondDate getT2) = Right $ CondCond $ \ctx -> do
        t1 <- getT1 ctx
        t2 <- getT2 ctx
        guard (t1 ? t2)
        return []
checkCmp (Cmp (?)) (CondString getS1) (CondString getS2) = Right $ CondCond $ \ctx -> do
        s1 <- getS1 ctx
        s2 <- getS2 ctx
        guard (s1 ? s2)
        return []
checkCmp (Cmp (?)) (CondString getS1) (CondStringList getS2) = Right $ CondCond $ \ctx -> do
        s1 <- getS1 ctx
        sl <- getS2 ctx
        guard (any (s1 ?) sl)
        return []
checkCmp _ cp1 cp2 = Left $
        printf "Cannot compare expressions of type %s and type %s"
               (cpType cp1) (cpType cp2)

checkCurrentwindow :: CondPrim -> Erring CondPrim
checkCurrentwindow (CondCond cond) = Right $ CondCond $ \ctx ->
        cond (ctx { cWindowInScope = cCurrentWindow ctx })
checkCurrentwindow cp = Left $
        printf "Cannot apply current window to an expression of type %s"
               (cpType cp)

checkAnyWindow :: CondPrim -> Erring CondPrim
checkAnyWindow (CondCond cond) = Right $ CondCond $ \ctx ->
        msum $ map (\w -> cond (ctx { cWindowInScope = Just w }))
                                     (cWindows (tlData (cNow ctx)))
checkAnyWindow cp = Left $
        printf "Cannot apply current window to an expression of type %s"
               (cpType cp)

fst3 :: (a,b,c) -> a
fst3 (a,_,_) = a

snd3 :: (a,b,c) -> b
snd3 (_,b,_) = b

trd3 :: (a,b,c) -> c
trd3 (_,_,c) = c

-- Day of week is an integer in [1..7].
evalDayOfWeek :: CondPrim -> Erring CondPrim
evalDayOfWeek (CondDate df) = Right $ CondInteger $ \ctx ->
  let tz = cTimeZone ctx in
  (toInteger . trd3 . toWeekDate . localDay . utcToLocalTime tz) `fmap` df ctx
evalDayOfWeek cp = Left $ printf
  "Cannot apply day of week to an expression of type %s, only to $date."
  (cpType cp)

-- Day of month is an integer in [1..31].
evalDayOfMonth :: CondPrim -> Erring CondPrim
evalDayOfMonth (CondDate df) = Right $ CondInteger $ \ctx ->
  let tz = cTimeZone ctx in
  (toInteger . trd3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx
evalDayOfMonth cp = Left $ printf
  "Cannot apply day of month to an expression of type %s, only to $date."
  (cpType cp)

-- Month is an integer in [1..12].
evalMonth :: CondPrim -> Erring CondPrim
evalMonth (CondDate df) = Right $ CondInteger $ \ctx ->
  let tz = cTimeZone ctx in
  (toInteger . snd3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx
evalMonth cp = Left $ printf
  "Cannot apply month to an expression of type %s, only to $date."
  (cpType cp)

evalYear :: CondPrim -> Erring CondPrim
evalYear (CondDate df) = Right $ CondInteger $ \ctx ->
  let tz = cTimeZone ctx in
  (fst3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx
evalYear cp = Left $ printf
  "Cannot apply year to an expression of type %s, only to $date."
  (cpType cp)

-- format date according to ISO 8601 (YYYY-MM-DD)
formatDate :: CondPrim -> Erring CondPrim
formatDate (CondDate df) = Right $ CondString $ \ctx ->
  let tz = cTimeZone ctx
      local = utcToLocalTime tz `fmap` df ctx
   in T.pack . formatTime defaultTimeLocale (iso8601DateFormat Nothing) <$> local
formatDate cp = Left $ printf
  "Cannot format an expression of type %s, only $date." (cpType cp)

parseCmp :: Parser Cmp
parseCmp = choice $ map (\(s,o) -> reservedOp lang s >> return o)
                        [(">=",Cmp (>=)),
                         (">", Cmp (>)),
                         ("==",Cmp (==)),
                         ("=", Cmp (==)),
                         ("!=",Cmp (/=)),
                         ("<", Cmp (<)),
                         ("<=",Cmp (<=))]

parseCondPrim :: Parser CondPrim
parseCondPrim = choice
        [ parens lang parseCondExpr
        , brackets lang (choice [
            (do list <- commaSep1 lang (stringLiteral lang)
                return $ CondStringList (const (Just (map T.pack list)))
            ) <?> "list of strings",
            (do list <- commaSep1 lang parseRegex
                return $ CondRegexList (const (Just list))
            ) <?> "list of regular expressions"
            ])
        , char '$' >> choice
             [ do backref <- read <$> many1 digit
                  return $ CondString (getBackref backref)
             , choice [ reserved lang "title" >> return (CondString (getVar "title"))
                      , reserved lang "program" >> return (CondString (getVar "program"))
                      , reserved lang "active" >> return (CondCond checkActive)
                      , reserved lang "idle" >> return (CondInteger (getNumVar NvIdle))
                      , reserved lang "time" >> return (CondTime (getTimeVar TvTime))
                      , reserved lang "sampleage" >> return (CondTime (getTimeVar TvSampleAge))
                      , reserved lang "date" >> return (CondDate (getDateVar DvDate))
                      , reserved lang "now" >> return (CondDate (getDateVar DvNow))
                      , reserved lang "desktop" >> return (CondString (getVar "desktop"))
                      , do varname <- identifier lang
                           inEnvironment <- (lift (asks (Map.lookup varname . snd)))
                           case inEnvironment of
                             Nothing -> fail ("Reference to unbound variable: '" ++ varname ++ "'")
                             Just cond -> return (CondCond cond)
                      ]
              ] <?> "variable"
        , do regex <- parseRegex <?> "regular expression"
             return $ CondRegex (const (Just regex))
        , do str <- T.pack <$> stringLiteral lang <?> "string"
             return $ CondString (const (Just str))
        , try $ do time <- parseTime <?> "time" -- backtrack here, it might have been a number
                   return $ CondTime (const (Just time))
        , try $ do date <- parseDate <?> "date" -- backtrack here, it might have been a number
                   return $ CondDate (const (Just date))
        , do num <- natural lang <?> "number"
             return $ CondInteger (const (Just num))
        ]

parseRegex :: Parser RE.Regex
parseRegex = fmap (flip RE.compile [] . T.pack) $ lexeme lang $ choice
        [ between (char '/') (char '/') (many1 (noneOf "/"))
        , do _ <- char 'm'
             c <- anyChar
             str <- many1 (noneOf [c])
             _ <- char c
             return str
        ]

-- | Parses a day-of-time specification (hh:mm)
parseTime :: Parser NominalDiffTime
parseTime = fmap fromIntegral $ lexeme lang $ do
               hour <- read <$> many1 digit
               _ <- char ':'
               minute <- read <$> count 2 digit
               return $ (hour * 60 + minute) * 60

parseDate :: Parser UTCTime
parseDate = lexeme lang $ do
    tz <- lift (asks fst)
    year <- read <$> count 4 digit
    _ <- char '-'
    month <- read <$> count 2 digit
    _ <- char '-'
    day <- read <$> count 2 digit
    time <- option 0 parseTime
    let date = LocalTime (fromGregorian year month day) (TimeOfDay 0 0 0)
    return $ addUTCTime time $ localTimeToUTC tz date


parseSetTag :: Parser Rule
parseSetTag = lexeme lang $ do
                 firstPart <- parseTagPart
                 choice [ do char ':'
                             secondPart <- parseTagPart
                             return $ do cat <- firstPart
                                         tag <- secondPart
                                         return $ maybeToList $ do
                                            cat <- cat
                                            tag <- tag
                                            return $ Activity (Just cat) tag
                        ,    return $ do tag <- firstPart
                                         return $ maybeToList $ do
                                            tag <- tag
                                            return $ Activity Nothing tag
                        ]

replaceForbidden :: Maybe Text -> Maybe Text
replaceForbidden = fmap $ T.map go
  where
    go c | isAlphaNum c  = c
         | c `elem` "-_" = c
         | otherwise     = '_'

parseTagPart :: Parser (Ctx -> Maybe Text)
parseTagPart = do parts <- many1 (choice
                        [ do char '$'
                             (replaceForbidden . ) <$> choice
                               [ do num <- read <$> many1 digit
                                    return $ getBackref num
                               , do varname <- many1 (letter <|> oneOf ".")
                                    return $ getVar varname
                               ] <?> "variable"
                        , do s <- many1 (alphaNum <|> oneOf "-_")
                             return $ const (Just (T.pack s))
                        ])
                  return $ (fmap T.concat . sequence) <$> sequence parts

ifThenElse :: Cond -> Rule -> Rule -> Rule
ifThenElse cond r1 r2 = do res <- cond
                           case res of
                            Just substs -> r1 . setSubsts substs
                            Nothing -> r2
  where setSubsts :: [Text] -> Ctx -> Ctx
        setSubsts substs ctx = ctx { cSubsts = substs }


matchAny :: [Rule] -> Rule
matchAny rules = concat <$> sequence rules
matchFirst :: [Rule] -> Rule
matchFirst rules = takeFirst <$> sequence rules
  where takeFirst [] = []
        takeFirst ([]:xs) = takeFirst xs
        takeFirst (x:_) = x


getBackref :: Integer -> CtxFun Text
getBackref n ctx = listToMaybe (drop (fromIntegral n-1) (cSubsts ctx))

getVar :: String -> CtxFun Text
getVar v ctx | "current" `isPrefixOf` v = do
                let var = drop (length "current.") v
                win <- cCurrentWindow ctx
                getVar var (ctx { cWindowInScope = Just win })
getVar "title"   ctx = do
                (_,t,_) <- cWindowInScope ctx
                return t
getVar "program" ctx = do
                (_,_,p) <- cWindowInScope ctx
                return p
getVar "desktop" ctx = return $ cDesktop (tlData (cNow ctx))
getVar v _ = error $ "Unknown variable " ++ v

getNumVar :: NumVar -> CtxFun Integer
getNumVar NvIdle ctx = Just $ cLastActivity (tlData (cNow ctx)) `div` 1000

getTimeVar :: TimeVar -> CtxFun NominalDiffTime
getTimeVar TvTime ctx = Just $
   let utc = tlTime . cNow $ ctx
       tz = cTimeZone ctx
       local = utcToLocalTime tz utc
       midnightUTC = localTimeToUTC tz $ local { localTimeOfDay = midnight }
    in utc `diffUTCTime` midnightUTC
getTimeVar TvSampleAge ctx = Just $ cCurrentTime ctx `diffUTCTime` tlTime (cNow ctx)

getDateVar :: DateVar -> CtxFun UTCTime
getDateVar DvDate = Just . tlTime . cNow
getDateVar DvNow = Just . cCurrentTime

findActive :: [(Bool, t, t1)] -> Maybe (Bool, t, t1)
findActive = find (\(a,_,_) -> a)

checkActive :: Cond
checkActive ctx = do (a,_,_) <- cWindowInScope ctx
                     guard a
                     return []

matchNone :: Rule
matchNone = const []

justIf :: a -> Bool -> Maybe a
justIf x True = Just x
justIf _ False = Nothing

mkSecond :: (a -> b) -> a -> (a, b)
mkSecond f a = (a, f a)