diff options
author | Yorhel <git@yorhel.nl> | 2017-10-15 15:15:16 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2017-10-15 15:16:23 +0200 |
commit | a7a10f4e76dda3d82b3cc5cc1fe6f4f5201a3828 (patch) | |
tree | f33c6606ed58c2c4c02b3ea3de4064cbd27e74e6 | |
parent | ec9f67566c2517d71cbc593c189f12c3f677ef54 (diff) |
Separate state from statements; Don't use mtl for evaluation
I also re-ordered the arguments of evalExpr, step, final, etc and added
some extra let bindings with the hopes that all the pattern matching on
expressions and statements is performed only once at the start instead
of every time at each call. Adding some trace functions suggested that I
didn't succeed at that, however. I'll need to figure out why.
In any case, the mtl-less evaluation code is certainly cleaner, and
having the state separated from the statements should also be a slight
performance improvement. Unfortunately, this separation isn't type-safe.
-rw-r--r-- | cli/Main.hs | 39 | ||||
-rw-r--r-- | logstat.cabal | 2 | ||||
-rw-r--r-- | logstat.conf | 4 | ||||
-rw-r--r-- | src/Logstat/Eval.hs | 213 | ||||
-rw-r--r-- | src/Logstat/Parse.hs | 10 | ||||
-rw-r--r-- | src/Logstat/Types.hs | 27 | ||||
-rw-r--r-- | tests/Main.hs | 4 |
7 files changed, 148 insertions, 151 deletions
diff --git a/cli/Main.hs b/cli/Main.hs index 37bccf3..89b2d21 100644 --- a/cli/Main.hs +++ b/cli/Main.hs @@ -3,8 +3,6 @@ module Main where import Control.Exception (catchJust) import Control.Monad (filterM,foldM) -import Control.Monad.Except -import Control.Monad.State.Strict import System.Directory (doesFileExist,getHomeDirectory) import System.Exit import System.FilePath ((</>)) @@ -30,44 +28,45 @@ extractShow proc = f [] ["msg"] proc f l v [] = Right (map EField v, reverse l) f l _ ((SShow e):[]) = Right (e, reverse l) f _ _ ((SShow _):_) = Left $ "Error: 'show' must not be followed by other statements" -- TODO: CONTEXT! - f l _ (x@(SGroup v _):xs) = f (x:l) v xs + f l _ (x@(SGroup v):xs) = f (x:l) v xs f l v (x:xs) = f (x:l) v xs -run :: [Expr] -> [Stmt] -> IO () -run sh = loop +run :: [State] -> [Event -> Either EvalError Val] -> Step -> ([State] -> [Either EvalError Event]) -> IO () +run st sh stp fnl = loop st where - loop comp = do + loop state = do r <- catchJust (\e -> if isEOFError e then Just () else Nothing) - (Just <$> line comp) - (const (Nothing <$ finalize comp)) + (Just <$> line state) + (const (Nothing <$ finalize state)) case r of Nothing -> return () - Just comp' -> loop comp' + Just state' -> loop state' eval ev = - case runExcept (mapM (evalExpr ev) sh) of + case mapM (\f -> f ev) sh of Left e -> hPutStrLn stderr $ show e ++ " in show statement" Right v -> C.putStrLn $ B.intercalate ", " $ map asBS v - line comp = do + line state = do l <- B.getLine - let event = Map.fromList [("msg", bs l)] - (r, comp') = runState (runExceptT (stepL event)) comp + let event = Map.fromList [("msg", bs l)] + (state', r) = stp state event case r of Left Filtered -> return () -- TODO: Add more error context, especially the file+line of the statement that triggered the error Left e -> hPutStrLn stderr $ show e ++ " with input: " ++ show l Right v -> eval v - return comp' + return state' - finalize comp = - let (r, _) = runState (runExceptT finalL) comp in - case r of - Left e -> hPutStrLn stderr $ show e - Right l -> mapM_ eval l + finalize state = mapM_ + (\r -> + case r of + Left e -> hPutStrLn stderr $ show e + Right l -> eval l) + (fnl state) @@ -92,7 +91,7 @@ main = do Left e -> hPutStrLn stderr e >> exitFailure Right r -> return r - run sh stmts + run (newState stmts) (map evalExpr sh) (stepL stmts) (finalL stmts) where opts = info (optparse <**> helper) fullDesc diff --git a/logstat.cabal b/logstat.cabal index de74d9f..c06f124 100644 --- a/logstat.cabal +++ b/logstat.cabal @@ -41,7 +41,6 @@ executable logstat , directory , filepath , logstat - , mtl , optparse-applicative test-suite tests @@ -55,4 +54,3 @@ test-suite tests , bytestring , containers , logstat - , mtl diff --git a/logstat.conf b/logstat.conf index 82883f1..af336d4 100644 --- a/logstat.conf +++ b/logstat.conf @@ -58,11 +58,11 @@ proc foo { ## ORDER BY .. LIMIT .. (we don't need the generality of SQL, so stick to getting a top n sort) # sort <number|string> <expr> <asc|desc> limit <num> #sort string ip asc limit 10; - #sort string ip desc limit 10; + sort string ip desc limit 10; # The expression(s) to output. Only one 'show' is allowed in a program, and it # must be the last statement. - show ip, code == 200; + show ip, code; ## Multiple expressions should generate a suitably-sized table. ## ("suitably-sized" is hard to tell when there's no top-n-sort or ## aggregation going on. Probably stick to CSV or so in that case?) diff --git a/src/Logstat/Eval.hs b/src/Logstat/Eval.hs index 6a92c60..c9453b5 100644 --- a/src/Logstat/Eval.hs +++ b/src/Logstat/Eval.hs @@ -1,11 +1,8 @@ {-# LANGUAGE OverloadedStrings, FlexibleContexts, FlexibleInstances #-} module Logstat.Eval where -import Control.Monad.Except -import Control.Monad.State.Strict import Data.Fixed (mod') import Data.Foldable (foldl',toList) -import Data.Maybe (catMaybes) import qualified Data.ByteString as B import qualified Data.Heap as Heap import qualified Data.Map.Strict as Map @@ -15,44 +12,68 @@ import Logstat.Value import Logstat.Regex -except :: MonadError e m => Either e a -> m a -except (Left e) = throwError e -except (Right e) = return e +data State + = StNone + | StSortAscNum !(Heap.Heap (SortItem AscNum )) + | StSortAscBS !(Heap.Heap (SortItem AscBS )) + | StSortDescNum !(Heap.Heap (SortItem DescNum)) + | StSortDescBS !(Heap.Heap (SortItem DescBS )) + | StGroup !(Map.Map [Val] ()) -- TODO: Should add more state than '()' + deriving Show +data SortItem a = SortItem !a !Event deriving Show --- Monad in which computations run. State is remembered in case of exception; --- An exception only indicates an error for a single event and does not prevent --- processing of future events. -type Comp a = ExceptT EvalError (State a) +newtype AscNum = AscNum Double deriving (Show,Eq) +newtype AscBS = AscBS B.ByteString deriving (Show,Eq) +newtype DescNum = DescNum Double deriving (Show,Eq,Ord) +newtype DescBS = DescBS B.ByteString deriving (Show,Eq,Ord) +instance Ord AscNum where (AscNum a) <= (AscNum b) = a >= b +instance Ord AscBS where (AscBS a) <= (AscBS b) = a >= b -getField :: MonadError EvalError m => Event -> Field -> m Val -getField st n = maybe (throwError (UnknownField n)) return $ Map.lookup n st +instance Eq a => Eq (SortItem a) where (SortItem a _) == (SortItem b _) = a == b +instance Ord a => Ord (SortItem a) where (SortItem a _) <= (SortItem b _) = a <= b -evalExpr :: MonadError EvalError m => Event -> Expr -> m Val -evalExpr st expr = case expr of - ELit e -> return e - EField f -> getField st f - ENot e -> bool . not . asBool <$> evalExpr st e - EIf e t f-> evalExpr st e >>= \b -> evalExpr st $ if asBool b then t else f +newState :: [Stmt] -> [State] +newState = map st + where + st s = case s of + SSort _ _ SortAscNum -> StSortAscNum mempty + SSort _ _ SortAscBS -> StSortAscBS mempty + SSort _ _ SortDescNum -> StSortDescNum mempty + SSort _ _ SortDescBS -> StSortDescBS mempty + SGroup _ -> StGroup mempty + _ -> StNone + + +getField :: Event -> Field -> Either EvalError Val +getField st n = maybe (Left (UnknownField n)) return $ Map.lookup n st + + +evalExpr :: Expr -> Event -> Either EvalError Val +evalExpr expr ev = case expr of + ELit e -> Right e + EField f -> getField ev f + ENot e -> bool . not . asBool <$> evalExpr e ev + EIf e t f-> evalExpr e ev >>= \b -> evalExpr (if asBool b then t else f) ev ENeg e -> do - v <- evalExpr st e >>= asNum + v <- evalExpr e ev >>= asNum return $ num (- v) EMatch r e -> do - v <- evalExpr st e + v <- evalExpr e ev return $ bool $ reMatch r (asBS v) EExtract e r -> do - v <- asBS <$> evalExpr st e - ma <- maybe (throwError (NoExtract v)) return $ match r v + v <- asBS <$> evalExpr e ev + ma <- maybe (Left (NoExtract v)) return $ match r v return $ bs $ ma !! 1 EReplace e r n -> -- TODO: Support subpattern substitution - bs . gsub r (const n) . asBS <$> evalExpr st e + bs . gsub r (const n) . asBS <$> evalExpr e ev EOp op a' b' -> case op of @@ -78,7 +99,7 @@ evalExpr st expr = case expr of OOr -> with a' return $ \a -> if asBool a then return a else with b' return return OAnd -> with a' return $ \a -> if asBool a then with b' return return else return a where - with v f p = evalExpr st v >>= f >>= p + with v f p = evalExpr v ev >>= f >>= p withab f p = with a' f $ \a -> with b' f $ \b -> p a b -- ByteString comparison bcmp f = withab (return . asBS) $ \a b -> return $ bool $ f a b @@ -89,7 +110,7 @@ evalExpr st expr = case expr of -- Division with zero-check idiv f = withab asNum $ \a b -> if b == 0 - then throwError DivByZero + then Left DivByZero else return $ num $ f a b @@ -113,32 +134,32 @@ evalExpr st expr = case expr of -- all logs that would not make it to the top n. This heap is then sorted in -- the final step. - -step :: Event -> Comp Stmt Event -step ev = get >>= \stmt -> case stmt of +step :: Stmt -> State -> Event -> (State, Either EvalError Event) +step stmt st ev = case stmt of SShow _ -> undefined -- Should be handled by extractShow - SSet f e -> evalExpr ev e >>= \v -> return $ Map.insert f v ev - SFilter e -> evalExpr ev e >>= \v -> if asBool v then return ev else throwError Filtered + SSet f e -> (,) st $ evalExpr e ev >>= \v -> return $ Map.insert f v ev + SFilter e -> (,) st $ evalExpr e ev >>= \v -> if asBool v then return ev else Left Filtered - SRegex f r p -> do + SRegex f r p -> (,) st $ do val <- asBS <$> getField ev f - ma <- maybe (throwError (NoMatch f val)) return $ match r val + ma <- maybe (Left (NoMatch f val)) return $ match r val return $ foldl' ins ev $ zip ma p where ins s (_, Nothing) = s ins s (v, Just n) = Map.insert n (bs v) s - SSort n e st -> + SSort n e _ -> case st of - SortAscNum hp -> run SortAscNum hp (\v -> AscNum <$> asNum v) - SortAscBS hp -> run SortAscBS hp (return . AscBS . asBS) - SortDescNum hp -> run SortDescNum hp (\v -> DescNum <$> asNum v) - SortDescBS hp -> run SortDescBS hp (return . DescBS . asBS) + StSortAscNum hp -> run StSortAscNum hp (\v -> AscNum <$> asNum v) + StSortAscBS hp -> run StSortAscBS hp (return . AscBS . asBS) + StSortDescNum hp -> run StSortDescNum hp (\v -> DescNum <$> asNum v) + StSortDescBS hp -> run StSortDescBS hp (return . DescBS . asBS) + _ -> error "Invalid state for sort" where - run wrap hp f = do - val <- evalExpr ev e >>= f - put $ SSort n e $ wrap (ins hp val) - throwError Filtered + run wrap hp f = + case evalExpr e ev >>= f of + Left err -> (st, Left err) + Right val -> (wrap (ins hp val), Left Filtered) ins hp val = let si = SortItem val ev in if Heap.size hp < n @@ -147,69 +168,69 @@ step ev = get >>= \stmt -> case stmt of then Heap.insert si $ Heap.deleteMin hp else hp - SGroup f st -> do - v <- mapM (getField ev) f - put $ SGroup f $ Map.insert v () st - throwError Filtered + SGroup f -> + let StGroup m = st in + case mapM (getField ev) f of + Left err -> (st, Left err) + Right v -> (StGroup $ Map.insert v () m, Left Filtered) -final :: Comp Stmt [Event] -final = get >>= \stmt -> case stmt of - SSort _ _ st -> +final :: Stmt -> State -> [Event] +final stmt st = case stmt of + SSort _ _ _ -> case st of - SortAscNum hp -> f hp - SortAscBS hp -> f hp - SortDescNum hp -> f hp - SortDescBS hp -> f hp + StSortAscNum hp -> f hp + StSortAscBS hp -> f hp + StSortDescNum hp -> f hp + StSortDescBS hp -> f hp + _ -> error "Invalid state for sort" where - f hp = return $ map (\(SortItem _ ev) -> ev) $ reverse $ toList hp + f hp = map (\(SortItem _ ev) -> ev) $ reverse $ toList hp - SGroup f st -> return $ map (\(k,_) -> Map.fromList $ zip f k) $ Map.toList st + SGroup f -> + let StGroup m = st in + map (\(k,_) -> Map.fromList $ zip f k) $ Map.toList m - _ -> return [] + _ -> [] +type Step = [State] -> Event -> ([State], Either EvalError Event) --- TODO: These conversions between Monad and value representation are ugly. -stepL :: Event -> Comp [Stmt] Event -stepL ev' = do - (r, st) <- f ev' <$> get - put st - except r +stepL :: [Stmt] -> Step +stepL stmts = + -- "materialize" the statements into a list of (State -> Event -> ..) + -- functions. This hopefully causes the pattern matching on the Stmt value to + -- be performed only once, thus speeding up evaluation. But I obviously need + -- to measure the effect of this optimization to see if it even works at all. + let fs = map step stmts in loop fs where - f :: Event -> [Stmt] -> (Either EvalError Event, [Stmt]) - f ev [] = (Right ev, []) - f ev (s:xs) = - let (r, s') = runState (runExceptT (step ev)) s - in case r of - Left _ -> (r, s':xs) - Right e -> let (r', xs') = f e xs in (r', s':xs') - --- How does error handling work here? e.g. a sort statement will return all --- results from 'final', but those results are then passed to 'step' again --- where each event can individually fail. This type signature forces those --- individual errors to be ignored. -finalL :: Comp [Stmt] [Event] -finalL = do - (r, st) <- f <$> get - put st - except r + loop (f:fns) (st:sts) ev = + case f st ev of + (st', Left err) -> (st':sts, Left err) + (st', Right e) -> let (sts', e') = loop fns sts e in (st':sts', e') + loop _ _ ev = ([], Right ev) + + +finalL :: [Stmt] -> [State] -> [Either EvalError Event] +finalL stmts = + -- Same thing as in stepL + let fns = zip (stepL' stmts) (map final stmts) in loop fns where - f :: [Stmt] -> (Either EvalError [Event], [Stmt]) - f [] = (Right [], []) - f (s:xs) = - let (r, s') = runState (runExceptT final) s - in case r of - Left _ -> (r, s':xs) - Right e -> - let (r', xs') = runState (runExceptT (collect e)) xs in (r', s':xs') - - collect :: [Event] -> Comp [Stmt] [Event] - collect evl = do - -- This throws away errors from step, not very nice. - l1 <- catMaybes <$> mapM (\ev -> (Just <$> stepL ev) `catchError` (const $ return Nothing)) evl - l2 <- finalL - -- This is potentially slow. In practice either step or final returns - -- events, not both, so this is easily optimized in case (++) doesn't - -- already specialize this case. - return (l1 ++ l2) + stepL' :: [Stmt] -> [Step] + stepL' [] = [] + stepL' (_:xs) = stepL xs : stepL' xs + + loop (f:fns) (st:sts) = + let (sts', l1) = steps (fst f) sts (snd f st) + l2 = loop fns sts' + in l1 ++ l2 + loop _ _ = [] + + steps :: Step -> [State] -> [Event] -> ([State], [Either EvalError Event]) + steps _ st [] = (st, []) + steps f st (ev:evs) = + let (st', r) = f st ev + (st'', evs') = steps f st' evs + in (,) st'' $ case r of + Left Filtered -> evs' + _ -> r : evs' diff --git a/src/Logstat/Parse.hs b/src/Logstat/Parse.hs index 7e59258..041e8fd 100644 --- a/src/Logstat/Parse.hs +++ b/src/Logstat/Parse.hs @@ -201,7 +201,7 @@ stmts end = concat <$> sepEndBy1 stmt (symbol ";") <* end stmtGroup = do symbol "group" n <- sepBy1 identifier (symbol ",") - return (SGroup n mempty) + return (SGroup n) stmtRegex = do symbol "regex" @@ -225,10 +225,10 @@ stmts end = concat <$> sepEndBy1 stmt (symbol ";") <* end symbol "limit" n <- fromIntegral <$> L.decimal let st = case (f,o) of - (True, True ) -> SortAscBS mempty - (False, True ) -> SortAscNum mempty - (True, False) -> SortDescBS mempty - (False, False) -> SortDescNum mempty + (True, True ) -> SortAscBS + (False, True ) -> SortAscNum + (True, False) -> SortDescBS + (False, False) -> SortDescNum return (SSort n e st) diff --git a/src/Logstat/Types.hs b/src/Logstat/Types.hs index 11d2588..fe2740f 100644 --- a/src/Logstat/Types.hs +++ b/src/Logstat/Types.hs @@ -1,7 +1,6 @@ module Logstat.Types where import Data.Map.Strict (Map) -import Data.Heap (Heap) import Data.ByteString (ByteString) import Logstat.Regex (Regex) @@ -48,26 +47,6 @@ type Event = Map Field Val -- aggregate - List of states to aggregate over (not efficient to implement it that way, but can be optimized with static analysis) -data SortItem a = SortItem !a !Event deriving Show -data SortState - = SortAscNum !(Heap (SortItem AscNum )) - | SortAscBS !(Heap (SortItem AscBS )) - | SortDescNum !(Heap (SortItem DescNum)) - | SortDescBS !(Heap (SortItem DescBS )) - deriving Show - -newtype AscNum = AscNum Double deriving (Show,Eq) -newtype AscBS = AscBS ByteString deriving (Show,Eq) -newtype DescNum = DescNum Double deriving (Show,Eq,Ord) -newtype DescBS = DescBS ByteString deriving (Show,Eq,Ord) - -instance Ord AscNum where (AscNum a) <= (AscNum b) = a >= b -instance Ord AscBS where (AscBS a) <= (AscBS b) = a >= b - -instance Eq a => Eq (SortItem a) where (SortItem a _) == (SortItem b _) = a == b -instance Ord a => Ord (SortItem a) where (SortItem a _) <= (SortItem b _) = a <= b - - data Op = OEq | ONeq | OLt | OGt | OLe | OGe -- String comparison | OIEq | OINeq | OILt | OIGt | OILe | OIGe -- Integer comparison @@ -76,6 +55,8 @@ data Op | OAnd | OOr -- Boolean operations deriving(Eq,Show) +data SortType = SortAscNum | SortAscBS | SortDescNum | SortDescBS deriving Show + data Expr = EField !Field | ELit !Val @@ -92,8 +73,8 @@ data Stmt = SRegex !Field !Regex ![Maybe Field] | SSet !Field !Expr | SFilter !Expr - | SSort !Int !Expr !SortState - | SGroup ![Field] !(Map [Val] ()) -- TODO: Should add more state than '()' + | SSort !Int !Expr !SortType + | SGroup ![Field] | SShow ![Expr] deriving(Show) diff --git a/tests/Main.hs b/tests/Main.hs index 9386aeb..1b1f44d 100644 --- a/tests/Main.hs +++ b/tests/Main.hs @@ -1,8 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} module Main where -import Control.Monad.Except -import Control.Monad.State.Strict import Data.ByteString (ByteString) import qualified Data.Map.Strict as Map import Test.HUnit @@ -18,7 +16,7 @@ expr e res = e ~: case parseStmts (Config mempty mempty []) "-" ("set t " ++ e) of Left err -> assertFailure ("Parsing failed: " ++ err) Right stmts -> - case fst $ runState (runExceptT (step mempty)) $ head stmts of + case snd $ step (head stmts) (head $ newState stmts) mempty of Left err' -> assertFailure ("Evaluation exception in " ++ show stmts ++ ":\n" ++ show err') Right v -> assertEqual (show stmts) res (asBS $ Map.findWithDefault undefined "t" v) |