From 0a3c7d9ea5bbb0515a5568153cd81d0578116cc0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 16 Oct 2017 21:07:56 +0200 Subject: Move Main into lib; Fix space leak; Simplify stepL/finalL I've now finally performed some benchmarks. This led me to the following conclusions: * The improvements by using PCRE 'study' are more significant than I had expected (more than 2x faster in my apache log test) * My attempt at letting the pattern matching execute earlier in the program somehow backfired and ended up being slower (even after I had rewritten 'step' and 'evalExpr' to return functions). Perhaps GHC was already performing some optimizations that I ruined somewhere in my attempt to make it faster? I'm not really sure what is going on. * Interestingly, the similar optimisations that I did in Main.hs did cause a slight (5%) performance boost, so I've kept them. * 20% of the time is spent on GC, I feel that this is a bit high, but I'm unsure how to improve that right now. Smallish optimisations like the pattern match avoidance are a bit premature at this point. I'd better spend my time on finalizing some core features and focussing on getting the overall architecture right. --- .gitignore | 1 + cli/Main.hs | 111 +--------------------------------------------------- logstat.cabal | 10 ++--- logstat.conf | 4 +- src/Logstat/Eval.hs | 35 ++++++----------- src/Logstat/Main.hs | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++ stack.yaml | 2 +- 7 files changed, 130 insertions(+), 142 deletions(-) create mode 100644 src/Logstat/Main.hs diff --git a/.gitignore b/.gitignore index 3a5b475..f5e751f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .stack-work/ +*.swp diff --git a/cli/Main.hs b/cli/Main.hs index 89b2d21..8e9426c 100644 --- a/cli/Main.hs +++ b/cli/Main.hs @@ -1,109 +1,2 @@ -{-# LANGUAGE OverloadedStrings #-} -module Main where - -import Control.Exception (catchJust) -import Control.Monad (filterM,foldM) -import System.Directory (doesFileExist,getHomeDirectory) -import System.Exit -import System.FilePath (()) -import System.IO (hPutStrLn,stderr) -import System.IO.Error (isEOFError) -import Options.Applicative -import Data.Semigroup ((<>)) -import qualified Data.ByteString as B -import qualified Data.ByteString.Char8 as C -import qualified Data.Map.Strict as Map - -import Logstat.Types -import Logstat.Parse -import Logstat.Value -import Logstat.Eval - - --- Extract the final 'show' statement or provide a default expression to display. --- Returns an error if there is a non-last 'show' statement. -extractShow :: [Stmt] -> Either String ([Expr], [Stmt]) -extractShow proc = f [] ["msg"] proc - where - f l v [] = Right (map EField v, reverse l) - f l _ ((SShow e):[]) = Right (e, reverse l) - f _ _ ((SShow _):_) = Left $ "Error: 'show' must not be followed by other statements" -- TODO: CONTEXT! - f l _ (x@(SGroup v):xs) = f (x:l) v xs - f l v (x:xs) = f (x:l) v xs - - - -run :: [State] -> [Event -> Either EvalError Val] -> Step -> ([State] -> [Either EvalError Event]) -> IO () -run st sh stp fnl = loop st - where - loop state = do - r <- catchJust - (\e -> if isEOFError e then Just () else Nothing) - (Just <$> line state) - (const (Nothing <$ finalize state)) - case r of - Nothing -> return () - Just state' -> loop state' - - eval ev = - case mapM (\f -> f ev) sh of - Left e -> hPutStrLn stderr $ show e ++ " in show statement" - Right v -> C.putStrLn $ B.intercalate ", " $ map asBS v - - line state = do - l <- B.getLine - let event = Map.fromList [("msg", bs l)] - (state', r) = stp state event - case r of - Left Filtered -> return () - -- TODO: Add more error context, especially the file+line of the statement that triggered the error - Left e -> hPutStrLn stderr $ show e ++ " with input: " ++ show l - Right v -> eval v - return state' - - finalize state = mapM_ - (\r -> - case r of - Left e -> hPutStrLn stderr $ show e - Right l -> eval l) - (fnl state) - - - -data Options = Options - { optConfig :: [String] - , optEval :: [String] - , optNoRC :: Bool - } deriving(Show) - - -main :: IO () -main = do - o <- execParser opts - - home <- getHomeDirectory - c <- loadCfg - (if optNoRC o then [] else ["/etc/logstat.conf", home ".logstat.conf", "logstat.conf"]) - (optConfig o) - - stmts' <- concat <$> mapM (parseStmtsIO c "-e") (optEval o) - (sh, stmts) <- case extractShow stmts' of - Left e -> hPutStrLn stderr e >> exitFailure - Right r -> return r - - run (newState stmts) (map evalExpr sh) (stepL stmts) (finalL stmts) - - where - opts = info (optparse <**> helper) fullDesc - optparse = Options - <$> many (strOption (long "config" <> short 'c' <> metavar "FILE" <> help "Config file")) - <*> some (strOption (long "eval" <> short 'e' <> metavar "CODE" <> help "Code to evaluate")) - <*> switch (long "norc" <> short 'X' <> help "Don't load default config files") - - loadCfg :: [String] -> [String] -> IO Config - loadCfg def conf = do - def' <- filterM doesFileExist def - foldM - parseFileIO - (Config mempty mempty []) - (def' ++ conf) +module Main (main) where +import Logstat.Main diff --git a/logstat.cabal b/logstat.cabal index c06f124..fe4302c 100644 --- a/logstat.cabal +++ b/logstat.cabal @@ -21,6 +21,7 @@ library , heaps , megaparsec , mtl + , optparse-applicative , pcre-light , text exposed-modules: @@ -29,19 +30,14 @@ library Logstat.Regex Logstat.Types Logstat.Value + Logstat.Main executable logstat main-is: Main.hs ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall default-language: Haskell2010 hs-source-dirs: cli - build-depends: base - , bytestring - , containers - , directory - , filepath - , logstat - , optparse-applicative + build-depends: base, logstat test-suite tests main-is: Main.hs diff --git a/logstat.conf b/logstat.conf index af336d4..2ad7769 100644 --- a/logstat.conf +++ b/logstat.conf @@ -19,8 +19,8 @@ proc apache { [ ] HTTP\/ (? [012]\.[0-9] ) " [ ] (? [0-9]{3} ) [ ] ((? [0-9]+ )|-) - [ ] " (? [^"]+ ) " - [ ] " (? [^"]+ ) "/x; + [ ] " (? [^"]* ) " + [ ] " (? [^"]* ) "/x; } proc foo { diff --git a/src/Logstat/Eval.hs b/src/Logstat/Eval.hs index c9453b5..8e29818 100644 --- a/src/Logstat/Eval.hs +++ b/src/Logstat/Eval.hs @@ -172,7 +172,9 @@ step stmt st ev = case stmt of let StGroup m = st in case mapM (getField ev) f of Left err -> (st, Left err) - Right v -> (StGroup $ Map.insert v () m, Left Filtered) + Right v -> + let m' = Map.insert v () m + in m' `seq` (StGroup m', Left Filtered) final :: Stmt -> State -> [Event] @@ -197,32 +199,19 @@ final stmt st = case stmt of type Step = [State] -> Event -> ([State], Either EvalError Event) stepL :: [Stmt] -> Step -stepL stmts = - -- "materialize" the statements into a list of (State -> Event -> ..) - -- functions. This hopefully causes the pattern matching on the Stmt value to - -- be performed only once, thus speeding up evaluation. But I obviously need - -- to measure the effect of this optimization to see if it even works at all. - let fs = map step stmts in loop fs - where - loop (f:fns) (st:sts) ev = - case f st ev of - (st', Left err) -> (st':sts, Left err) - (st', Right e) -> let (sts', e') = loop fns sts e in (st':sts', e') - loop _ _ ev = ([], Right ev) +stepL (s:stmts) (st:sts) ev = + case step s st ev of + (st', Left err) -> (st':sts, Left err) + (st', Right e) -> let (sts', e') = stepL stmts sts e in (st':sts', e') +stepL _ _ ev = ([], Right ev) finalL :: [Stmt] -> [State] -> [Either EvalError Event] -finalL stmts = - -- Same thing as in stepL - let fns = zip (stepL' stmts) (map final stmts) in loop fns +finalL = loop where - stepL' :: [Stmt] -> [Step] - stepL' [] = [] - stepL' (_:xs) = stepL xs : stepL' xs - - loop (f:fns) (st:sts) = - let (sts', l1) = steps (fst f) sts (snd f st) - l2 = loop fns sts' + loop (s:stmts) (st:sts) = + let (sts', l1) = steps (stepL stmts) sts (final s st) + l2 = loop stmts sts' in l1 ++ l2 loop _ _ = [] diff --git a/src/Logstat/Main.hs b/src/Logstat/Main.hs new file mode 100644 index 0000000..d4c72e4 --- /dev/null +++ b/src/Logstat/Main.hs @@ -0,0 +1,109 @@ +{-# LANGUAGE OverloadedStrings #-} +module Logstat.Main (main) where + +import Control.Exception (catchJust) +import Control.Monad (filterM,foldM) +import System.Directory (doesFileExist,getHomeDirectory) +import System.Exit +import System.FilePath (()) +import System.IO (hPutStrLn,stderr) +import System.IO.Error (isEOFError) +import Options.Applicative +import Data.Semigroup ((<>)) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as C +import qualified Data.Map.Strict as Map + +import Logstat.Types +import Logstat.Parse +import Logstat.Value +import Logstat.Eval + + +-- Extract the final 'show' statement or provide a default expression to display. +-- Returns an error if there is a non-last 'show' statement. +extractShow :: [Stmt] -> Either String ([Expr], [Stmt]) +extractShow proc = f [] ["msg"] proc + where + f l v [] = Right (map EField v, reverse l) + f l _ ((SShow e):[]) = Right (e, reverse l) + f _ _ ((SShow _):_) = Left $ "Error: 'show' must not be followed by other statements" -- TODO: CONTEXT! + f l _ (x@(SGroup v):xs) = f (x:l) v xs + f l v (x:xs) = f (x:l) v xs + + + +run :: [State] -> [Event -> Either EvalError Val] -> Step -> ([State] -> [Either EvalError Event]) -> IO () +run st sh stp fnl = loop st + where + loop state = do + r <- catchJust + (\e -> if isEOFError e then Just () else Nothing) + (Just <$> line state) + (const (Nothing <$ finalize state)) + case r of + Nothing -> return () + Just state' -> loop state' + + eval ev = + case mapM (\f -> f ev) sh of + Left e -> hPutStrLn stderr $ show e ++ " in show statement" + Right v -> C.putStrLn $ B.intercalate ", " $ map asBS v + + line state = do + l <- B.getLine + let event = Map.fromList [("msg", bs l)] + (state', r) = stp state event + case r of + Left Filtered -> return () + -- TODO: Add more error context, especially the file+line of the statement that triggered the error + Left e -> hPutStrLn stderr $ show e ++ " with input: " ++ show l + Right v -> eval v + return state' + + finalize state = mapM_ + (\r -> + case r of + Left e -> hPutStrLn stderr $ show e + Right l -> eval l) + (fnl state) + + + +data Options = Options + { optConfig :: [String] + , optEval :: [String] + , optNoRC :: Bool + } deriving(Show) + + +main :: IO () +main = do + o <- execParser opts + + home <- getHomeDirectory + c <- loadCfg + (if optNoRC o then [] else ["/etc/logstat.conf", home ".logstat.conf", "logstat.conf"]) + (optConfig o) + + stmts' <- concat <$> mapM (parseStmtsIO c "-e") (optEval o) + (sh, stmts) <- case extractShow stmts' of + Left e -> hPutStrLn stderr e >> exitFailure + Right r -> return r + + run (newState stmts) (map evalExpr sh) (stepL stmts) (finalL stmts) + + where + opts = info (optparse <**> helper) fullDesc + optparse = Options + <$> many (strOption (long "config" <> short 'c' <> metavar "FILE" <> help "Config file")) + <*> some (strOption (long "eval" <> short 'e' <> metavar "CODE" <> help "Code to evaluate")) + <*> switch (long "norc" <> short 'X' <> help "Don't load default config files") + + loadCfg :: [String] -> [String] -> IO Config + loadCfg def conf = do + def' <- filterM doesFileExist def + foldM + parseFileIO + (Config mempty mempty []) + (def' ++ conf) diff --git a/stack.yaml b/stack.yaml index 8be8203..1148e49 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,3 +1,3 @@ -resolver: lts-9.2 +resolver: lts-9.9 packages: - '.' -- cgit v1.2.3