summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2017-10-16 21:07:56 +0200
committerYorhel <git@yorhel.nl>2017-10-16 21:18:40 +0200
commit0a3c7d9ea5bbb0515a5568153cd81d0578116cc0 (patch)
treec152fd9c19eee98e2b045249a654013ffd8312b3
parenta7a10f4e76dda3d82b3cc5cc1fe6f4f5201a3828 (diff)
Move Main into lib; Fix space leak; Simplify stepL/finalL
I've now finally performed some benchmarks. This led me to the following conclusions: * The improvements by using PCRE 'study' are more significant than I had expected (more than 2x faster in my apache log test) * My attempt at letting the pattern matching execute earlier in the program somehow backfired and ended up being slower (even after I had rewritten 'step' and 'evalExpr' to return functions). Perhaps GHC was already performing some optimizations that I ruined somewhere in my attempt to make it faster? I'm not really sure what is going on. * Interestingly, the similar optimisations that I did in Main.hs did cause a slight (5%) performance boost, so I've kept them. * 20% of the time is spent on GC, I feel that this is a bit high, but I'm unsure how to improve that right now. Smallish optimisations like the pattern match avoidance are a bit premature at this point. I'd better spend my time on finalizing some core features and focussing on getting the overall architecture right.
-rw-r--r--.gitignore1
-rw-r--r--cli/Main.hs111
-rw-r--r--logstat.cabal10
-rw-r--r--logstat.conf4
-rw-r--r--src/Logstat/Eval.hs35
-rw-r--r--src/Logstat/Main.hs109
-rw-r--r--stack.yaml2
7 files changed, 130 insertions, 142 deletions
diff --git a/.gitignore b/.gitignore
index 3a5b475..f5e751f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
.stack-work/
+*.swp
diff --git a/cli/Main.hs b/cli/Main.hs
index 89b2d21..8e9426c 100644
--- a/cli/Main.hs
+++ b/cli/Main.hs
@@ -1,109 +1,2 @@
-{-# LANGUAGE OverloadedStrings #-}
-module Main where
-
-import Control.Exception (catchJust)
-import Control.Monad (filterM,foldM)
-import System.Directory (doesFileExist,getHomeDirectory)
-import System.Exit
-import System.FilePath ((</>))
-import System.IO (hPutStrLn,stderr)
-import System.IO.Error (isEOFError)
-import Options.Applicative
-import Data.Semigroup ((<>))
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Char8 as C
-import qualified Data.Map.Strict as Map
-
-import Logstat.Types
-import Logstat.Parse
-import Logstat.Value
-import Logstat.Eval
-
-
--- Extract the final 'show' statement or provide a default expression to display.
--- Returns an error if there is a non-last 'show' statement.
-extractShow :: [Stmt] -> Either String ([Expr], [Stmt])
-extractShow proc = f [] ["msg"] proc
- where
- f l v [] = Right (map EField v, reverse l)
- f l _ ((SShow e):[]) = Right (e, reverse l)
- f _ _ ((SShow _):_) = Left $ "Error: 'show' must not be followed by other statements" -- TODO: CONTEXT!
- f l _ (x@(SGroup v):xs) = f (x:l) v xs
- f l v (x:xs) = f (x:l) v xs
-
-
-
-run :: [State] -> [Event -> Either EvalError Val] -> Step -> ([State] -> [Either EvalError Event]) -> IO ()
-run st sh stp fnl = loop st
- where
- loop state = do
- r <- catchJust
- (\e -> if isEOFError e then Just () else Nothing)
- (Just <$> line state)
- (const (Nothing <$ finalize state))
- case r of
- Nothing -> return ()
- Just state' -> loop state'
-
- eval ev =
- case mapM (\f -> f ev) sh of
- Left e -> hPutStrLn stderr $ show e ++ " in show statement"
- Right v -> C.putStrLn $ B.intercalate ", " $ map asBS v
-
- line state = do
- l <- B.getLine
- let event = Map.fromList [("msg", bs l)]
- (state', r) = stp state event
- case r of
- Left Filtered -> return ()
- -- TODO: Add more error context, especially the file+line of the statement that triggered the error
- Left e -> hPutStrLn stderr $ show e ++ " with input: " ++ show l
- Right v -> eval v
- return state'
-
- finalize state = mapM_
- (\r ->
- case r of
- Left e -> hPutStrLn stderr $ show e
- Right l -> eval l)
- (fnl state)
-
-
-
-data Options = Options
- { optConfig :: [String]
- , optEval :: [String]
- , optNoRC :: Bool
- } deriving(Show)
-
-
-main :: IO ()
-main = do
- o <- execParser opts
-
- home <- getHomeDirectory
- c <- loadCfg
- (if optNoRC o then [] else ["/etc/logstat.conf", home </> ".logstat.conf", "logstat.conf"])
- (optConfig o)
-
- stmts' <- concat <$> mapM (parseStmtsIO c "-e") (optEval o)
- (sh, stmts) <- case extractShow stmts' of
- Left e -> hPutStrLn stderr e >> exitFailure
- Right r -> return r
-
- run (newState stmts) (map evalExpr sh) (stepL stmts) (finalL stmts)
-
- where
- opts = info (optparse <**> helper) fullDesc
- optparse = Options
- <$> many (strOption (long "config" <> short 'c' <> metavar "FILE" <> help "Config file"))
- <*> some (strOption (long "eval" <> short 'e' <> metavar "CODE" <> help "Code to evaluate"))
- <*> switch (long "norc" <> short 'X' <> help "Don't load default config files")
-
- loadCfg :: [String] -> [String] -> IO Config
- loadCfg def conf = do
- def' <- filterM doesFileExist def
- foldM
- parseFileIO
- (Config mempty mempty [])
- (def' ++ conf)
+module Main (main) where
+import Logstat.Main
diff --git a/logstat.cabal b/logstat.cabal
index c06f124..fe4302c 100644
--- a/logstat.cabal
+++ b/logstat.cabal
@@ -21,6 +21,7 @@ library
, heaps
, megaparsec
, mtl
+ , optparse-applicative
, pcre-light
, text
exposed-modules:
@@ -29,19 +30,14 @@ library
Logstat.Regex
Logstat.Types
Logstat.Value
+ Logstat.Main
executable logstat
main-is: Main.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall
default-language: Haskell2010
hs-source-dirs: cli
- build-depends: base
- , bytestring
- , containers
- , directory
- , filepath
- , logstat
- , optparse-applicative
+ build-depends: base, logstat
test-suite tests
main-is: Main.hs
diff --git a/logstat.conf b/logstat.conf
index af336d4..2ad7769 100644
--- a/logstat.conf
+++ b/logstat.conf
@@ -19,8 +19,8 @@ proc apache {
[ ] HTTP\/ (?<version> [012]\.[0-9] ) "
[ ] (?<code> [0-9]{3} )
[ ] ((?<size> [0-9]+ )|-)
- [ ] " (?<referer> [^"]+ ) "
- [ ] " (?<useragent> [^"]+ ) "/x;
+ [ ] " (?<referer> [^"]* ) "
+ [ ] " (?<useragent> [^"]* ) "/x;
}
proc foo {
diff --git a/src/Logstat/Eval.hs b/src/Logstat/Eval.hs
index c9453b5..8e29818 100644
--- a/src/Logstat/Eval.hs
+++ b/src/Logstat/Eval.hs
@@ -172,7 +172,9 @@ step stmt st ev = case stmt of
let StGroup m = st in
case mapM (getField ev) f of
Left err -> (st, Left err)
- Right v -> (StGroup $ Map.insert v () m, Left Filtered)
+ Right v ->
+ let m' = Map.insert v () m
+ in m' `seq` (StGroup m', Left Filtered)
final :: Stmt -> State -> [Event]
@@ -197,32 +199,19 @@ final stmt st = case stmt of
type Step = [State] -> Event -> ([State], Either EvalError Event)
stepL :: [Stmt] -> Step
-stepL stmts =
- -- "materialize" the statements into a list of (State -> Event -> ..)
- -- functions. This hopefully causes the pattern matching on the Stmt value to
- -- be performed only once, thus speeding up evaluation. But I obviously need
- -- to measure the effect of this optimization to see if it even works at all.
- let fs = map step stmts in loop fs
- where
- loop (f:fns) (st:sts) ev =
- case f st ev of
- (st', Left err) -> (st':sts, Left err)
- (st', Right e) -> let (sts', e') = loop fns sts e in (st':sts', e')
- loop _ _ ev = ([], Right ev)
+stepL (s:stmts) (st:sts) ev =
+ case step s st ev of
+ (st', Left err) -> (st':sts, Left err)
+ (st', Right e) -> let (sts', e') = stepL stmts sts e in (st':sts', e')
+stepL _ _ ev = ([], Right ev)
finalL :: [Stmt] -> [State] -> [Either EvalError Event]
-finalL stmts =
- -- Same thing as in stepL
- let fns = zip (stepL' stmts) (map final stmts) in loop fns
+finalL = loop
where
- stepL' :: [Stmt] -> [Step]
- stepL' [] = []
- stepL' (_:xs) = stepL xs : stepL' xs
-
- loop (f:fns) (st:sts) =
- let (sts', l1) = steps (fst f) sts (snd f st)
- l2 = loop fns sts'
+ loop (s:stmts) (st:sts) =
+ let (sts', l1) = steps (stepL stmts) sts (final s st)
+ l2 = loop stmts sts'
in l1 ++ l2
loop _ _ = []
diff --git a/src/Logstat/Main.hs b/src/Logstat/Main.hs
new file mode 100644
index 0000000..d4c72e4
--- /dev/null
+++ b/src/Logstat/Main.hs
@@ -0,0 +1,109 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Logstat.Main (main) where
+
+import Control.Exception (catchJust)
+import Control.Monad (filterM,foldM)
+import System.Directory (doesFileExist,getHomeDirectory)
+import System.Exit
+import System.FilePath ((</>))
+import System.IO (hPutStrLn,stderr)
+import System.IO.Error (isEOFError)
+import Options.Applicative
+import Data.Semigroup ((<>))
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as C
+import qualified Data.Map.Strict as Map
+
+import Logstat.Types
+import Logstat.Parse
+import Logstat.Value
+import Logstat.Eval
+
+
+-- Extract the final 'show' statement or provide a default expression to display.
+-- Returns an error if there is a non-last 'show' statement.
+extractShow :: [Stmt] -> Either String ([Expr], [Stmt])
+extractShow proc = f [] ["msg"] proc
+ where
+ f l v [] = Right (map EField v, reverse l)
+ f l _ ((SShow e):[]) = Right (e, reverse l)
+ f _ _ ((SShow _):_) = Left $ "Error: 'show' must not be followed by other statements" -- TODO: CONTEXT!
+ f l _ (x@(SGroup v):xs) = f (x:l) v xs
+ f l v (x:xs) = f (x:l) v xs
+
+
+
+run :: [State] -> [Event -> Either EvalError Val] -> Step -> ([State] -> [Either EvalError Event]) -> IO ()
+run st sh stp fnl = loop st
+ where
+ loop state = do
+ r <- catchJust
+ (\e -> if isEOFError e then Just () else Nothing)
+ (Just <$> line state)
+ (const (Nothing <$ finalize state))
+ case r of
+ Nothing -> return ()
+ Just state' -> loop state'
+
+ eval ev =
+ case mapM (\f -> f ev) sh of
+ Left e -> hPutStrLn stderr $ show e ++ " in show statement"
+ Right v -> C.putStrLn $ B.intercalate ", " $ map asBS v
+
+ line state = do
+ l <- B.getLine
+ let event = Map.fromList [("msg", bs l)]
+ (state', r) = stp state event
+ case r of
+ Left Filtered -> return ()
+ -- TODO: Add more error context, especially the file+line of the statement that triggered the error
+ Left e -> hPutStrLn stderr $ show e ++ " with input: " ++ show l
+ Right v -> eval v
+ return state'
+
+ finalize state = mapM_
+ (\r ->
+ case r of
+ Left e -> hPutStrLn stderr $ show e
+ Right l -> eval l)
+ (fnl state)
+
+
+
+data Options = Options
+ { optConfig :: [String]
+ , optEval :: [String]
+ , optNoRC :: Bool
+ } deriving(Show)
+
+
+main :: IO ()
+main = do
+ o <- execParser opts
+
+ home <- getHomeDirectory
+ c <- loadCfg
+ (if optNoRC o then [] else ["/etc/logstat.conf", home </> ".logstat.conf", "logstat.conf"])
+ (optConfig o)
+
+ stmts' <- concat <$> mapM (parseStmtsIO c "-e") (optEval o)
+ (sh, stmts) <- case extractShow stmts' of
+ Left e -> hPutStrLn stderr e >> exitFailure
+ Right r -> return r
+
+ run (newState stmts) (map evalExpr sh) (stepL stmts) (finalL stmts)
+
+ where
+ opts = info (optparse <**> helper) fullDesc
+ optparse = Options
+ <$> many (strOption (long "config" <> short 'c' <> metavar "FILE" <> help "Config file"))
+ <*> some (strOption (long "eval" <> short 'e' <> metavar "CODE" <> help "Code to evaluate"))
+ <*> switch (long "norc" <> short 'X' <> help "Don't load default config files")
+
+ loadCfg :: [String] -> [String] -> IO Config
+ loadCfg def conf = do
+ def' <- filterM doesFileExist def
+ foldM
+ parseFileIO
+ (Config mempty mempty [])
+ (def' ++ conf)
diff --git a/stack.yaml b/stack.yaml
index 8be8203..1148e49 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,3 +1,3 @@
-resolver: lts-9.2
+resolver: lts-9.9
packages:
- '.'