1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
|
{-# LANGUAGE GADTs #-}
module Logstat.Parse
( parseStmts
, parseStmtsIO
, parseFileIO
)
where
import Control.Applicative (empty)
import Control.Monad (void,when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Identity
import Control.Monad.State
import Data.List (sortOn)
import System.Directory
import System.Exit (exitFailure)
import System.FilePath (takeDirectory,(</>))
import System.IO (hPutStr,stderr)
import Text.Megaparsec
import Text.Megaparsec.Expr
import qualified Data.ByteString as B
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Text.Megaparsec.Lexer as L
import Logstat.Types
import Logstat.Regex
import Logstat.Value
emapM :: Monad m => (Expr -> m Expr) -> Expr -> m Expr
emapM f e = f e >>= \e' -> case e' of
EOp o a b -> EOp o <$> emapM f a <*> emapM f b
EIf a b c -> EIf <$> emapM f a <*> emapM f b <*> emapM f c
EMatch a b -> EMatch a <$> emapM f b
ENeg a -> ENeg <$> emapM f a
ENot a -> ENot <$> emapM f a
r -> return r
emap :: (Expr -> Expr) -> Expr -> Expr
emap f e = runIdentity (emapM (return . f) e)
type Parser m = StateT Config (ParsecT Dec String m)
-- Extracts the named subpatterns from a Regex and sorts them into a list that
-- can be zipped with the match results for quick capture extraction.
namedPatterns :: Regex -> [Maybe Field]
namedPatterns re = fill 0 $ sortOn snd $ captureNames re
where
fill _ [] = []
fill i m@((name,n):xs)
| i == n = Just (T.unpack $ T.decodeUtf8 name) : fill (i+1) xs
| otherwise = Nothing : fill (i+1) m
-- Whitespace and comments
ws :: Monad m => Parser m ()
ws = L.space (void spaceChar) (L.skipLineComment "#") empty
lexeme :: Monad m => Parser m a -> Parser m a
lexeme = L.lexeme ws
symbol :: Monad m => String -> Parser m ()
symbol s = void $ L.symbol ws s
identifier :: Monad m => Parser m String
identifier = lexeme $ do
f <- letterChar
t <- many alphaNumChar
return (f:t)
-- Quoted literals, str: "string..", reg: /regex../
quotedlit :: Monad m => Char -> Parser m String
quotedlit q = lexeme $ between (char q) (char q) $
many $ esc q
<|> esc '\\'
<|> satisfy (/= q)
where
esc c = string ('\\':c:[]) >> return c
str :: Monad m => Parser m B.ByteString
str = T.encodeUtf8 . T.pack <$> quotedlit '"'
reg :: Monad m => [PCREOption] -> Parser m Regex
reg defopt = do
_ <- optional (char 'm') -- m/whatever/; If we're pretending to be perl, might as well go at least this far
s <- quotedlit '/'
opts <- many $ -- /s and /m seem a bit useless on single-line log entries
extended <$ char 'x'
<|> caseless <$ char 'i'
case compileM (T.encodeUtf8 $ T.pack s) (defopt ++ opts) of
Left e -> fail $ "Error in regular expression: " ++ e
-- Studying the regex at this point is somewhat premature - we may not
-- actually be using this regex in our program. However, we also should not
-- defer the study to after the main program has been assembled, because at
-- that point the func/proc expansion may have duplicated the Regex object.
-- Studying should ideally happen after assembling the main program but
-- before func/proc expansion. These actions are currently performed in the
-- wrong order.
Right r -> return (study r)
expr :: Monad m => Parser m Expr
expr = label "expression" $ makeExprParser term
[ [ InfixR (EOp OPow <$ symbol "**") ]
, [ Prefix (foldr1 (.) <$> some (ENot <$ (symbol "!")))
, Prefix (ENeg <$ symbol "-" ) ]
, [ InfixL (EOp OMul <$ (lexeme $ try $ char '*' <* notFollowedBy (char '*')))
, InfixL (EOp ODiv <$ symbol "/" )
, InfixL (EOp OMod <$ symbol "%" ) ]
, [ InfixL (EOp OPlus <$ symbol "+" )
, InfixL (EOp OMinus <$ symbol "-" )
, InfixL (EOp OConcat <$ symbol "." ) ]
, [ InfixN (EOp OLt <$ symbol "lt")
, InfixN (EOp OGt <$ symbol "gt")
, InfixN (EOp OLe <$ symbol "le")
, InfixN (EOp OGe <$ symbol "ge")
, InfixN (EOp OILt <$ (lexeme $ try $ char '<' <* notFollowedBy (char '=')))
, InfixN (EOp OIGt <$ (lexeme $ try $ char '>' <* notFollowedBy (char '=')))
, InfixN (EOp OILe <$ symbol "<=")
, InfixN (EOp OIGe <$ symbol ">=") ]
, [ InfixN (EOp OEq <$ symbol "eq")
, InfixN (EOp ONeq <$ symbol "ne")
, InfixN (EOp OIEq <$ symbol "==")
, InfixN (EOp OINeq <$ symbol "!=")
, Postfix(EMatch <$> (symbol "~" >> reg [no_auto_capture]))
, Postfix((\a b -> ENot (EMatch a b)) <$> (symbol "!~" >> reg [no_auto_capture])) ]
, [ InfixL (EOp OAnd <$ symbol "&&") ]
, [ InfixL (EOp OOr <$ symbol "||") ]
, [ InfixR (try $ between (symbol "?") (symbol ":") expr >>= \m -> return $ (\l r -> EIf l m r)) ]
]
where
term = (try (do { n <- identifier; symbol "("; return n}) >>= func)
<|> EField <$> identifier
<|> ELit . bs <$> str
<|> ELit . num <$> number
<|> between (symbol "(") (symbol ")") expr
number = lexeme $ try L.float
<|> fromIntegral <$> try (char '0' >> char' 'x' >> L.hexadecimal)
<|> fromIntegral <$> try (char '0' >> char' 'o' >> L.octal)
<|> fromIntegral <$> L.decimal
func n
| n == "extract" = fextract
| n == "replace" = freplace
| otherwise = fdefined n
fextract = do
e <- expr
symbol ","
r <- reg []
when (captureCount r < 1) $ fail "Regular expression does not have any subpatterns to extract"
symbol ")"
return $ EExtract e r
freplace = do
e <- expr
symbol ","
r <- reg []
symbol ","
n <- str
symbol ")"
return $ EReplace e r n
fdefined n = do
f' <- M.lookup n . cfgFuncs <$> get
(args, e) <- maybe (fail $ "Unknown function " ++ n) return f'
a <- sepBy expr (symbol ",")
symbol ")"
when (length a /= length args) $ fail
$ "Function " ++ n ++ " expects " ++ show (length args) ++ " arguments, but " ++ show (length a) ++ " were given"
return $ fillargs e $ zip args a
fillargs e a =
let f (EField x) = maybe undefined id $ lookup x a
f x = x
in emap f e
-- List of statements, final semicolon is optional
stmts :: Monad m => Parser m a -> Parser m [Stmt]
stmts end = concat <$> sepEndBy1 stmt (symbol ";") <* end
where
stmt = stmtUse <|> (:[]) <$> (stmtFilter <|> stmtShow <|> stmtGroup <|> stmtRegex <|> stmtSet <|> stmtSort)
stmtUse = do
symbol "use"
n <- identifier
proc <- M.lookup n . cfgProcs <$> get
case proc of
Nothing -> fail $ "No such proc: " ++ n
Just s -> return s
stmtFilter = symbol "filter" >> SFilter <$> expr
stmtShow = symbol "show" >> SShow <$> sepBy1 expr (symbol ",")
stmtGroup = do
symbol "group"
n <- sepBy1 identifier (symbol ",")
return (SGroup n mempty)
stmtRegex = do
symbol "regex"
f <- identifier
r <- reg [no_auto_capture]
return (SRegex f r (namedPatterns r))
stmtSet = do
symbol "set"
f <- identifier
e <- expr
return (SSet f e)
stmtSort = do
symbol "sort"
f <- True <$ symbol "string"
<|> False <$ symbol "number"
e <- expr
o <- True <$ symbol "asc"
<|> False <$ symbol "desc"
symbol "limit"
n <- fromIntegral <$> L.decimal
let st = case (f,o) of
(True, True ) -> SortAscBS mempty
(False, True ) -> SortAscNum mempty
(True, False) -> SortDescBS mempty
(False, False) -> SortDescNum mempty
return (SSort n e st)
runPure :: Parser Identity a -> Config -> String -> String -> Either String a
runPure p c fn code =
case runIdentity $ runParserT (evalStateT p c) fn code of
Left e -> Left $ parseErrorPretty e
Right x -> Right x
runIO :: Parser IO a -> Config -> String -> String -> IO a
runIO p c fn code = runParserT (evalStateT p c) fn code >>= ret
where
ret (Left e) = hPutStr stderr (parseErrorPretty e) >> exitFailure
ret (Right x) = return x
-- Parse a list of statements given on the command line
parseStmts :: Config -> String -> String -> Either String [Stmt]
parseStmts = runPure (ws >> stmts eof)
parseStmtsIO :: Config -> String -> String -> IO [Stmt]
parseStmtsIO = runIO (ws >> stmts eof)
-- Parse a config file
parseFileIO :: Config -> String -> IO Config
parseFileIO cfg' fn' = do
fn <- canonicalizePath fn'
if elem fn (cfgFiles cfg')
then return cfg'
else do
let cfg = cfg' { cfgFiles = fn : cfgFiles cfg' }
readFile fn >>= runIO parser cfg fn
where
parser = between ws eof body >> get
body = many $ proc <|> func <|> include
proc = do
symbol "proc"
n <- identifier
-- Fail as early as possible, so that we have proper error context. Also
-- has the fancy side effect that duplicate procs are detected before any
-- syntax errors further down the road.
st <- cfgProcs <$> get
when (M.member n st) $ fail $ "Duplicate definition of proc " ++ n
symbol "{"
c <- stmts (symbol "}")
modify' $ \conf -> conf {cfgProcs = M.insert n c st}
func = do
symbol "func"
n <- identifier
st <- cfgFuncs <$> get
when (M.member n st) $ fail $ "Duplicate definition of func " ++ n
symbol "("
a <- sepBy identifier (symbol ",")
symbol ")"
e <- expr
symbol ";"
-- Make sure that the expression can only refer to variables declared in the function definition
_ <- emapM (\fe -> do
case fe of
EField fn -> when (notElem fn a) $ fail $ "Unknown variable " ++ fn
_ -> return ()
return fe) e
modify' $ \conf -> conf {cfgFuncs = M.insert n (a,e) st}
include = do
symbol "include"
n <- quotedlit '"'
symbol ";"
conf <- get
conf' <- liftIO $ parseFileIO conf $ takeDirectory fn' </> n
put conf'
|