Logstat/Parse.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

{-# LANGUAGE GADTs #-}
module Logstat.Parse
  ( parseStmts
  , parseStmtsIO
  , parseFileIO
  )
where

import Control.Applicative (empty)
import Control.Monad (void,when)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Identity
import Control.Monad.State
import Data.List (sortOn)
import System.Directory
import System.Exit (exitFailure)
import System.FilePath (takeDirectory,(</>))
import System.IO (hPutStr,stderr)
import Text.Megaparsec
import Text.Megaparsec.Expr
import Text.Regex.PCRE.Light
import qualified Data.ByteString as B
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Text.Megaparsec.Lexer as L

import Logstat.Types
import Logstat.Value


type Parser m = StateT Config (ParsecT Dec String m)

-- Extracts the named subpatterns from a Regex and sorts them into a list that
-- can be zipped with the match results for quick capture extraction.
namedPatterns :: Regex -> [Maybe Field]
namedPatterns re = fill 0 $ sortOn snd $ captureNames re
  where
  fill _ [] = []
  fill i m@((name,n):xs)
    | i == n    = Just (T.unpack $ T.decodeUtf8 name) : fill (i+1) xs
    | otherwise = Nothing : fill (i+1) m


-- Whitespace and comments
ws :: Monad m => Parser m ()
ws = L.space (void spaceChar) (L.skipLineComment "#") empty

lexeme :: Monad m => Parser m a -> Parser m a
lexeme = L.lexeme ws

symbol :: Monad m => String -> Parser m ()
symbol s = void $ L.symbol ws s

identifier :: Monad m => Parser m String
identifier = lexeme $ do
  f <- letterChar
  t <- many alphaNumChar
  return (f:t)


-- Quoted literals, str: "string..", reg: /regex../
quotedlit :: Monad m => Char -> Parser m String
quotedlit q = lexeme $ between (char q) (char q) $
  many $ esc q
     <|> esc '\\'
     <|> satisfy (/= q)
  where
  esc c = string ('\\':c:[]) >> return c

str :: Monad m => Parser m B.ByteString
str = T.encodeUtf8 . T.pack <$> quotedlit '"'

reg :: Monad m => Parser m Regex
reg = do
  _ <- optional (char 'm') -- m/whatever/; If we're pretending to be perl, might as well go at least this far
  s <- quotedlit '/'
  opts <- many $ -- /s and /m seem a bit useless on single-line log entries
        extended <$ char 'x'
    <|> caseless <$ char 'i'
  case compileM (T.encodeUtf8 $ T.pack s) (no_auto_capture:opts) of
    Left e -> fail $ "Error in regular expression: " ++ e
    Right r -> return r


-- List of statements, final semicolon is optional
stmts :: Monad m => Parser m a -> Parser m [Stmt]
stmts end = concat <$> sepEndBy1 stmt (symbol ";") <* end
  where
  stmt = stmtUse <|> (:[]) <$> (stmtFilter <|> stmtShow <|> stmtGroup <|> stmtRegex <|> stmtSet <|> stmtSort)

  stmtUse = do
    symbol "use"
    n <- identifier
    proc <- M.lookup n . cfgProcs <$> get
    case proc of
      Nothing -> fail $ "No such proc: " ++ n
      Just s -> return s

  stmtFilter = symbol "filter" >> SFilter <$> expr
  stmtShow   = symbol "show"   >> SShow   <$> sepBy1 expr (symbol ",")

  stmtGroup = do
    symbol "group"
    n <- sepBy1 identifier (symbol ",")
    return (SGroup n mempty)

  stmtRegex = do
    symbol "regex"
    f <- identifier
    r <- reg
    return (SRegex f r (namedPatterns r))

  stmtSet = do
    symbol "set"
    f <- identifier
    e <- expr
    return (SSet f e)

  stmtSort = do
    symbol "sort"
    f <- True  <$ symbol "string"
     <|> False <$ symbol "number"
    e <- expr
    o <- True  <$ symbol "asc"
     <|> False <$ symbol "desc"
    symbol "limit"
    n <- fromIntegral <$> L.decimal
    let st = case (f,o) of
               (True,  True ) -> SortAscBS mempty
               (False, True ) -> SortAscNum mempty
               (True,  False) -> SortDescBS mempty
               (False, False) -> SortDescNum mempty
    return (SSort n e st)

  term = EField <$> identifier
     <|> ELit . bs <$> str
     <|> ELit . num <$> number
     <|> between (symbol "(") (symbol ")") expr

  expr = makeExprParser term
     [ [ Prefix (foldr1 (.) <$> some (ENot <$ (symbol "!")))
       , Prefix (ENeg        <$ symbol "-" ) ]
     , [ InfixL (EOp OMul    <$ symbol "*" )
       , InfixL (EOp ODiv    <$ symbol "/" )
       , InfixL (EOp OMod    <$ symbol "%" ) ]
     , [ InfixL (EOp OPlus   <$ symbol "+" )
       , InfixL (EOp OMinus  <$ symbol "-" )
       , InfixL (EOp OConcat <$ symbol "." ) ]
     , [ InfixN (EOp OLt     <$ symbol "lt")
       , InfixN (EOp OGt     <$ symbol "gt")
       , InfixN (EOp OLe     <$ symbol "le")
       , InfixN (EOp OGe     <$ symbol "ge")
       , InfixN (EOp OILt    <$ (lexeme $ try $ char '<' <* notFollowedBy (char '=')))
       , InfixN (EOp OIGt    <$ (lexeme $ try $ char '>' <* notFollowedBy (char '=')))
       , InfixN (EOp OILe    <$ symbol "<=")
       , InfixN (EOp OIGe    <$ symbol ">=") ]
     , [ InfixN (EOp OEq     <$ symbol "eq")
       , InfixN (EOp ONeq    <$ symbol "ne")
       , InfixN (EOp OIEq    <$ symbol "==")
       , InfixN (EOp OINeq   <$ symbol "!=") ]
     , [ InfixL (EOp OAnd    <$ symbol "&&") ]
     , [ InfixL (EOp OOr     <$ symbol "||") ]
     ]

  number = lexeme $ try L.float
       <|> fromIntegral <$> try (char '0' >> char' 'x' >> L.hexadecimal)
       <|> fromIntegral <$> try (char '0' >> char' 'o' >> L.octal)
       <|> fromIntegral <$> L.decimal


runPure :: Parser Identity a -> Config -> String -> String -> Either String a
runPure p c fn code =
  case runIdentity $ runParserT (evalStateT p c) fn code of
    Left e  -> Left $ parseErrorPretty e
    Right x -> Right x


runIO :: Parser IO a -> Config -> String -> String -> IO a
runIO p c fn code = runParserT (evalStateT p c) fn code >>= ret
  where
  ret (Left e)  = hPutStr stderr (parseErrorPretty e) >> exitFailure
  ret (Right x) = return x


-- Parse a list of statements given on the command line
parseStmts :: Config -> String -> String -> Either String [Stmt]
parseStmts = runPure (ws >> stmts eof)

parseStmtsIO :: Config -> String -> String -> IO [Stmt]
parseStmtsIO = runIO (ws >> stmts eof)


-- Parse a config file
parseFileIO :: Config -> String -> IO Config
parseFileIO cfg' fn' = do
  fn <- canonicalizePath fn'
  if elem fn (cfgFiles cfg')
    then return cfg'
    else do
      let cfg = cfg' { cfgFiles = fn : cfgFiles cfg' }
      readFile fn >>= runIO parser cfg fn

  where
  parser = between ws eof body >> get
  body = many $ proc <|> include

  proc = do
    symbol "proc"
    n <- identifier

    -- Fail as early as possible, so that we have proper error context. Also
    -- has the fancy side effect that duplicate procs are detected before any
    -- syntax errors further down the road.
    st <- cfgProcs <$> get
    when (M.member n st) $ fail $ "Duplicate definition of proc " ++ n

    symbol "{"
    c <- stmts (symbol "}")

    modify' $ \conf -> conf {cfgProcs = M.insert n c st}

  include = do
    symbol "include"
    n <- quotedlit '"'
    symbol ";"
    conf <- get
    conf' <- liftIO $ parseFileIO conf $ takeDirectory fn' </> n
    put conf'