summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2020-10-22 10:55:48 +0200
committerYorhel <git@yorhel.nl>2020-10-22 10:55:50 +0200
commit17efa52f4942dc3bc7b419eedf2eb31bee17d496 (patch)
treef8a15cfdb815832ef79cbd198c9dd170986f4323
parent9ea4c004f6a09e9d00121bbef99c117ab066c34c (diff)
Rewrite in C + a bunch of visible changes
Ironically, I find C easier to maintain than Haskell, largely because its build environment and APIs are more stable and more familiar to me. Resulting binary is also a *lot* smaller. Not done any performance measurements yet, algorithmically this new implementation has some really bad worst cases, but it wouldn't matter too much if you never hit them. User-visible improvements: - pre_if now supports braces - variables are no longer lexically scoped - error messages come with context And quite likely many regressions. I'll need to write some more tests.
-rw-r--r--.gitignore2
-rw-r--r--LICENSE2
-rw-r--r--Main.hs537
-rw-r--r--Makefile41
-rw-r--r--README12
-rw-r--r--Setup.hs2
-rw-r--r--nginx-confgen.c1085
-rw-r--r--nginx-confgen.cabal28
-rw-r--r--nginx-confgen.pod26
-rw-r--r--stack.yaml3
-rw-r--r--test/main.conf1
11 files changed, 1149 insertions, 590 deletions
diff --git a/.gitignore b/.gitignore
index 45888f4..44e0329 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-.stack-work/
*.swp
nginx-confgen
+nginx-confgen.1
diff --git a/LICENSE b/LICENSE
index 4754ff1..40f8465 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2018 Yoran Heling
+Copyright (c) 2018-2020 Yoran Heling
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/Main.hs b/Main.hs
deleted file mode 100644
index 491edc7..0000000
--- a/Main.hs
+++ /dev/null
@@ -1,537 +0,0 @@
-{-# LANGUAGE TupleSections #-}
-module Main where
-
-import Control.Applicative (empty)
-import Control.Exception.Base (throwIO,Exception)
-import Control.Monad (when,void,foldM,join,filterM)
-import qualified Data.Array as A
-import Data.Char (isSpace)
-import Data.HashMap.Strict (HashMap)
-import qualified Data.HashMap.Strict as M
-import Data.Maybe (isJust)
-import Data.Semigroup ((<>))
-import Data.Void
-import Options.Applicative hiding (Parser)
-import System.Directory
-import System.FilePath
-import System.IO (hPutStrLn,stderr)
-import System.IO.Error (tryIOError)
-import System.Process
-import Text.Megaparsec hiding (many,some,option,hidden)
-import Text.Megaparsec.Char
-import qualified Text.Megaparsec.Char.Lexer as L
-import qualified Text.Regex.TDFA as R
-
-
--- Config file AST
-
-type Conf = [Directive]
-
-data Directive
- = Directive String [Arg] (Maybe Conf)
- | BlockRef String -- extension
- deriving Show
-
-data Arg
- = ArgString String -- Retains any quoting and escape sequences
- | ArgArray String -- extension
- | ArgBlock String -- extension
- deriving Show
-
-
-
-type Parser = Parsec Void String
-
--- Name of a directive or variable.
--- I've no clue what the actual nginx parsing rules are.
-identifier :: Parser String
-identifier = (:)
- <$> letterChar
- <*> many (alphaNumChar <|> char '_')
-
-isIdentifier :: String -> Bool
-isIdentifier n = isJust $ parseMaybe identifier n
-
-parser :: Parser Conf
-parser = between ws eof $ many stmt
- where
- -- Whitespace and comments
- ws :: Parser ()
- ws = L.space (void spaceChar) (L.skipLineComment "#") empty
- lexeme :: Parser a -> Parser a
- lexeme = L.lexeme ws
- symbol :: String -> Parser ()
- symbol s = void $ L.symbol ws s
-
- blockvar, arrayvar :: Parser String
- blockvar = lexeme (char '&' >> identifier)
- arrayvar = lexeme (char '@' >> identifier)
-
- arg :: Parser Arg
- arg = ArgArray <$> arrayvar
- <|> ArgBlock <$> blockvar
- <|> ArgString <$> lexeme fstr
- <?> "argument"
- where
- -- My understanding from reading ngx_conf_file.c: Single/double quoted and
- -- unquoted strings all work the same way. Backslash escapes everything,
- -- space is not allowed in unquoted strings. Variable interpolation is
- -- allowed in all cases, but whether they are resolved depends on the
- -- directive. (Which means that "${v}" and "$v" are not equivalent if the
- -- directive does not resolve variables)
- fstr = qstr '"'
- <|> qstr '\''
- <|> join <$> (some (lit (\c -> not (isSpace c) && c /= '{' && c /= ';')))
-
- qstr c = between (char c) (char c) $ do
- s <- join <$> many (lit (/=c))
- return (c:s ++ [c])
-
- lit :: (Char -> Bool) -> Parser String
- lit f = (char '\\' >> anySingle >>= \c -> return $ '\\':[c])
- <|> some (satisfy (\c -> f c && c /= '\\'))
-
- stmt :: Parser Directive
- stmt = blockref <|> directive <?> "directive"
- where
- block :: Parser [Directive]
- block =
- let close = symbol "}" >> (optional $ symbol ";")
- in between (symbol "{") close $ many stmt
-
- directive = Directive
- -- A "proper" directive would start with an identifier, but some modules
- -- (e.g. http_core's 'types' directive) have special syntax. So let's
- -- just allow any unquoted string that doesn't interfere with the rest of
- -- the syntax.
- <$> lexeme (some $ satisfy $ \c -> not (isSpace c) && c /= '\\' && c /= '$' && c /= '{' && c /= '}' && c /= ';')
- <*> many arg
- <*> ( Just <$> block
- <|> Nothing <$ symbol ";")
-
- blockref = BlockRef <$> blockvar <* symbol ";"
-
-
--- Utilify function to selectively unescape some escape sequences
-unescape' :: (Char -> Bool) -> String -> String
-unescape' h ('\\':c:xs) = if h c then c : unescape' h xs else '\\' : c : unescape' h xs
-unescape' h (x:xs) = x : unescape' h xs
-unescape' _ [] = []
-
--- Turns a string argument from the AST into a fully unescaped Haskell string.
--- (Yes, argument strings ought to have their own type to make it clear when
--- we're dealing with quoted argument strings or with plain Haskell strings,
--- but this stringly-typed hack will do for now)
-unescape :: String -> String
-unescape s = unescape' (const True) $ case s of
- '"' :xs -> init xs
- '\'':xs -> init xs
- xs -> xs
-
-
-
-fmt :: Conf -> String
-fmt conf = concatMap (++"\n") $ concatMap dir conf
- where
- indent = map (" "++)
-
- dir :: Directive -> [String]
- dir (Directive n a b) =
- let (suffix, b') = block b
- in [n ++ fmtArgs a ++ suffix] ++ b'
- dir (BlockRef n) = ['&' : n ++ ";"]
-
- block :: Maybe [Directive] -> (String, [String])
- block Nothing = (";", [])
- block (Just l) = (" {", (indent $ concatMap dir l) ++ ["}"]);
-
-fmtArgs :: [Arg] -> String
-fmtArgs = concatMap ((' ':).arg)
- where
- arg (ArgString s) = s
- arg (ArgArray n) = '@' : n
- arg (ArgBlock n) = '&' : n
-
-
-
-
--- TODO: Add source locations to these errors
-data Error
- = MacroNoName
- | MacroNoBlock String
- | UnknownBlockRef String
- | BlockArg String
- | UnknownArray String
- | MacroDefBlock String String
- | MacroDefArray String String
- | MacroDefVar String
- | MacroNotEnoughArgs String
- | MacroTooManyArgs String
- | MacroNoNeedsBlock String
- | MacroNeedsBlock String
- | IncludeArg
- | IncludeNotFound String
- | IncludeParse (ParseErrorBundle String Void)
- | IncludeRecurse
- | SetArg
- | ExecArg
- | WarnArg
- | IfNeedsBlock
- | IfUnknown
- | IfInvalidArg
- | InvalidRegex String
-
-instance Show Error where
- show MacroNoName = "Macro directive missing or invalid name argument, syntax is \"macro name ..args.. { }\""
- show (MacroNoBlock n) = "Macro '"++n++"' has no block argument, syntax is \"macro name ..args.. { }\""
- show (UnknownBlockRef n) = "Reference to unknown block variable '&"++n++"'"
- show (BlockArg n) = "Block variable '&"++n++"' may not be used as argument to a directive"
- show (UnknownArray n) = "Reference to unknown variable '&'"++n++"'"
- show (MacroDefBlock n a) = "Block argument '&"++a++"' must be the last in macro definition of '"++n++"'"
- show (MacroDefArray n a) = "Array argument '@"++a++"' must be last or before block argument in macro definition of '"++n++"'"
- show (MacroDefVar n) = "Arguments to macro definition of '"++n++"' can only contain variables"
- show (MacroNotEnoughArgs n) = "Not enough arguments given to macro '"++n++"'"
- show (MacroTooManyArgs n) = "Too many arguments given to macro '"++n++"'"
- show (MacroNoNeedsBlock n) = "Macro '"++n++"' does not accept a block argument"
- show (MacroNeedsBlock n) = "Macro '"++n++"' requires a block argument, but none given"
- show IncludeArg = "Invalid argument(s) to 'pre_include'"
- show (IncludeNotFound f) = "Can't find include file '"++f++"'"
- show (IncludeParse e) = errorBundlePretty e
- show IncludeRecurse = "Recursion depth exceeded with 'pre_include'"
- show SetArg = "Invalid argument(s) to 'pre_set'"
- show ExecArg = "Invalid argument(s) to 'pre_exec'"
- show WarnArg = "Invalid argument(s) to 'pre_warn'"
- show IfNeedsBlock = "'pre_if' directive requires a block argument"
- show IfUnknown = "Unknown argument or operator in 'pre_if' directive"
- show IfInvalidArg = "Invalid &block or @array argument to 'pre_if'"
- show (InvalidRegex s) = "Invalid regular expression in pre_if: '"++s++"'"
-
-instance Exception Error
-
-
-
-
-data Macro = Macro
- { mName :: String
- , mState :: PState -- the state in which this macro was defined
- , mScalar :: [String]
- , mArray :: Maybe String
- , mBlock :: Maybe String
- , mCode :: Conf
- }
-
-
-macroDef :: PState -> String -> [Arg] -> Conf -> IO PState
-macroDef st name arg code = do
- when (not $ isIdentifier name) $ throwIO MacroNoName
- case reverse arg of
- (ArgBlock b):(ArgArray a):xs -> m xs (Just a) (Just b)
- (ArgBlock b):xs -> m xs Nothing (Just b)
- (ArgArray a):xs -> m xs (Just a) Nothing
- xs -> m xs Nothing Nothing
- where
- f (ArgBlock v) = throwIO (MacroDefBlock name v)
- f (ArgArray v) = throwIO (MacroDefArray name v)
- f (ArgString ('$':v)) = if isIdentifier v then return v else throwIO (MacroDefVar name)
- f _ = throwIO (MacroDefVar name)
- m rscalars a block = do
- scalars <- mapM f $ reverse rscalars
- let macro = Macro name st scalars a block code
- return (st { stMacros = M.insert name macro (stMacros st) })
-
-
-macroExpand :: Macro -> [Arg] -> Maybe Conf -> IO [Directive]
-macroExpand m iargs iblock = do
- (args, leftover) <- genargs mempty (mScalar m) iargs
- arr <- case (leftover, mArray m) of
- (a, Just b) -> return [(b, a)]
- ([], Nothing) -> return []
- _ -> throwIO (MacroTooManyArgs (mName m))
- block <- case (iblock, mBlock m) of
- (Just a, Just b) -> return [(b, a)]
- (Nothing, Nothing) -> return []
- (Just _, Nothing) -> throwIO (MacroNoNeedsBlock (mName m))
- (Nothing, Just _) -> throwIO (MacroNeedsBlock (mName m))
- let nst = (mState m) { stArgs = args, stArray = arr, stBlock = block }
- procConf' nst (mCode m)
- where
- genargs :: HashMap String String -> [String] -> [Arg] -> IO (HashMap String String, [Arg])
- -- All arguments are ArgString, enforced by 'interp'
- genargs vars (n:ns) (ArgString a:as) = genargs (M.insert n a vars) ns as
- genargs vars [] as = return (vars, as)
- genargs _ _ _ = throwIO (MacroNotEnoughArgs (mName m))
-
-
-
-
--- [Arg] should not have been interpolated yet, otherwise rmParen may remove
--- parenthesis from variables.
--- Conf should also not have been interpolated
-ifExpand :: PState -> [Arg] -> Conf -> IO Conf
-ifExpand st arg conf = do
- args <- mapM validateArg arg
- (st', ok) <- case args of
-
- -- Single argument, test if true/false
- [v] -> do
- v' <- interpArg v
- return $ (st, v' /= "" && v' /= "0")
-
- -- Equality/inequality
- [a, "=" , b] -> (st,) <$> ((==) <$> interpArg a <*> interpArg b)
- [a, "!=", b] -> (st,) <$> ((/=) <$> interpArg a <*> interpArg b)
-
- -- Regex
- [a, "~" , b] -> regex a b True
- [a, "~*" , b] -> regex a b False
- [a, "!~" , b] -> (fmap . fmap) not $ regex a b True
- [a, "!~*", b] -> (fmap . fmap) not $ regex a b False
-
- ---- File tests
- [ "-f", a] -> (st,) <$> ( interpArg a >>= doesFileExist)
- ["!-f", a] -> (st,) <$> (not <$> (interpArg a >>= doesFileExist))
- [ "-d", a] -> (st,) <$> ( interpArg a >>= doesDirectoryExist)
- ["!-d", a] -> (st,) <$> (not <$> (interpArg a >>= doesDirectoryExist))
- [ "-e", a] -> (st,) <$> ( interpArg a >>= doesPathExist)
- ["!-e", a] -> (st,) <$> (not <$> (interpArg a >>= doesPathExist))
- [ "-x", a] -> (st,) <$> ( interpArg a >>= doesExecutableExist)
- ["!-x", a] -> (st,) <$> (not <$> (interpArg a >>= doesExecutableExist))
-
- -- Dunno
- _ -> throwIO IfUnknown
-
- if ok
- then procConf' st' conf
- else return []
-
- where
- -- All arguments must be fully evaluated ArgStrings
- validateArg :: Arg -> IO String
- validateArg (ArgString l) = return l
- validateArg _ = throwIO IfInvalidArg
-
- -- Performs variable substitution and flattens the result
- -- TODO: Throw error if a variable could not be substituted.
- interpArg :: String -> IO String
- interpArg a = do
- [ArgString a'] <- procArg st [ArgString a]
- return $ unescape a'
-
- -- System.Directory is missing this check
- doesExecutableExist :: FilePath -> IO Bool
- doesExecutableExist p = either (const False) executable <$> tryIOError (getPermissions p)
-
- -- Regex matching
- regex :: String -> String -> Bool -> IO (PState, Bool)
- regex a' b' caseSen = do
- a <- interpArg a'
- b <- interpArg b'
- -- 'Either String' does not implement fail, but 'Maybe' does. Go figure.
- reg <- case R.makeRegexOptsM (R.defaultCompOpt { R.caseSensitive = caseSen }) R.defaultExecOpt b of
- Nothing -> throwIO (InvalidRegex b)
- Just r -> return r
- case R.matchOnceText reg a of
- Nothing -> return (st, False)
- Just (_, res, _) ->
- let nargs = foldr (\(n,(s,_)) i -> M.insert (show n) s i)
- (stArgs st)
- (A.assocs res)
- st' = st { stArgs = nargs }
- in return (st', True)
-
-
-
-
-includeExpand :: PState -> Int -> String -> IO (PState, [Directive])
-includeExpand st n fn = do
- fns <- filterM doesFileExist $ map (</>fn) $ stIncDir st
- f <- case fns of
- [] -> throwIO (IncludeNotFound fn)
- f:_ -> return f
- contents <- readFile f
- ast <- case parse parser fn contents of
- Left e -> throwIO (IncludeParse e)
- Right r -> return r
- (nst, conf) <- procConf (st { stIncludes = n-1 }) ast
- return (nst { stIncludes = n+1 }, conf)
-
-
-
-data PState = PState
- { stIncDir :: [String]
- , stVars :: HashMap String String
- , stMacros :: HashMap String Macro
- , stArgs :: HashMap String String -- shadows stVars
- -- Max 1, but Prelude's 'lookup' isn't generic to work on Maybe, so this'll do
- , stArray :: [(String, [Arg])]
- , stBlock :: [(String, Conf)]
- , stIncludes :: Int
- }
-
-
-procArg :: PState -> [Arg] -> IO [Arg]
-procArg st gargs = join <$> mapM interp gargs
- where
- interp a = case a of
- ArgBlock n -> throwIO (BlockArg n)
- ArgArray n ->
- case lookup n (stArray st) of
- Nothing -> throwIO (UnknownArray n)
- Just l -> return l -- No interpolation necessary I think, should have been processed at call site
- ArgString l -> return [ArgString $ interps l]
-
- -- Interpolated variables will be converted to the quoting style of the containing string.
- -- E.g.
- -- a = "abc{"; b = "x\yz"; c = "$something"
- -- $a -> abc\{
- -- $a$b; -> abc\{xyz
- -- "$a$b"; -> "abc{xyz"
- -- $c$a -> $somethingabc\{ <- Or should this be ${something}abc\{? Current solution is simpler, but not sure what the expected behavior is here.
-
- doubleEsc = (=='"')
- singleEsc = (=='\'')
- plainEsc c = c == '{' || c == ';' || isSpace c
-
- escFunc ('"' :_) = doubleEsc
- escFunc ('\'':_) = singleEsc
- escFunc _ = plainEsc
-
- quote :: (Char -> Bool) -> String -> String
- quote f = concatMap (\c -> if f c then ['\\', c] else [c])
-
- -- Note that the config parser already ensures that quoted strings have an
- -- end quote, so the use of 'init' here is safe.
- unquote s = case s of
- '"' :xs -> unescape' doubleEsc (init xs)
- '\'':xs -> unescape' singleEsc (init xs)
- xs -> unescape' plainEsc xs
-
- interps :: String -> String
- interps s = case parse (rep (escFunc s)) s s of
- Left e -> error ("Variable interpolation failed to parse: " ++ errorBundlePretty e)
- Right s' -> join s'
- where
- scalarname = identifier <|> some digitChar
- scalarvar = char '$'
- >> between (char '{') (char '}') scalarname
- <|> scalarname
- var f = scalarvar >>= \n ->
- case (M.lookup n (stArgs st), M.lookup n (stVars st)) of
- (Just v, _) -> return $ quote f $ unquote v
- (_, Just v) -> return $ quote f $ unquote v
- _ -> fail "unknown variable"
- rep f = many (string "\\$" <|> try (var f) <|> ((:[]) <$> anySingle))
-
-
-
-
-procConf' :: PState -> [Directive] -> IO [Directive]
-procConf' st c = snd <$> procConf st c
-
-procConf :: PState -> [Directive] -> IO (PState, [Directive])
-procConf gst gconf = foldM p (gst, []) gconf
- where
- p (st, a) i = (fmap . fmap) (a++) (stmt st i)
-
- stmt :: PState -> Directive -> IO (PState, [Directive])
-
- stmt st (BlockRef n) =
- case lookup n (stBlock st) of
- Nothing -> throwIO (UnknownBlockRef n)
- Just b -> return (st, b) -- All further processing on b should already have been done at call site
-
- stmt st (Directive "macro" a b) =
- case (a,b) of
- (ArgString n:_ , Nothing) -> throwIO (MacroNoBlock n)
- (ArgString n:a', Just b') -> (,[]) <$> macroDef st n a' b'
- (_ , _ ) -> throwIO MacroNoName
-
- stmt st (Directive "pre_include" a b) =
- case (stIncludes st, a, b) of
- (0, _, _) -> throwIO IncludeRecurse
- (i, [ArgString n], Nothing) -> includeExpand st i $ unescape n
- _ -> throwIO IncludeArg
-
- stmt st (Directive "pre_set" a b) =
- case (a, b) of
- (ArgString ('$':n):arg:[], Nothing) -> do
- [ArgString arg'] <- procArg st [arg]
- return (st { stVars = M.insert n arg' (stVars st) }, [])
- _ -> throwIO SetArg
-
- stmt st (Directive "pre_warn" a b) =
- (st, []) <$ case (a, b) of
- (arg, Nothing) -> do
- msg <- fmtArgs <$> procArg st arg
- hPutStrLn stderr ("[warn]" ++ msg)
- _ -> throwIO WarnArg
-
- stmt st (Directive "pre_exec" a b) =
- case (a, b) of
- (ArgString ('$':n):arg:[], Nothing) -> do
- -- TODO: Throw error if a variable could not be substituted? That would
- -- demand that all shell variables are escaped properly, which in turn
- -- may prevent some surprises.
- [ArgString cmd] <- procArg st [arg]
- -- This will throw an IOError on failure, which is good enough
- ret <- readCreateProcess (shell $ unescape cmd) ""
- -- Remove final newline
- let ret' = if not (null ret) && last ret == '\n' then init ret else ret
- return (st { stVars = M.insert n ret' (stVars st) }, [])
- _ -> throwIO ExecArg
-
- stmt st (Directive "pre_if" a b) =
- (st,) <$> case b of
- Nothing -> throwIO IfNeedsBlock
- Just b' -> ifExpand st a b'
-
- stmt st (Directive name a b) = (st,) <$> do
- a' <- procArg st a
- b' <- mapM (procConf' st) b
- case M.lookup name (stMacros st) of
- Just macro -> macroExpand macro a' b'
- Nothing -> return [Directive name a' b']
-
-
-
-
-
-data Options = Options
- { optVersion :: Bool
- , optInput :: String
- , optOutput :: String
- , optInclude :: [String]
- }
-
-main :: IO ()
-main = do
- o <- execParser opts
- if optVersion o
- -- I could use cabal's Paths_* module here to get the version from the
- -- cabal file, but unfortunately that also puts all my build paths in the
- -- generated binary.
- then putStrLn "nginx-confgen 1.2"
- else prog o
-
- where
- opts = info (optparse <**> helper) fullDesc
- optparse = Options
- <$> switch (short 'V' <> long "version" <> hidden <> help "Show program version")
- <*> strOption (short 'i' <> metavar "FILE" <> value "-" <> showDefault <> help "Input file")
- <*> strOption (short 'o' <> metavar "FILE" <> value "-" <> showDefault <> help "Output file")
- <*> many (strOption (short 'I' <> metavar "DIR" <> help "Add search path for pre_include directives"))
-
- prog o = do
- let inc = if null (optInclude o) then ["."] else optInclude o
-
- input <- if optInput o == "-"
- then getContents
- else readFile (optInput o)
-
- case parse parser (optInput o) input of
- Left e -> hPutStrLn stderr $ errorBundlePretty e
- Right r -> do
- output <- fmt <$> procConf' (PState inc mempty mempty mempty mempty mempty 15) r
- if optOutput o == "-"
- then putStr output
- else writeFile (optOutput o) output
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..557b75f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,41 @@
+CC ?= cc
+CFLAGS ?= -Wall -O2 -g
+PREFIX ?= /usr/local
+BINDIR ?= ${PREFIX}/bin
+MANDIR ?= ${PREFIX}/share/man/man1
+
+NGCFG_VERSION=2.0
+
+all: bin doc
+bin: nginx-confgen
+doc: nginx-confgen.1
+
+nginx-confgen: nginx-confgen.c Makefile
+ ${CC} ${CFLAGS} -DNGCFG_VERSION='"${NGCFG_VERSION}"' $< -o $@
+
+nginx-confgen.1: nginx-confgen.pod Makefile
+ pod2man --center "nginx-confgen manual" --release nginx-confgen-${NGCFG_VERSION} $< >$@
+
+clean:
+ rm -f nginx-confgen nginx-confgen.1
+
+distclean: clean
+
+install: install-bin install-doc
+
+install-bin: bin
+ mkdir -p ${BINDIR}
+ install -m0755 nginx-confgen ${BINDIR}/
+
+install-doc: doc
+ mkdir -p ${MANDIR}
+ install -m0644 nginx-confgen.1 ${MANDIR}/
+
+uninstall: uninstall-bin uninstall-doc
+
+# XXX: Ideally, these would also remove the directories created by 'install' if they are empty.
+uninstall-bin:
+ rm -f ${BINDIR}/nginx-confgen
+
+uninstall-doc:
+ rm -f ${MANDIR}/nginx-confgen.1
diff --git a/README b/README
index 5cf12fe..6ecb677 100644
--- a/README
+++ b/README
@@ -8,11 +8,15 @@ DESCRIPTION
BUILD
- Install Haskell Stack: https://haskellstack.org/
- Then run (in this git repo):
+ Just type 'make'.
- stack install
+ That will also create the man page, which will require pod2man. If you just
+ want to build the binary, type 'make bin'.
+
+INSTALL
+
+ make install PREFIX=/usr
USAGE
- See nginx-confgen.pod
+ See the man page.
diff --git a/Setup.hs b/Setup.hs
deleted file mode 100644
index 9a994af..0000000
--- a/Setup.hs
+++ /dev/null
@@ -1,2 +0,0 @@
-import Distribution.Simple
-main = defaultMain
diff --git a/nginx-confgen.c b/nginx-confgen.c
new file mode 100644
index 0000000..533e596
--- /dev/null
+++ b/nginx-confgen.c
@@ -0,0 +1,1085 @@
+/* Copyright (c) 2020 Yoran Heling
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <spawn.h>
+#include <regex.h>
+#include <getopt.h>
+
+
+/* TODO: There's no need for random access while processing a file. The
+ * parsing/processing/formatting steps are currently separated, but could
+ * really be done in a single step in a streaming fashion as well. That may or
+ * may not simplify the code, I haven't really investigated yet.
+ * (Output will have to be buffered either way, we wouldn't want to write
+ * anything if there's an error during processing)
+ *
+ * Also, the current implementation leaks memory everywhere.
+ * That's by design.
+ * Kind of.
+ */
+
+
+struct ctx { /* Source location attached to parsed items and passed to die() for error reporting */
+ char *fn; /* File name; parse_file() treats "-" as standard input */
+ int line, offset; /* Current position, advanced by parse_take(); die() prints both with +1 */
+ struct ctx *parent; /* Next context in the chain that die() prints as "included by" lines */
+};
+
+
+struct cfg_arg { /* One directive argument; arguments form a singly linked list */
+ struct ctx ctx; /* Location where the argument starts (copied by parse_block() before scanning it) */
+ char *data; /* Raw argument text as it appeared in the input, including any quotes and escapes */
+ struct cfg_arg *next; /* Following argument, or NULL at the end of the list */
+};
+
+struct cfg_directive { /* One parsed directive; siblings are chained through 'next' */
+ struct ctx ctx; /* Location snapshot taken by parse_block() right after the name was read */
+ char *name; /* Directive name as returned by parse_directive_name() */
+ struct cfg_arg *args; /* Argument list, NULL when there are none */
+ struct cfg_directive *body, *next; /* body: NULL when the directive ended with ';', otherwise the block contents (empty_body for "{}"); next: following sibling */
+};
+static const struct cfg_directive _empty_body = {}; /* Backing storage for the empty-block sentinel */
+static struct cfg_directive *empty_body = (struct cfg_directive *)&_empty_body; /* Sentinel compared by address to tell an empty block apart from "no block" (NULL) */
+
+
+#define isAlpha(c) (((unsigned)(c)|32)-'a' < 26) /* ASCII letter test: |32 folds upper case onto lower case, and the unsigned subtraction lets a single comparison check both bounds */
+#define isNum(c) (((unsigned)(c))-'0' < 10) /* ASCII digit test using the same unsigned wrap-around trick */
+
+
+#ifdef __GNUC__
+__attribute__((noreturn, format(printf, 2, 3)))
+#endif
+static void die(const struct ctx *ctx, const char *fmt, ...) {
+ va_list arg;
+ va_start(arg, fmt);
+ vfprintf(stderr, fmt, arg);
+ va_end(arg);
+ fprintf(stderr, "\n in %s", ctx->fn);
+ if(ctx->line)
+ fprintf(stderr, " line %d:%d", ctx->line+1, ctx->offset+1);
+ putc('\n', stderr);
+
+ while((ctx = ctx->parent))
+ fprintf(stderr, " included by %s line %d:%d\n", ctx->fn, ctx->line, ctx->offset);
+ exit(1);
+}
+
+
+/* Read a file descriptor to EOF into a newly allocated buffer.
+ *
+ * On success, *buf points to the zero-terminated data and the number of bytes
+ * read (not counting the terminator) is returned. On a read or allocation
+ * failure, *buf is set to NULL and (size_t)-1 is returned.
+ */
+static size_t slurp_fd(int fd, char **buf) {
+    size_t bufsize = 4096, buflen = 0;
+    ssize_t r;
+    char *tmp;
+
+    *buf = malloc(bufsize);
+    if(!*buf)
+        return (size_t)-1;
+
+    for(;;) {
+        /* Always leave one byte free for the terminating zero. */
+        r = read(fd, *buf+buflen, bufsize-buflen-1);
+        if(r < 0 && errno == EINTR)
+            continue; /* interrupted by a signal before any data arrived; retry */
+        if(r <= 0)
+            break;
+        buflen += r;
+        if(bufsize-buflen < 4096) {
+            bufsize *= 2;
+            /* Go through a temporary so the old buffer can still be freed if realloc fails. */
+            tmp = realloc(*buf, bufsize);
+            if(!tmp) {
+                r = -1;
+                break;
+            }
+            *buf = tmp;
+        }
+    }
+    if(r < 0) {
+        free(*buf);
+        *buf = NULL;
+        return (size_t)-1;
+    }
+    (*buf)[buflen] = 0;
+    return buflen;
+}
+
+
+/* Deep-copy an argument list: every node and its data string is duplicated.
+ * Returns the head of the new list, or NULL when given an empty list. */
+static struct cfg_arg *cfg_arg_copy(struct cfg_arg *a) {
+    struct cfg_arg *first = NULL, *last = NULL, *node;
+    for(; a; a = a->next) {
+        node = malloc(sizeof *node);
+        *node = *a; /* copies ctx and, for the final node, the NULL next pointer */
+        node->data = strdup(a->data);
+        if(last)
+            last->next = node;
+        else
+            first = node;
+        last = node;
+    }
+    return first;
+}
+
+
+/* Deep-copy a directive list, including names, arguments and nested bodies.
+ * The empty_body sentinel is returned as-is so it can still be recognised by
+ * address; a NULL list yields NULL. */
+static struct cfg_directive *cfg_directive_copy(struct cfg_directive *d) {
+    struct cfg_directive *first = NULL, *last = NULL, *node;
+
+    if(d == empty_body)
+        return d;
+    for(; d; d = d->next) {
+        node = malloc(sizeof *node);
+        *node = *d;
+        node->name = strdup(d->name);
+        node->args = cfg_arg_copy(d->args);
+        node->body = cfg_directive_copy(d->body);
+        if(last)
+            last->next = node;
+        else
+            first = node;
+        last = node;
+    }
+    return first;
+}
+
+
+
+
+/***********
+ * PARSING
+ */
+
+struct parse_ctx { /* Parser state for a single input buffer */
+ struct ctx *ctx; /* Location that parse_take() keeps up to date and that is handed to die() */
+ size_t pos, len; /* pos: index of the current character in buf; len: buffer length as set by parse_file() */
+ char *buf, c; /* buf: zero-terminated input; c: the current character, i.e. buf[pos] */
+};
+
+
+/* Consume the current character and return it, keeping the line/offset
+ * bookkeeping in sync. Dies when asked to consume a 0 byte. */
+static char parse_take(struct parse_ctx *ctx) {
+    struct ctx *loc = ctx->ctx;
+    const char taken = ctx->c;
+
+    if(taken == 0)
+        die(loc, "Invalid 0 byte");
+
+    if(taken != '\n')
+        loc->offset++;
+    else {
+        loc->line++;
+        loc->offset = 0;
+    }
+
+    ctx->c = ctx->buf[++ctx->pos];
+    return taken;
+}
+
+
+// Consumes any number of whitespace characters and comments.
+static void parse_ws(struct parse_ctx *ctx) {
+    for(;;) {
+        if(ctx->c == '#') {
+            /* A comment runs up to, but not including, the newline (or EOF). */
+            do
+                parse_take(ctx);
+            while(ctx->c != 0 && ctx->c != '\n');
+        } else if(ctx->c == ' ' || ctx->c == '\t' || ctx->c == '\r' || ctx->c == '\n')
+            parse_take(ctx);
+        else
+            return;
+    }
+}
+
+
+/* Skip over an unquoted argument. Stops, without consuming it, at EOF,
+ * whitespace, '{' or ';'. A backslash makes the following character part of
+ * the argument; a backslash directly before EOF is an error. */
+static void parse_unqarg(struct parse_ctx *ctx) {
+    for(;;) {
+        const char c = ctx->c;
+        if(c == 0 || c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '{' || c == ';')
+            return;
+        if(c == '\\') {
+            parse_take(ctx);
+            if(ctx->c == 0)
+                die(ctx->ctx, "Unexpected EOF");
+        }
+        parse_take(ctx);
+    }
+}
+
+
+/* Skip over a quoted argument. The current character is taken as the opening
+ * quote; scanning ends after the matching closing quote. A backslash skips
+ * the character that follows it. Hitting EOF inside the string is an error. */
+static void parse_qarg(struct parse_ctx *ctx) {
+    const char quote = parse_take(ctx);
+    char c;
+    for(;;) {
+        if(ctx->c == 0)
+            die(ctx->ctx, "Unexpected EOF");
+        c = parse_take(ctx);
+        if(c == quote)
+            break;
+        if(c != '\\')
+            continue;
+        if(ctx->c == 0)
+            die(ctx->ctx, "Unexpected EOF");
+        parse_take(ctx);
+    }
+}
+
+
+/* Directive names are not restricted to identifiers, because some modules
+   (e.g. http_core's 'types' directive) use special syntax. Any run of
+   characters is accepted as long as it does not clash with the rest of the
+   grammar: the name ends at whitespace, '#', ';' or '{', while EOF and the
+   characters '}', '\', '$' and '@' are rejected. Returns a newly allocated,
+   zero-terminated copy of the name. */
+static char *parse_directive_name(struct parse_ctx *ctx) {
+    const size_t start = ctx->pos;
+    size_t len;
+    char *name;
+
+    for(;; parse_take(ctx)) {
+        const char c = ctx->c;
+        if(c == 0)
+            die(ctx->ctx, "Unexpected EOF");
+        if(c == '}' || c == '\\' || c == '$' || c == '@')
+            die(ctx->ctx, "Unexpected character in directive name: '%c'", c);
+        if(c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '#' || c == ';' || c == '{')
+            break;
+    }
+
+    len = ctx->pos - start;
+    name = malloc(len+1);
+    memcpy(name, ctx->buf+start, len);
+    name[len] = 0;
+    return name;
+}
+
+
+static struct cfg_directive *parse_block(struct parse_ctx *ctx) { /* Parses directives until EOF or an unmatched '}' and returns the list head, or empty_body when no directive was found */
+ struct cfg_directive *head = NULL, *cur = NULL, *tmp;
+ struct cfg_arg **arg; /* Slot where the next parsed argument gets linked in */
+ size_t arg_start;
+
+directive_start: /* State: expecting the start of a directive, or the end of this block */
+ parse_ws(ctx);
+ if(ctx->c == 0 || ctx->c == '}')
+ return head ? head : empty_body; /* The '}' is left unconsumed so the caller can tell it apart from EOF */
+
+ tmp = calloc(1, sizeof(struct cfg_directive));
+ *(head ? &cur->next : &head) = tmp; /* Append to the list, or start it */
+ cur = tmp;
+
+ cur->name = parse_directive_name(ctx);
+ cur->ctx = *ctx->ctx; /* Snapshot of the position right after the name */
+ arg = &cur->args;
+
+directive_arg: /* State: expecting an argument, ';' or '{' */
+ parse_ws(ctx);
+ switch(ctx->c) {
+ case 0:
+ die(ctx->ctx, "Unexpected EOF");
+ case ';': /* Directive without a block: body stays NULL */
+ parse_take(ctx);
+ goto directive_start;
+ case '{':
+ parse_take(ctx);
+ cur->body = parse_block(ctx);
+ if(ctx->c != '}') /* The nested call only returns at EOF or '}', so anything else here means the block was never closed */
+ die(ctx->ctx, "Unexpected EOF");
+ parse_take(ctx);
+ goto directive_start;
+ default:
+ *arg = calloc(1, sizeof(struct cfg_arg));
+ (*arg)->ctx = *ctx->ctx; /* Position where the argument starts */
+
+ arg_start = ctx->pos;
+ (ctx->c == '"' || ctx->c == '\'' ? parse_qarg : parse_unqarg)(ctx); /* Pick the scanner based on the first character */
+ (*arg)->data = malloc(ctx->pos-arg_start+1); /* The raw text is stored, quotes and escapes included */
+ memcpy((*arg)->data, ctx->buf+arg_start, ctx->pos-arg_start);
+ (*arg)->data[ctx->pos-arg_start] = 0;
+
+ arg = &(*arg)->next;
+ goto directive_arg;
+ }
+
+ return head; /* unreachable */
+}
+
+
+/* Read and parse the file named by ctx->fn; the name "-" reads standard input.
+ *
+ * Returns the parsed directive list (empty_body when the input contains no
+ * directives), or NULL when the file could not be opened or read, in which
+ * case errno describes the failure. Syntax errors do not return: they are
+ * reported through die().
+ */
+static struct cfg_directive *parse_file(struct ctx *ctx) {
+    size_t len;
+    char *buf;
+    int saved_errno;
+    struct cfg_directive *ret;
+    int fd = strcmp(ctx->fn, "-") == 0 ? 0 : open(ctx->fn, O_RDONLY);
+    if(fd < 0)
+        return NULL;
+
+    len = slurp_fd(fd, &buf);
+    /* Close the descriptor on every path, but keep the errno of a failed read
+     * intact for the caller. */
+    saved_errno = errno;
+    close(fd);
+    errno = saved_errno;
+    /* slurp_fd() returns an unsigned size, so a "len < 0" test can never fire;
+     * the NULL buffer is the reliable failure indicator. */
+    if(buf == NULL)
+        return NULL;
+
+    struct parse_ctx pc = { ctx, 0, len, buf, *buf };
+    ret = parse_block(&pc);
+    /* parse_block() stops at EOF or at a '}' that has no matching '{'. */
+    if(pc.c == '}')
+        die(pc.ctx, "Unexpected '}'");
+    free(buf);
+    return ret;
+}
+
+
+
+
+/**************
+ * FORMATTING
+ */
+
+void write_str(FILE *f, const char *s) {
+ if(fputs(s, f) < 0) {
+ fprintf(stderr, "Error writing to output: %s", strerror(errno));
+ exit(1);
+ }
+}
+
+/* Emit the indentation for nesting level lvl: one indent unit per level. */
+void write_indent(FILE *f, int lvl) {
+    while(lvl-- > 0)
+        write_str(f, " ");
+}
+
+
+/* Write one directive at nesting level lvl: its name, its arguments separated
+ * by spaces, then either ";" or a braced block with the nested directives one
+ * level deeper. The empty_body sentinel itself produces no output. */
+void write_directive(FILE *f, int lvl, struct cfg_directive *d) {
+    struct cfg_arg *a;
+    struct cfg_directive *child;
+
+    if(d == empty_body)
+        return;
+
+    write_indent(f, lvl);
+    write_str(f, d->name);
+    a = d->args;
+    while(a) {
+        write_str(f, " ");
+        write_str(f, a->data);
+        a = a->next;
+    }
+
+    /* No body at all means the directive was terminated with ';'. */
+    if(!d->body) {
+        write_str(f, ";\n");
+        return;
+    }
+
+    write_str(f, " {\n");
+    child = d->body;
+    while(child && child != empty_body) {
+        write_directive(f, lvl+1, child);
+        child = child->next;
+    }
+    write_indent(f, lvl);
+    write_str(f, "}\n");
+}
+
+
+
+
+/**************
+ * PROCESSING
+ */
+
+struct proc_macro;
+
+struct proc_data { /* The set of macros and variables visible at some point during processing */
+ /* Array of macros */
+ size_t macrolen, macrosize; /* Entries in use / entries allocated (proc_data_copy() allocates 'size' and copies 'len') */
+ struct proc_macro **macros;
+ /* Array of variables, each key/value pair is encoded as "key\0value\0"; values are already expanded and unquoted */
+ size_t varlen, varsize; /* Entries in use / entries allocated */
+ char **vars;
+};
+
+struct proc_macro { /* A macro definition */
+ char *name;
+ char **vars; /* Names of positional scalar arguments (NOTE(review): how the end of this array is marked is not visible here) */
+ char *array; /* Name of the @array argument */
+ char *block; /* Name of the &block argument */
+ struct proc_data data[1]; /* Variables and macros that were available when this macro was defined */
+ struct cfg_directive *body; /* Presumably the directives inside the macro definition - confirm against the code that fills it in */
+};
+
+struct proc_ctx { /* State carried around while processing directives */
+ struct proc_data data[1]; /* One-element array, so members are reached with ctx->data->... */
+ char **search_path; /* Presumably the directories searched by pre_include - confirm against the caller */
+ /* When expanding a macro: */
+ struct proc_macro *macro;
+ struct cfg_arg *macro_array;
+ struct cfg_directive *macro_block;
+};
+
+
+/* Turns a quoted argument into an unquoted string. Returns a newly allocated string.
+ *
+ * A leading ' or " selects the quoting style and the result ends at the
+ * matching closing quote; without a leading quote the whole string is used.
+ * Every backslash is dropped and the character after it is copied literally.
+ * Malformed input (missing closing quote, or a backslash as the very last
+ * character) is cut off at the end of the string instead of reading past it.
+ */
+static char *str_unquote(const char *str) {
+    char quote = *str == '\'' || *str == '"' ? *(str++) : 0;
+    char *ret = malloc(strlen(str)+1); /* Unquoted string will never be larger than the quoted version */
+    char *cur = ret;
+    while(*str && *str != quote) {
+        /* Skip the backslash; stop if nothing follows it. */
+        if(*str == '\\' && !*(++str))
+            break;
+        *(cur++) = *(str++);
+    }
+    *cur = 0;
+    return ret;
+}
+
+
+/* Turns an unquoted string into a quoted argument using the given quoting style (", ' or 0). Returns a newly allocated string. Does not actually include the quotes.
+ *
+ * The result is meant to survive a round trip through the parser and
+ * str_unquote(), so a backslash is added before:
+ *  - every backslash, in all styles (str_unquote() strips one level of them);
+ *  - the quote character itself, in the ' and " styles;
+ *  - whitespace, ';' and '{' in the unquoted style, since those end an
+ *    unquoted argument, as well as a ' or " in first position, which would
+ *    otherwise start a quoted argument.
+ */
+static char *str_quote(char quote, const char *strstart) {
+    const char *str = strstart;
+    char *ret = malloc(strlen(str)*2+1); /* Worst case: every character gets escaped */
+    char *cur = ret;
+    int esc;
+    while(*str) {
+        if(*str == '\\')
+            esc = 1;
+        else if(quote)
+            esc = *str == quote;
+        else
+            esc = *str == ' ' || *str == '\t' || *str == '\r' || *str == '\n'
+                || *str == ';' || *str == '{'
+                || (strstart == str && (*str == '"' || *str == '\''));
+        if(esc)
+            *(cur++) = '\\';
+        *(cur++) = *(str++);
+    }
+    *cur = 0;
+    return ret;
+}
+
+
+/* Measures the variable name that begins at `str`.
+ * The first character must be a letter or digit; after that letters, digits
+ * and underscores are accepted. Returns the name length, or 0 when `str` does
+ * not start with a valid name. */
+static size_t str_varname(const char *str) {
+    const char *end = str;
+    if(!(isAlpha(*end) || isNum(*end)))
+        return 0;
+    do
+        end++;
+    while(isAlpha(*end) || isNum(*end) || *end == '_');
+    return (size_t)(end - str);
+}
+
+
+/* Shallow-copies a proc_data struct: the two pointer arrays are duplicated
+ * (keeping the same capacity), but the variable strings and macro structs they
+ * point to are shared, since those can be re-used across multiple contexts. */
+static struct proc_data proc_data_copy(struct proc_data d) {
+    struct proc_data dup = d;
+    const size_t macro_sz = sizeof(*dup.macros);
+    const size_t var_sz = sizeof(*dup.vars);
+
+    dup.macros = malloc(macro_sz * d.macrosize);
+    memcpy(dup.macros, d.macros, macro_sz * d.macrolen);
+
+    dup.vars = malloc(var_sz * d.varsize);
+    memcpy(dup.vars, d.vars, var_sz * d.varlen);
+
+    return dup;
+}
+
+
+/* Finds the variable whose name is the first `varlen` bytes of `varname` in
+ * the current context. Returns a pointer to its value (stored right after the
+ * name's NUL inside the table entry), or NULL if there is no such variable or
+ * `varlen` is 0. */
+static char *proc_var_get(struct proc_ctx *ctx, const char *varname, size_t varlen) {
+    struct proc_data *tbl = ctx->data;
+    size_t idx = 0;
+    char *entry;
+
+    if(!varlen)
+        return NULL;
+
+    while(idx < tbl->varlen) {
+        entry = tbl->vars[idx++];
+        if(strlen(entry) != varlen)
+            continue;
+        if(memcmp(entry, varname, varlen) != 0)
+            continue;
+        return entry + varlen + 1;
+    }
+    return NULL;
+}
+
+
+/* Defines or redefines variable `name` with value `val` in the current
+ * context. Both strings are copied into a fresh "name\0value\0" buffer, so the
+ * caller keeps ownership of its arguments. */
+static void proc_var_set(struct proc_ctx *ctx, const char *name, const char *val) {
+    struct proc_data *tbl = ctx->data;
+    size_t namelen = strlen(name), vallen = strlen(val), slot;
+    char *entry = malloc(namelen + vallen + 2);
+
+    memcpy(entry, name, namelen + 1);
+    memcpy(entry + namelen + 1, val, vallen + 1);
+
+    for(slot = 0; slot < tbl->varlen; slot++)
+        if(strcmp(name, tbl->vars[slot]) == 0)
+            break;
+
+    if(slot < tbl->varlen) {
+        /* The previous buffer is intentionally kept alive: macros may still reference it */
+        tbl->vars[slot] = entry;
+        return;
+    }
+
+    if(tbl->varlen == tbl->varsize) {
+        tbl->varsize *= 2;
+        tbl->vars = realloc(tbl->vars, tbl->varsize*sizeof(*tbl->vars));
+    }
+    tbl->vars[tbl->varlen++] = entry;
+}
+
+
+/* Substitute variables in a string. Returns a newly allocated string.
+ *
+ * Both $name and ${name} are recognized; references to unknown variables are
+ * left untouched. Backslash escapes are copied through verbatim (so "\$x" is
+ * not substituted).
+ *
+ * Interpolated variables will be converted to the quoting style of the containing string.
+ * E.g.
+ *   a = "abc{"; b = "x\yz"; c = "$something"
+ *   $a      -> abc\{
+ *   $a$b;   -> abc\{xyz
+ *   "$a$b"; -> "abc{xyz"
+ *   $c$a    -> $somethingabc\{  <- Or should this be ${something}abc\{? Current solution is simpler, but not sure what the expected behavior is here.
+ *
+ * A closing '}' is only consumed when the reference was opened with "${", so
+ * "$name}" keeps its brace. A lone backslash at the very end of the string is
+ * copied as-is instead of reading past the terminator.
+ */
+static char *proc_subst_vars(struct proc_ctx *ctx, const char *str) {
+    size_t len = 0, size = strlen(str)+1, varlen;
+    char *var, *cur, *ret = malloc(size);
+    char quote = *str == '\'' || *str == '"' ? *str : 0;
+    const char *name;
+    int braced;
+
+#define addc(c) do { if(len == size) { size *= 2; ret = realloc(ret, size); } ret[len++] = c; } while(0)
+
+    while(*str) {
+        /* Escape sequence: copy the backslash and the escaped character unchanged */
+        if(*str == '\\' && str[1]) {
+            addc(*(str++));
+            addc(*(str++));
+            continue;
+        }
+        if(*str == '$') {
+            braced = str[1] == '{';
+            name = str + 1 + braced;
+            varlen = str_varname(name);
+            if((var = proc_var_get(ctx, name, varlen)) != NULL) {
+                cur = var = str_quote(quote, var);
+                while(*cur)
+                    addc(*(cur++));
+                free(var);
+                str = name + varlen;
+                if(braced && *str == '}')
+                    str++;
+                continue;
+            }
+        }
+        addc(*(str++));
+    }
+    addc(0);
+
+#undef addc
+    return ret;
+}
+
+
+/* Processes an argument list in place.
+ *
+ * While a macro with an @array parameter is being expanded, an argument that
+ * is exactly "@<arrayname>" is replaced by a copy of the array arguments given
+ * at the macro invocation (possibly none, in which case the argument is simply
+ * removed). The spliced-in arguments are not processed again. Every other
+ * argument has its variables substituted. */
+static void proc_args(struct proc_ctx *ctx, struct cfg_arg **arg) {
+    struct cfg_arg **a = arg, *next;
+    char *tmp;
+    while(*a) {
+        if(ctx->macro && ctx->macro->array && *(*a)->data == '@' && strcmp((*a)->data+1, ctx->macro->array) == 0) {
+            next = (*a)->next;
+            free((*a)->data);
+            free(*a);
+            /* The array may be empty when the macro was invoked without extra arguments */
+            *a = ctx->macro_array ? cfg_arg_copy(ctx->macro_array) : NULL;
+            /* Skip over the copy and re-attach the remainder of the list behind it */
+            while(*a)
+                a = &(*a)->next;
+            *a = next;
+        } else {
+            tmp = proc_subst_vars(ctx, (*a)->data);
+            free((*a)->data);
+            (*a)->data = tmp;
+            a = &(*a)->next;
+        }
+    }
+}
+
+
+static void proc_block(struct proc_ctx *, struct cfg_directive **);
+
+
+/* Handles 'pre_include <file>': replaces the directive at *block with the
+ * directives parsed from the named file. The inserted directives are left
+ * unprocessed at *block.
+ *
+ * The file name has its variables substituted and is unquoted first. It is
+ * tried as given; when that fails and the name is not absolute, each directory
+ * in the search path is tried in order. Failing to read the file from any
+ * location is a fatal error rather than a silent no-op. */
+static void proc_include(struct proc_ctx *ctx, struct cfg_directive **block) {
+    struct ctx parser = {};
+    char *arg, *subst, **inc = ctx->search_path;
+    struct cfg_directive *c = *block, *new;
+    size_t l;
+    int err;
+
+    if(!c->args)
+        die(&c->ctx, "'pre_include' needs a filename argument");
+    if(c->args->next)
+        die(&c->ctx, "Too many arguments to 'pre_include'");
+
+    subst = proc_subst_vars(ctx, c->args->data);
+    arg = str_unquote(subst);
+    free(subst);
+    if(strcmp(arg, "-") == 0)
+        die(&c->ctx, "Can't include files from standard input");
+    parser.parent = &c->ctx;
+
+    parser.fn = arg;
+    new = parse_file(&parser);
+    err = errno;
+    while(*arg != '/' && !new && inc && *inc) {
+        l = strlen(*inc) + strlen(arg) + 2;
+        parser.fn = malloc(l);
+        snprintf(parser.fn, l, "%s/%s", *inc, arg);
+        new = parse_file(&parser);
+        if(!new) {
+            err = errno; /* Remember the failure reason before free() can clobber it */
+            free(parser.fn);
+        }
+        inc++;
+    }
+
+    if(!new)
+        die(&c->ctx, "Unable to read '%s': %s", arg, strerror(err));
+
+    if(new == empty_body)
+        *block = c->next;
+    else {
+        /* Splice the included directives in place of the pre_include directive */
+        *block = new;
+        while(new->next)
+            new = new->next;
+        new->next = c->next;
+    }
+}
+
+
+/* Handles 'pre_set $name value': checks that exactly two arguments are given
+ * and that the first is a well-formed "$name", then stores the second argument
+ * (variables substituted, quoting removed) under that name. */
+static void proc_set(struct proc_ctx *ctx, struct cfg_directive *c) {
+    struct cfg_arg *name = c->args, *value;
+
+    if(!name)
+        die(&c->ctx, "'pre_set' requires two arguments, but found none");
+    value = name->next;
+    if(!value)
+        die(&c->ctx, "'pre_set' requires two arguments, but found only one");
+    if(value->next)
+        die(&c->ctx, "Too many arguments to 'pre_set'");
+
+    if(*name->data != '$' || str_varname(name->data+1) != strlen(name->data+1))
+        die(&name->ctx, "Invalid variable name '%s'", name->data);
+
+    proc_var_set(ctx, name->data+1, str_unquote(proc_subst_vars(ctx, value->data)));
+}
+
+
+/* Handles 'pre_exec $name command': runs `command` (variables substituted,
+ * quoting removed) through "/bin/sh -c", captures its standard output and
+ * stores it, minus one trailing newline, in variable $name.
+ *
+ * Fatal errors: wrong argument count, invalid variable name, failure to spawn
+ * or read from the process, a 0-byte in the output, a non-zero exit status or
+ * death by signal. */
+static void proc_exec(struct proc_ctx *ctx, struct cfg_directive *c) {
+    char *buf, *cmd, *argv[4];
+    ssize_t len; /* Signed, so that an error return from slurp_fd() can be detected */
+    posix_spawn_file_actions_t fact;
+    int status, fd[2], err;
+    pid_t pid;
+
+    if(!c->args)
+        die(&c->ctx, "'pre_exec' requires two arguments, but found none");
+    if(!c->args->next)
+        die(&c->ctx, "'pre_exec' requires two arguments, but found only one");
+    if(c->args->next->next)
+        die(&c->ctx, "Too many arguments to 'pre_exec'");
+    if(*c->args->data != '$' || str_varname(c->args->data+1) != strlen(c->args->data+1))
+        die(&c->args->ctx, "Invalid variable name '%s'", c->args->data);
+
+    cmd = proc_subst_vars(ctx, c->args->next->data);
+    argv[0] = "/bin/sh";
+    argv[1] = "-c";
+    argv[2] = str_unquote(cmd);
+    argv[3] = NULL;
+    free(cmd);
+
+    if(pipe(fd) < 0)
+        die(&c->ctx, "Error spawning process: %s", strerror(errno));
+
+    /* The posix_spawn family reports failure through its return value (an
+     * errno code); it does not return -1 and does not set errno. */
+    if((err = posix_spawn_file_actions_init(&fact)) != 0
+            || (err = posix_spawn_file_actions_addclose(&fact, fd[0])) != 0
+            || (err = posix_spawn_file_actions_adddup2(&fact, fd[1], 1)) != 0
+            || (err = posix_spawn(&pid, "/bin/sh", &fact, NULL, argv, NULL)) != 0)
+        die(&c->ctx, "Error spawning process: %s", strerror(err));
+    posix_spawn_file_actions_destroy(&fact);
+    free(argv[2]);
+
+    /* Close our copy of the write end, otherwise we would never see EOF */
+    close(fd[1]);
+    len = slurp_fd(fd[0], &buf);
+    if(len < 0)
+        die(&c->ctx, "Error reading data from process: %s", strerror(errno));
+    if(strlen(buf) != (size_t)len)
+        die(&c->ctx, "Invalid 0-byte in process output");
+    close(fd[0]);
+
+    while(waitpid(pid, &status, 0) < 0)
+        if(errno != EINTR)
+            die(&c->ctx, "Error waiting for process: %s", strerror(errno));
+    if(WIFEXITED(status) && WEXITSTATUS(status) != 0)
+        die(&c->ctx, "Process exited with error status %d", WEXITSTATUS(status));
+    if(WIFSIGNALED(status))
+        die(&c->ctx, "Process was killed by signal %d", WTERMSIG(status));
+
+    if(len > 0 && buf[len-1] == '\n') /* Strip trailing newline */
+        buf[len-1] = 0;
+    proc_var_set(ctx, c->args->data+1, buf);
+}
+
+
+/* pre_if arguments are kind of annoying:
+ *   Single arg: x     ; (x)     ; ( x )     ; (x )     ; ( x)
+ *   Two args:   x y   ; (x y)   ; ( x y )   ; (x y )   ; ( x y)
+ *   Three args: x y z ; (x y z) ; ( x y z ) ; (x y z ) ; ( x y z)
+ *
+ * This function normalizes to the first variant: out[0..2] receive pointers to
+ * the (up to three) condition arguments with the surrounding parentheses
+ * removed; unused slots are NULL. The argument strings are modified in place.
+ *
+ * Dies when an opening parenthesis has no matching ')' at the end of the last
+ * argument, or when more than three arguments remain. */
+static void proc_if_parse_args(struct ctx *ctx, struct cfg_arg *arg, char **out) {
+    size_t i = 0, len;
+    int braces = 0;
+    out[0] = out[1] = out[2] = NULL;
+
+    if(arg && *arg->data == '(') {
+        braces = 1;
+        if(!arg->data[1])
+            arg = arg->next;
+        else
+            /* Drop the '(' by shifting the rest (including the NUL) down, so
+             * that arg->data keeps pointing at the start of its allocation */
+            memmove(arg->data, arg->data+1, strlen(arg->data));
+    }
+
+    while(arg) {
+        if(braces && !arg->next) {
+            len = strlen(arg->data);
+            if(len == 0 || arg->data[len-1] != ')')
+                die(ctx, "Missing )");
+            arg->data[len-1] = 0;
+            /* A stand-alone ")" is not an argument of its own */
+            if(len == 1)
+                return;
+        }
+        if(i >= 3)
+            die(ctx, "Too many arguments to 'pre_if'");
+        out[i++] = arg->data;
+        arg = arg->next;
+    }
+}
+
+
+/* Evaluates the infix condition `a op b` of a pre_if.
+ *
+ * Supported operators: "==" and "!=" (string comparison), "~" and "~*"
+ * (extended regex match of `a` against pattern `b`, the latter
+ * case-insensitive), and their negations "!~" and "!~*". Any other operator or
+ * an invalid regular expression is a fatal error.
+ *
+ * When a regex matches, the whole match and the leading capture groups are
+ * stored in the variables $0, $1, ... (stopping at the first group that did
+ * not participate in the match, or when the match array is exhausted).
+ *
+ * Returns non-zero when the condition holds, 0 otherwise. */
+static int proc_if_infix(struct proc_ctx *ctx, struct ctx *if_ctx, char *a, char *op, char *b) {
+    regex_t reg;
+    regmatch_t match[9];
+    const size_t nmatch = sizeof(match)/sizeof(*match); /* Element count, not byte size */
+    size_t i, len;
+    int r;
+    char *tmp, buf[1024];
+
+    if(strcmp(op, "==") == 0)
+        return strcmp(a, b) == 0;
+    if(strcmp(op, "!=") == 0)
+        return strcmp(a, b) != 0;
+
+    if(strcmp(op, "~") != 0 && strcmp(op, "~*") != 0 && strcmp(op, "!~") != 0 && strcmp(op, "!~*") != 0)
+        die(if_ctx, "Unknown comparison operator '%s'", op);
+
+    r = regcomp(&reg, b, REG_EXTENDED | (op[strlen(op)-1] == '*' ? REG_ICASE : 0));
+    if(r != 0) {
+        regerror(r, &reg, buf, sizeof(buf));
+        die(if_ctx, "Invalid regular expression: %s", buf);
+    }
+
+    r = regexec(&reg, a, nmatch, match, 0);
+    if(r != 0 && r != REG_NOMATCH) {
+        regerror(r, &reg, buf, sizeof(buf));
+        die(if_ctx, "Error executing regular expression: %s", buf);
+    }
+
+    for(i=0; r == 0 && i < nmatch && match[i].rm_so != -1; i++) {
+        buf[0] = '0' + (char)i;
+        buf[1] = '\0';
+        len = match[i].rm_eo - match[i].rm_so;
+        tmp = malloc(len + 1);
+        memcpy(tmp, a+match[i].rm_so, len);
+        tmp[len] = 0;
+        proc_var_set(ctx, buf, tmp);
+        free(tmp); /* proc_var_set() stores its own copy */
+    }
+
+    regfree(&reg);
+    /* r is 0 on a match and REG_NOMATCH otherwise */
+    return *op == '!' ? r != 0 : r == 0;
+}
+
+
+/* Evaluates a normalized pre_if condition (see proc_if_parse_args()).
+ *
+ *   One argument:    true unless the (substituted, unquoted) value is empty or "0".
+ *   Two arguments:   file test "-f", "-d", "-e" or "-x" on the second argument,
+ *                    optionally negated with a leading '!'. A file that does
+ *                    not exist fails every test; any other stat() error is fatal.
+ *   Three arguments: infix comparison, delegated to proc_if_infix().
+ *
+ * Returns non-zero when the condition holds, 0 otherwise. */
+static int proc_if_cond(struct proc_ctx *ctx, struct ctx *if_ctx, char **arg) {
+    int negate = 0, r = 0;
+    struct stat st;
+    char *a, *b;
+
+    /* Single argument: test if true/false */
+    if(!arg[1]) {
+        a = str_unquote(proc_subst_vars(ctx, arg[0]));
+        return *a && strcmp(a, "0") != 0;
+    }
+
+    /* Two arguments: file tests */
+    if(!arg[2]) {
+        a = arg[0];
+        b = str_unquote(proc_subst_vars(ctx, arg[1]));
+        if(*a == '!') {
+            a++;
+            negate = 1;
+        }
+        if(*a != '-' || !a[1] || a[2])
+            die(if_ctx, "Unknown argument to pre_if");
+
+        if(stat(b, &st) < 0) {
+            if(errno != ENOENT)
+                die(if_ctx, "Unable to fetch file information for '%s': %s\n", b, strerror(errno));
+            /* Non-existent file: a zero mode makes every test below fail */
+            st.st_mode = 0;
+        }
+
+        switch(a[1]) {
+        case 'f': r = S_ISREG(st.st_mode); break;
+        case 'd': r = S_ISDIR(st.st_mode); break;
+        case 'e': r = st.st_mode != 0; break;
+        case 'x': r = st.st_mode & S_IXUSR || st.st_mode & S_IXGRP || st.st_mode & S_IXOTH; break; /* Maybe use access() instead? */
+        default: die(if_ctx, "Unknown file test flag '%s'", a);
+        }
+        return negate ? !r : r;
+    }
+
+    /* Three arguments: infix comparison operators */
+    a = str_unquote(proc_subst_vars(ctx, arg[0]));
+    b = str_unquote(proc_subst_vars(ctx, arg[2]));
+    return proc_if_infix(ctx, if_ctx, a, arg[1], b);
+}
+
+
+/* Handles 'pre_if <condition> { ... }' at *cur.
+ *
+ * When the condition holds, the block is processed and the directives that
+ * remain in it are spliced into the list in place of the pre_if. Otherwise, or
+ * when nothing remains of the block, the pre_if is simply removed. In every
+ * case *cur no longer points at the pre_if directive on return, so the
+ * condition is evaluated exactly once.
+ *
+ * Variables and macros defined while evaluating the condition or processing
+ * the block are discarded afterwards: the tables are restored from a snapshot
+ * taken before the condition is evaluated. */
+static void proc_if(struct proc_ctx *ctx, struct cfg_directive **cur) {
+    struct cfg_directive *tmp, *c = *cur;
+    char *arg[3];
+    struct proc_data old;
+    int cond;
+
+    proc_if_parse_args(&c->ctx, c->args, arg);
+    if(!arg[0])
+        die(&c->ctx, "No condition provided to 'pre_if'");
+    if(!c->body)
+        die(&c->ctx, "No block argument provided to 'pre_if'");
+
+    old = proc_data_copy(*ctx->data);
+    cond = proc_if_cond(ctx, &c->ctx, arg);
+
+    if(cond && c->body != empty_body)
+        proc_block(ctx, &c->body);
+
+    /* Re-check the body: processing may have consumed every directive in it */
+    if(cond && c->body != empty_body) {
+        tmp = *cur = c->body;
+        while(tmp->next)
+            tmp = tmp->next;
+        tmp->next = c->next;
+    } else
+        *cur = c->next;
+
+    free(ctx->data->vars);
+    free(ctx->data->macros);
+    *ctx->data = old;
+}
+
+
+/* Handles 'macro name [$scalar ...] [@array] [&block] { body }'.
+ *
+ * Validates the definition, records a snapshot of the variables and macros
+ * visible at this point, and registers the macro in the current context,
+ * replacing an existing macro of the same name. Argument names point into the
+ * directive's argument strings (past the sigil); they are not copied. */
+static void proc_macro(struct proc_ctx *ctx, struct cfg_directive *c) {
+    struct proc_data *tbl = ctx->data;
+    struct proc_macro *m;
+    struct cfg_arg *a;
+    size_t slot, nvars = 0, cap = 0;
+    char *ident;
+
+    if(!c->args)
+        die(&c->ctx, "No macro name provided");
+    if(!c->body)
+        die(&c->ctx, "Macro definition without a body");
+    if(strlen(c->args->data) != str_varname(c->args->data))
+        die(&c->ctx, "Invalid macro name '%s'", c->args->data);
+
+    m = calloc(1, sizeof(struct proc_macro));
+    m->name = c->args->data;
+    *m->data = proc_data_copy(*ctx->data);
+    m->body = c->body;
+
+    for(a=c->args->next; a; a=a->next) {
+        ident = a->data+1; /* Name without the leading sigil */
+        if(!a->data[1] || strlen(ident) != str_varname(ident))
+            die(&a->ctx, "Invalid variable name '%s'", a->data);
+
+        switch(*a->data) {
+        case '$':
+            if(m->array || m->block)
+                die(&a->ctx, "Invalid $scalar macro argument after @array or &block");
+            if(cap == nvars) {
+                cap = cap ? cap*2 : 8;
+                /* One extra slot for the terminating NULL */
+                m->vars = realloc(m->vars, sizeof(*m->vars)*(cap+1));
+            }
+            m->vars[nvars++] = ident;
+            m->vars[nvars] = NULL;
+            break;
+        case '@':
+            if(m->array || m->block)
+                die(&a->ctx, "Invalid @array macro argument after @array or &block");
+            m->array = ident;
+            break;
+        case '&':
+            if(m->block)
+                die(&a->ctx, "Invalid duplicate &block macro argument");
+            m->block = ident;
+            break;
+        default:
+            die(&a->ctx, "Invalid variable argument '%s'", a->data);
+        }
+    }
+
+    /* Redefinition: overwrite the existing slot */
+    for(slot=0; slot<tbl->macrolen; slot++)
+        if(strcmp(tbl->macros[slot]->name, m->name) == 0)
+            break;
+    if(slot < tbl->macrolen) {
+        tbl->macros[slot] = m;
+        return;
+    }
+
+    if(tbl->macrolen == tbl->macrosize) {
+        tbl->macrosize *= 2;
+        tbl->macros = realloc(tbl->macros, sizeof(*tbl->macros)*tbl->macrosize);
+    }
+    tbl->macros[tbl->macrolen++] = m;
+}
+
+
+/* Processes the ordinary (non-preprocessor) directive at *cur.
+ *
+ * Its arguments and block are processed first. If the directive name is not a
+ * known macro, it is kept and a pointer to its `next` link is returned.
+ *
+ * Otherwise the directive is a macro invocation: it is replaced by a processed
+ * copy of the macro body, and a pointer to the link following the expansion is
+ * returned. During the expansion the variable/macro tables are those captured
+ * at macro definition time plus the scalar arguments of this invocation (stored
+ * unquoted, like every other variable value); the caller's tables and macro
+ * state are restored afterwards. */
+static struct cfg_directive **proc_directive(struct proc_ctx *ctx, struct cfg_directive **cur) {
+    size_t i;
+    struct proc_macro *m, *old_macro;
+    struct proc_data old_data;
+    struct cfg_arg *old_array;
+    struct cfg_directive *old_block, *c = *cur;
+    char *val;
+
+    proc_args(ctx, &c->args);
+    if(c->body)
+        proc_block(ctx, &c->body);
+
+    for(i=0; i<ctx->data->macrolen; i++)
+        if(strcmp(ctx->data->macros[i]->name, c->name) == 0)
+            break;
+    if(i == ctx->data->macrolen)
+        return &c->next;
+
+    m = ctx->data->macros[i];
+    /* Set the caller's tables aside (ownership moves to old_data) and switch
+     * to a private copy of the tables captured when the macro was defined */
+    old_data = *ctx->data;
+    old_macro = ctx->macro;
+    old_array = ctx->macro_array;
+    old_block = ctx->macro_block;
+    *ctx->data = proc_data_copy(*m->data);
+
+    if(m->block && !c->body)
+        die(&c->ctx, "Macro '%s' requires a block argument, none given", m->name);
+    if(!m->block && c->body)
+        die(&c->ctx, "Macro '%s' does not accept a block argument", m->name);
+    for(i=0; m->vars && m->vars[i]; i++) {
+        if(!c->args)
+            die(&c->ctx, "Not enough arguments given to macro '%s'", m->name);
+        /* Variable values are kept unquoted; they are re-quoted on substitution */
+        val = str_unquote(c->args->data);
+        proc_var_set(ctx, m->vars[i], val);
+        free(val);
+        c->args = c->args->next;
+    }
+    if(c->args && !m->array)
+        die(&c->ctx, "Too many arguments given to macro '%s'", m->name);
+    ctx->macro = m;
+    ctx->macro_array = c->args;
+    ctx->macro_block = c->body;
+
+    /* TODO: Errors reported in the macro expansion will be given with the
+     * context of the macro itself, but it would be nice to include the context
+     * of the line that invoked the macro. */
+    *cur = cfg_directive_copy(m->body);
+    proc_block(ctx, cur);
+    if(*cur == empty_body)
+        *cur = c->next;
+    else {
+        while((*cur)->next)
+            cur = &(*cur)->next;
+        (*cur)->next = c->next;
+        cur = &(*cur)->next;
+    }
+
+    free(ctx->data->vars);
+    free(ctx->data->macros);
+    *ctx->data = old_data;
+    ctx->macro = old_macro;
+    ctx->macro_array = old_array;
+    ctx->macro_block = old_block;
+    return cur;
+}
+
+
+/* Processes a list of directives in place, starting at *block.
+ *
+ * The preprocessor directives (pre_warn, pre_include, pre_set, pre_exec,
+ * pre_if, macro) are executed and removed from the list. Inside a macro
+ * expansion, a "&<blockname>" directive is replaced by a copy of the block
+ * given at the macro invocation (or removed when that block is empty). Every
+ * other directive is handed to proc_directive().
+ *
+ * If nothing is left of the list, *block is set to `empty_body`. */
+static void proc_block(struct proc_ctx *ctx, struct cfg_directive **block) {
+    struct cfg_directive **cur = block, *c = *block;
+    struct cfg_arg *arg;
+    char *tmp;
+
+    while(*cur && *cur != empty_body) {
+        c = *cur;
+        if(strcmp(c->name, "pre_warn") == 0) {
+            proc_args(ctx, &c->args);
+            fputs("[warn]", stderr);
+            for(arg=c->args; arg; arg=arg->next) {
+                fputc(' ', stderr);
+                tmp = str_unquote(arg->data);
+                fputs(tmp, stderr);
+                free(tmp);
+            }
+            fputc('\n', stderr);
+            *cur = c->next;
+
+        } else if(strcmp(c->name, "pre_include") == 0) {
+            proc_include(ctx, cur);
+
+        } else if(strcmp(c->name, "pre_set") == 0) {
+            proc_set(ctx, *cur);
+            *cur = c->next;
+
+        } else if(strcmp(c->name, "pre_exec") == 0) {
+            proc_exec(ctx, *cur);
+            *cur = c->next;
+
+        } else if(strcmp(c->name, "pre_if") == 0) {
+            proc_if(ctx, cur);
+
+        } else if(strcmp(c->name, "macro") == 0) {
+            proc_macro(ctx, *cur);
+            *cur = c->next;
+
+        } else if(ctx->macro && ctx->macro->block && *c->name == '&' && strcmp(c->name+1, ctx->macro->block) == 0) {
+            if(c->args)
+                die(&c->ctx, "&block variable should not be given any arguments");
+            if(c->body)
+                die(&c->ctx, "Unexpected block after &block variable");
+            if(!ctx->macro_block || ctx->macro_block == empty_body)
+                /* Empty block argument: the &block directive just disappears */
+                *cur = c->next;
+            else {
+                /* Splice in a copy of the block and continue behind it */
+                *cur = cfg_directive_copy(ctx->macro_block);
+                while((*cur)->next)
+                    cur = &(*cur)->next;
+                (*cur)->next = c->next;
+                cur = &(*cur)->next;
+            }
+
+        } else
+            cur = proc_directive(ctx, cur);
+    }
+    if(!*block)
+        *block = empty_body;
+}
+
+
+
+
+/* Entry point: parses the command line, reads and processes the input
+ * configuration and writes the result.
+ *
+ * Exit status is 0 on success and 1 on a command-line error or when the output
+ * cannot be opened or fully written. */
+int main(int argc, char **argv) {
+    struct cfg_directive *d;
+    struct ctx toplevel = { "-", 0, 0, NULL };
+    struct proc_ctx proc = {};
+    char *output = "-";
+    FILE *out_fh;
+    int c;
+    size_t searchlen = 0, searchsize = 0;
+    static const struct option long_options[] = {
+        {"help", no_argument, 0, 'h' },
+        {"version", no_argument, 0, 'V' },
+        {0, 0, 0, 0 }
+    };
+
+    while((c = getopt_long(argc, argv, "hVi:o:I:", long_options, NULL)) >= 0) {
+        switch(c) {
+        case 'h':
+            puts("Usage: nginx-confgen <options>\n");
+            puts(" -h,--help This help message");
+            puts(" -V,--version Print version");
+            puts(" -i <FILE> Read input from file, \"-\" for standard input");
+            puts(" -o <FILE> Write output to file, \"-\" for standard output");
+            puts(" -I <DIR> Add directory to the search path for 'pre_include'");
+            exit(0);
+        case 'V':
+            printf("nginx-confgen %s\n", NGCFG_VERSION);
+            exit(0);
+        case 'i':
+            toplevel.fn = optarg;
+            break;
+        case 'o':
+            output = optarg;
+            break;
+        case 'I':
+            /* Keep the search path NULL-terminated; one spare slot is reserved for that */
+            if(searchlen == searchsize) {
+                searchsize = searchsize ? searchsize*2 : 8;
+                proc.search_path = realloc(proc.search_path, sizeof(*proc.search_path)*(1+searchsize));
+            }
+            proc.search_path[searchlen++] = optarg;
+            proc.search_path[searchlen] = NULL;
+            break;
+        case '?':
+            exit(1);
+        }
+    }
+    if(optind < argc) {
+        fprintf(stderr, "Unrecognized option: %s\n", argv[optind]);
+        exit(1);
+    }
+
+    d = parse_file(&toplevel);
+    if(!d)
+        die(&toplevel, "Error reading input: %s", strerror(errno));
+
+    proc.data->varsize = 32;
+    proc.data->vars = malloc(proc.data->varsize*sizeof(*proc.data->vars));
+    proc.data->macrosize = 32;
+    proc.data->macros = malloc(proc.data->macrosize*sizeof(*proc.data->macros));
+    proc_block(&proc, &d);
+
+    if(strcmp(output, "-") == 0)
+        out_fh = stdout;
+    else if((out_fh = fopen(output, "w")) == NULL) {
+        fprintf(stderr, "Error writing to '%s': %s\n", output, strerror(errno));
+        exit(1);
+    }
+    for(; d; d=d->next)
+        write_directive(out_fh, 0, d);
+
+    /* Buffered write errors (e.g. a full disk) only surface on flush/close;
+     * do not report success for a truncated configuration file */
+    if(fflush(out_fh) != 0 || ferror(out_fh) || fclose(out_fh) != 0) {
+        fprintf(stderr, "Error writing to '%s': %s\n", output, strerror(errno));
+        exit(1);
+    }
+    return 0;
+}
diff --git a/nginx-confgen.cabal b/nginx-confgen.cabal
deleted file mode 100644
index 7f0eac4..0000000
--- a/nginx-confgen.cabal
+++ /dev/null
@@ -1,28 +0,0 @@
-name: nginx-confgen
-version: 1.2
-synopsis: Nginx configuration file macro language and preprocessor
-homepage: https://dev.yorhel.nl/nginx-confgen
-license: MIT
-license-file: LICENSE
-author: Yoran Heling
-maintainer: projects@yorhel.nl
-copyright: MIT
-category: CLI
-build-type: Simple
-cabal-version: >=1.10
-extra-source-files: README nginx-confgen.pod
-
-executable nginx-confgen
- ghc-options: -Wall
- main-is: Main.hs
- default-language: Haskell2010
- build-depends:
- array
- , base
- , directory
- , filepath
- , megaparsec == 7.*
- , optparse-applicative
- , process
- , regex-tdfa
- , unordered-containers
diff --git a/nginx-confgen.pod b/nginx-confgen.pod
index d7f7a25..561930e 100644
--- a/nginx-confgen.pod
+++ b/nginx-confgen.pod
@@ -76,18 +76,15 @@ Relative paths are searched for in the directories given with the C<-I> flag.
=head2 pre_set
Similar to the C<set> directive in nginx, except that variables defined with
-C<pre_set> are resolved during preprocessing. Note that variables defined with
-C<pre_set> are only available in the same scope as they are defined, for
+C<pre_set> are resolved during preprocessing. Variables are set in the order
+that they are encountered in the configuration file, regardless of scoping. For
example:
pre_set $var outer;
location / {
pre_set $var inner;
- # $var is now "inner" within this location block.
}
- # $var is "outer" again after the location block.
-
-(This may change in the future)
+ # $var is "inner" at this point.
=head2 pre_exec
@@ -107,14 +104,13 @@ new command).
=head2 pre_if
Similar to the C<if> directive in nginx, except that this is evaluated during
-preprocessing. Also unlike C<if>, parenthesis around the arguments are not
-supported. Some examples:
+preprocessing. Parentheses around the condition are optional. Some examples:
pre_if -f $certdir/ocsp.der {
ssl_stapling on;
ssl_stapling_file $certdir/ocsp.der;
}
- pre_if !-f $certdir/ocsp.der {
+ pre_if (!-f $certdir/ocsp.der) {
ssl_stapling off;
}
@@ -153,7 +149,7 @@ The general syntax is as follows:
# contents
}
-The optional C<@remaining_vars> argument will capture any number of variables,
+The optional C<@remaining_vars> argument will capture any number of variables
and can be passed to another directive inside the macro contents. The optional
C<&block_var> allows the macro to be invoked with a block argument, which will
expand to any number of directives. Some examples:
@@ -204,13 +200,17 @@ not necessarily the most useful):
redir;
Similarly, macro arguments will not be available inside C<&block> expansion or
-nested macro expansion.
+nested macro expansion and any variables set inside a macro will not be
+available outside of the macro body.
=head1 BUGS & WARTS
nginx-confgen is a quickly written hack to solve a particular use case, it is
-quite likely to have some weird behavior and bugs.
+quite likely to have some weird behavior and bugs. In particular, processing
+performance may suffer on large configuration files with many macros and/or
+variables. Performance has simply not been a problem for me, but if you do run
+into trouble with your use case, let me know so I can fix it.
Comments and whitespace in the input files are thrown away and ignored. The
generated output is completely reformatted.
@@ -223,8 +223,6 @@ errors or will not survive a round-trip through nginx-confgen.
This applies to all I<*_by_lua_block> directives in the I<ngx_http_lua_module>.
The I<_by_lua> directives that accept a string should work just fine.
-The error messages given by C<nginx-confgen> aren't always helpful.
-
=head1 AUTHOR
diff --git a/stack.yaml b/stack.yaml
deleted file mode 100644
index 353473c..0000000
--- a/stack.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-resolver: lts-14.27
-packages:
-- .
diff --git a/test/main.conf b/test/main.conf
index 761bc77..b0e1976 100644
--- a/test/main.conf
+++ b/test/main.conf
@@ -17,6 +17,7 @@ pre_if $name ~* (B)c {
pre_set $var 2;
# Not visible outside of this pre_if block... hmmmm.
}
+#pre_warn "This just prints \$1: $1"
pre_if "" {
pre_warn This warn is never called;