summaryrefslogtreecommitdiff
path: root/logstat.conf
blob: af336d4538c928f3a95b2641ba4db9aa164c4abd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# comment

# Function definitions can be used to write re-usable expressions. Recursion is
# not allowed.  All functions are pure. In fact, all expressions are pure.
#
# pi: takes no arguments; its body is the number literal 3.1415.
# NOTE(review): 3.1415 is pi truncated (not rounded) to four decimals
# (pi = 3.14159...); extend the literal if more precision is ever needed.
func pi() 3.1415;
# isbot(ua): applies the pattern /googlebot|bing/i to 'ua' with the '~'
# operator. Presumably '~' is a regex match and the trailing 'i' makes it
# case-insensitive (TODO confirm). The pattern is unanchored, so "bing"
# presumably also matches longer tokens that merely contain it.
func isbot(ua) ua ~ /googlebot|bing/i;

# This defines a procedure, which is a list of statements that can be run or
# re-used in other procedures.
proc apache {
    # Apache log parser. Reads the log entry from the 'msg' variable, and
    # stores the parsed results in separate variables, one per named group:
    #   ip, vhost, user, timestamp, method, path, version, code, size,
    #   referer, useragent
    #
    # The pattern is anchored at the start (^) only, so trailing text after
    # the user agent's closing quote is not rejected by the pattern itself.
    # It carries the 'x' flag; presumably this is PCRE-style extended mode in
    # which unescaped whitespace in the pattern is ignored (TODO confirm),
    # which would be why every literal space is written as '[ ]'.
    #
    # 'size' is the only optional capture: the field is either a run of
    # digits (captured) or a bare '-' (not captured).
    # NOTE(review): what 'size' holds after a '-' entry, and what happens to
    # events that do not match at all, is not visible here -- confirm.
    #
    # 'referer' and 'useragent' use [^"]+, so an empty quoted field ("")
    # does not match.
    regex msg /^ (?<ip>        [^ ]+        )
      [ ]        (?<vhost>     [^ ]+        )
      [ ]        (?<user>      [^ ]+        )
      [ ] \[     (?<timestamp> [^\]]+       ) \]
      [ ] "      (?<method>    [^ ]+        )
      [ ]        (?<path>      [^ ]+        )
      [ ] HTTP\/ (?<version>   [012]\.[0-9] ) "
      [ ]        (?<code>      [0-9]{3}     )
      [ ]       ((?<size>      [0-9]+       )|-)
      [ ] "      (?<referer>   [^"]+        ) "
      [ ] "      (?<useragent> [^"]+        ) "/x;
}

proc foo {
    # Example procedure that exercises each statement type in turn:
    # set, use, filter, group, sort and show.
    #
    # Setting and using variables/fields (I still need to settle on consistent
    # terminology).
    # The first value is a number literal in exponent notation. In the second,
    # '.' is presumably a string-concatenation operator joining 'somefield'
    # and 1 (TODO confirm).
    set somefield 2e100;
    set somefield2 somefield.1;

    # 'include' the apache proc defined above. This behaves as if the
    # statements from the referenced procedure are copy-pasted into this
    # location.
    use apache;

    # Remove events that do not match an expression, i.e. an event is kept
    # only when the expression is true (here: non-zero size, and not a bot).
    # XXX: 'filter' is a poor name for this, I still can't tell whether this
    # is supposed to throw away or keep the events that match the expression.
    # Better would be 'ignore' or 'skip' or 'exclude' or whatever.
    filter size != 0;
    filter !isbot(useragent);

    ## Aggregation syntax:
    group ip, code;
    ## After a 'group', all grouped-on fields are directly accessible; the
    ## fields from above the 'group' statement must be accessed through
    ## aggregation functions (like in SQL). e.g.
    # set avgsize = avg(size)
    ## Syntax unclear, but the following SQL-like things would be nice:
    # set avgsize = sum(size) / count() where isint(size)
    # set avg200 = avg(size) where code = 200
    # filter count() < 50 # Less than 50 hits from a single IP -> not interesting
    ## Double aggregation (rather contrived example)
    # group avgsize
    # filter count() < 5 # Less than 5 IP addresses for a single average size
    ## NOTE(review): the commented-out examples above write 'set x = <expr>'
    ## without a trailing ';', whereas the live statements in this procedure
    ## use 'set x <expr>;' -- reconcile once the syntax is settled.

    ## ORDER BY .. LIMIT .. (we don't need the generality of SQL, so stick to getting a top n sort)
    # sort <number|string> <expr> <asc|desc> limit <num>
    ## The commented-out line below is the ascending variant of the active
    ## statement.
    #sort string ip asc limit 10;
    sort string ip desc limit 10;

    # The expression(s) to output. Only one 'show' is allowed in a program, and it
    # must be the last statement.
    show ip, code;
    ## Multiple expressions should generate a suitably-sized table.
    ## ("suitably-sized" is hard to tell when there's no top-n-sort or
    ## aggregation going on. Probably stick to CSV or so in that case?)
    ## If no 'show' statement is provided, then by default it should behave as
    ## if a 'show msg' command was used for non-aggregated procedures. For
    ## aggregated procedures it could be 'show <expr-grouped-on>'.
}


# Type system notes:
#   I'd prefer actually typed variables, with automatic type inference. This
#   could be kept simple with just a few basic types, e.g.:
#     String, Int, Float, Bool, Timestamp, IP
#   Each type implementing from-string and to-string, to allow for displaying
#   and automatic parsing from a regex. Operators and functions can be made
#   strict, e.g. (Int > Float) is a type error.
#
#   Unfortunately, that doesn't seem terribly easy to implement. So as an
#   alternative I'll stick with string typing instead. I.e. everything is a
#   string, it is up to the function or operator to define how they interpret
#   their arguments. This, of course, requires separate operators for string
#   and integer comparison.
#
#   Ints vs. Floats? Use the (old) Lua approach and force everything to be a
#   Float? Or, better yet, Haskell 'Rational'? I suspect that might get slow,
#   though.
#
#   Boolean context: "" and "0" are false, otherwise true.