summaryrefslogtreecommitdiff
path: root/lib/POE/Filter/VNDBAPI.pm
blob: 1d99e4e13bdff87af59992d94db31ea28ca9b22d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# Implements a POE::Filter for the VNDB API, and includes basic error checking
#
# Mapping between the request/response data and perl data structure:
#
# <command> -> [ '<command>' ]
#
# <command> <arg1> <arg2> ..  -> [ 'command', <arg1>, <arg2>, .. ]
#
# <arg>: <JSON-text> | <filter> | <unescaped-string>
#
# <JSON-text>: JSON object or array -> perl object or array
#
# <filter>:
#   string: ((<field> <op> <json-value>) <bool-op> (<field> <op> <json-value> ))
#   perl:   bless [ [ 'field', 'op', value ], 'bool-op', [ 'field', 'op', value ] ], 'POE::Filter::VNDBAPI::filter'
# <field> must match /[a-z_]+/
# <op> must be one of =, !=, <, >, >=, <= or ~
# whitespace around fields/ops/json-values/bool-ops are ignored.
#
# <unescaped-string>: Any string not starting with (, [ or { and not containing
#   whitespace. In perl represented as a normal string.
#
# When invalid data is given to put(), ...don't do that, seriously.
# When invalid data is given to get(), it will return the following arrayref:
#   [ undef, { id => 'parse', msg => 'error message' } ]
# When type='server', a valid error response can be sent back simply by
#   changing the undef to 'error' and forwarding the arrayref to put()
#
# See the POE::Filter documentation for information on how to use this module.
# This module supports filter switching (which will be required to implement
#   gzip compression or backwards compatibility on API changes)
# Note that this module is also suitable for use outside of the POE framework.


package POE::Filter::VNDBAPI;

use strict;
use warnings;
use JSON::XS;
use Encode 'decode_utf8', 'encode_utf8';
use Exporter 'import';

our @EXPORT_OK = qw|decode_filters encode_filters|;


my $EOT          = "\x04"; # End Of Transmission, this string is searched in the binary data using index()
my $WS           = qr/[\x20\x09\x0a\x0d]/;       # witespace as defined by RFC4627
my $FILTER_FIELD = qr/(?:[a-z_]+)/;              # <field> in the filters
my $FILTER_OP    = qr/(?:=|!=|<|>|>=|<=|~)/;     # <op> in the filters
my $FILTER_BOOL  = qr/(?:and|or)/;               # <boolean-op> in the filters


sub new {
  my($class, %o) = @_;
  my $b = '';
  return bless \$b, $class;
}


sub clone {
  my $self = shift;
  my $b = '';
  return bless \$b, ref $self;
}


sub get {
  my ($self, $data) = @_;
  my @r;

  $self->get_one_start($data);
  my $d;
  do {
    $d = $self->get_one();
    push @r, @$d if @$d;
  } while(@$d);

  return \@r;
}


sub get_one_start {
  my($self, $data) = @_;
  $$self .= join '', @$data;
}


sub get_pending {
  my $self = shift;
  return $$self ne '' ? [ $$self ] : undef;
}


sub _err($) { [ [ undef, { id => 'parse', msg => $_[0] } ] ] };

sub get_one {
  my $self = shift;
  # look for EOT
  my $end = index $$self, $EOT;
  return [] if $end < 0;
  my $str = substr $$self, 0, $end;
  $$self = substr $$self, $end+1;

  # $str now contains our request/response encoded in UTF8, time to decode
  $str = eval { decode_utf8($str, Encode::FB_CROAK); };
  if(!defined $str) {
    my $err = $@;
    $err =~ s/,? at .+ line [0-9]+[\.\r\n ]*$//;
    return _err "Encoding error: $err" if !defined $str;
  }

  # get command
  return _err "Invalid command" if !($str =~ s/^$WS*([a-z]+)$WS*//);
  my @ret = ($1);

  # parse arguments
  while($str) {
    $str =~ s/^$WS*//;

    # JSON text, starts with { or [
    if($str =~ /^[\[{]/) {
      my($value, $chars) = eval { JSON::XS->new->decode_prefix($str) };
      if(!defined $chars) {
        my $err = $@;
        $err =~ s/,? at .+ line [0-9]+[\.\r\n ]*$//;
        return _err "Invalid JSON value in filter expression: $err";
      }
      $str = $chars > length($str) ? substr $str, $chars : '';
      push @ret, $value;
    }

    # filter expression, starts with (
    elsif($str =~ /^\(/) {
      my($value, $rest) = decode_filters($str);
      return _err $value if !ref $value;
      $str = $rest;
      push @ret, bless $value, 'POE::Filter::VNDBAPI::filter';
    }

    # otherwise it's an unescaped string
    else {
      my ($value, $rest) = split /$WS+/, $str, 2;
      $str = $rest;
      push @ret, $value if length $value;
    }
  }

  return [ \@ret ];
}


# arguments come from the application and are assumed to be correct,
# passing incorrect arguments will result in undefined behaviour.
sub put {
  my($self, $cmds) = @_;
  my @r;
  for my $p (@$cmds) {
    my $cmd = shift @$p;
    for (@$p) {
      $cmd .= ' '.(
        ref($_) eq 'POE::Filter::VNDBAPI::filter' ? encode_filters $_ :
        ref($_) eq 'ARRAY' || ref($_) eq 'HASH'   ? JSON::XS->new->encode($_) : $_
      );
    }
    push @r, $cmd;
  }
  # the $EOT can also be passed through encode_utf8(), the result is the same.
  return [ map encode_utf8($_).$EOT, @r ];
}


# decodes "<field> <op> <value>", and returns the arrayref and the remaining (unparsed) string after <value>
sub decode_filter_expr {
  my $str = shift;
  return ('Invalid filter expression') if $str !~ /^$WS*($FILTER_FIELD)$WS*($FILTER_OP)([^=].*)$/s;
  my($field, $op, $val) = ($1, $2, $3);
  my($value, $chars) = eval { JSON::XS->new->allow_nonref->decode_prefix($val) };
  if(!defined $chars) {
    my $err = $@;
    $err =~ s/,? at .+ line [0-9]+[\.\r\n ]*$//;
    return ("Invalid JSON value in filter expression: $err");
  }
  $str = substr $val, $chars;
  return ([ $field, $op, $value ], $str);
}


sub decode_filters {
  my($str, $sub) = @_;
  $sub ||= 0;
  my @r;
  return ('Too many nested filter expressions') if $sub > 10;
  return ('Filter must start with a (') if !$sub && $str !~ s/^$WS*\(//;
  while(length $str && $str !~ /^$WS*\)/) {
    my $ret;
    $str =~ s/^$WS+//;
    # AND/OR
    if(@r%2 == 1 && $str =~ s/^($FILTER_BOOL)//) {
      push @r, $1;
      next;
    }
    # sub-expression ()
    if($str =~ s/^\(//) {
      ($ret, $str) = decode_filters($str, $sub+1);
      return ($ret) if !ref $ret;
      return ('Unterminated ( in filter expression') if $str !~ s/^$WS*\)//;
      push @r, $ret;
      next;
    }
    # <expr>
    ($ret, $str) = decode_filter_expr($str);
    return ($ret) if !ref $ret;
    push @r, $ret;
  }
  return ('Unterminated ( in filter expression') if !$sub && $str !~ s/^$WS*\)//;
  # validate what we have parsed
  return ('Empty filter expression') if !@r;
  return ('Invalid filter expression') if @r % 2 != 1 || grep ref $r[$_] eq ($_%2 ? 'ARRAY' : ''), 0..$#r;
  return (@r == 1 ? @r : \@r, $str);
}


# arguments: arrayref returned by decode_filters and an optional serialize function,
#  this function is called for earch filter expression, in the same order the expressions
#  are serialized. Should return the serialized string. This can be used to easily
#  convert the filters into SQL.
sub encode_filters {
  my($fil, $func, @extra) = @_;
  return '('.join('', map {
    if(!ref $_) { # and/or
      " $_ "
    } elsif(ref $_->[0]) { # sub expression
      my $v = encode_filters($_, $func, @extra);
      return undef if !defined $v;
      $v
    } else { # expression
      my $v = $func ? $func->($_, @extra) : "$_->[0] $_->[1] ".JSON::XS->new->allow_nonref->encode($_->[2]);
      return undef if !defined $v;
      $v
    }
  } ref($fil->[0]) ? @$fil : $fil).')';
}


1;


__END__

# and here is a relatively comprehensive test suite for the above implementation of decode_filter()

use lib '/home/yorhel/dev/vndb/lib';
use POE::Filter::VNDBAPI 'decode_filters';
require Test::More;
use utf8;
my @tests = (
  # these should all parse fine
  [q|(test = 1)|,                         ['test', '=', '1'], ''],
  [q|((vn_name ~ "20") and length > 2)|,  [['vn_name', '~', '20'], 'and', ['length', '>', 2]], ''],
  [q|(padding < ["val1", 4]) padding|,    ['padding', '<', ['val1', 4]], ' padding' ],
  [q|(s=nulland_f<3)()|,                  [['s', '=', undef], 'and', ['_f', '<', 3]], '()'],
  [qq|\r(p\r\t=\n \t\r3\n\n)|,            ['p', '=', 3], ''],
  [q|(s=4and((m="3"ort="str")or_g_={}))|, [['s','=',4],'and',[[['m','=','3'],'or',['t','=','str']],'or',['_g_','=',{}]]], ''],
  [q|(z = ")\"){})")|,                    ['z', '=', ')"){})'], '' ],
  [q| (name ~ "月姫") |,                  ['name', '~', '月姫'], ' ' ],
  [q| (id >= 2) |,                        ['id', '>=', 2], ' '],
  [q|(original = null)|,                  ['original', '=', undef],  ''],
  # and these should fail
  [q|(name = true|],
  [q|(and (f=1) or g=1)|],
  [q|name = null|],
  [q|(invalid-field > 6)|],
  [q|(invalid ~ "JSON)|],
  [q|()|],
  [q|(v = 2 and ())|],
);
import Test::More tests => ($#tests+1)*2;
for (@tests) {
  my @ret = decode_filters($_->[0]);
  if($_->[1]) {
    is_deeply($ret[0], $_->[1]);
    is($ret[1], $_->[2]);
  } else {
    ok(ref $ret[0] eq '', "nonref: $_->[0]");
    is($ret[1], undef, "rest: $_->[0]");
  }
}