diff options
author | Yorhel <git@yorhel.nl> | 2009-11-03 22:15:57 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2009-11-04 20:57:02 +0100 |
commit | eb093ac9fe5d9dcbde98c92ba2720749bb814def (patch) | |
tree | 57fd8ab3443a483b6addeafda954808663992d16 | |
parent | 6576cf8921cbb6e8e50717e071e3f3201c088763 (diff) |
API: Initial commit of the W.I.P. public API
d11 does not fully reflect what has been implemented, and things are
likely to change.
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | data/docs/11 | 207 | ||||
-rw-r--r-- | data/global.pl | 1 | ||||
-rw-r--r-- | data/style.css | 8 | ||||
-rw-r--r-- | lib/Multi/API.pm | 318 | ||||
-rw-r--r-- | lib/POE/Filter/VNDBAPI.pm | 303 |
6 files changed, 839 insertions, 0 deletions
@@ -28,6 +28,8 @@ Requirements PerlIO::gzip (optional, for output compression) util/multi.pl: + API: + JSON::XS Core: DBI DBD::Pg diff --git a/data/docs/11 b/data/docs/11 new file mode 100644 index 00000000..7927182b --- /dev/null +++ b/data/docs/11 @@ -0,0 +1,207 @@ +:TITLE:Public Database API + +<div class="warning"> + <h2>IN DEVELOPMENT</h2> + <p>This API is currently in development, pretty much everything is subject to change without notice.</p> +</div> + +:INC:index + +:SUB:Introduction +<b>Design goals</b> +<ul> + <li> + Simple in implementation of both client and server. "Simple" here means that + it shouldn't take much code to write a secure and full implementation and that + client applications don't require huge dependency trees just to use this API. + </li> + <li>Powerful: Not as powerful as raw SQL, but not as rigid as commonly used REST or RPC protocols.</li> + <li>Fast: minimal bandwidth overhead</li> + <li> + High-level: nobody is interested in the internal database structure of VNDB + (ok, maybe you are, but you wouldn't want to write an application using it: it + changes quite often) + </li> + <li>Stateful</li> +</ul> +<br /> + +<b>Design overview</b> +<ul> + <li>TCP-based, all communication between the client and the server is done + using one TCP connection. This connection stays alive until it is explicitely + closed by either the client or the server.</li> + <li>Request/response, client sends a request and server replies with a response.</li> + <li>Session-based: clients are required to login before issuing commands to + the server. A session is created by issuing the 'login' command, this session + stays valid for the lifetime of the TCP connection.</li> + <li><b>Everything</b> sent between the client and the server is encoded in UTF-8.</li> +</ul> +<br /> + +<b>Limits</b> +<p>The following limits are enforced by the server, in order to limit the +server resources and prevent abuse of this service.</p> +<ul> + <li>5 connections per IP. All connections that are opened after reaching this limit will be immediately closed.</li> + <li>3 connections per user. The login command will reply with a 'sesslimit' error when reaching this limit.</li> + <li><i>more to come...</i></li> +</ul> + +<br /> +<b>Test version:</b> +<dl> + <dt>Host (beta)</dt><dd>beta.vndb.org</dd> + <dt>Port</dt><dd>19534 ('VN')</dd> +</dl> + + +:SUB:Request/response syntax +<p> + The VNDB API uses the JSON format for data in various places, this document assumes + you are familiar with it. See <a href="http://json.org/">JSON.org</a> for a quick + overview and <a href="http://www.ietf.org/rfc/rfc4627.txt?number=4627">RFC 4627</a> + for the glory details. + <br /><br /> + The words <i>object</i>, <i>array</i>, <i>value</i>, <i>string</i>, + <i>number</i> and <i>int</i> refer to the JSON data types. In addition the following + definitions are used in this document: +</p> +<dl> + <dt><i>request</i> or <i>command</i></dt><dd> + Message sent from the client to the server. + </dd><dt><i>response</i></dt><dd> + Message sent from the server to the client. + </dd><dt><i>whitespace</i></dt><dd> + Any sequence of the following characters: space, tab, line feed and carriage + return. (hexadecimal: 20, 09, 10, 0D, respectively). This is in line with the + definition of whitespace in the JSON specification. + </dd><dt><i>date</i></dt><dd> + A <i>string</i> signifying a date (in particular: release date). The + following formats are used: "yyyy" (when day and month are unknown), "yyyy-mm" + (when day is unknown) "yyyy-mm-dd", and "tba" (To Be Announced). If the year is + not known and the date is not "tba", the special value <b>null</b> is used. + </dd> +</dl> +<br /> + +<b>Message format</b> +<p> + A message is formatted as a command or response name, followed by any number of + arguments, followed by the End Of Transmission character (04 in hexadecimal). + Arguments are separated by one or more whitespace characters, and any sequence + of whitespace characters is allowed before and after the message.<br /> + The command or response name is an unescaped string containing only lowercase + alphabetical ASCII characters, and indicates what kind of command or response + this message contains.<br /> + An argument can either be an unescaped string (not containing whitespace), any + JSON value, or a filter string. The following two examples demonstrate a + 'login' command, with an object as argument. Both messages are equivalent, as + the whitespace is ignored. '0x04' is used to indicate the End Of Transmission + character. +</p> +<pre> + login {"protocol":1,"username":"ayo"}<b class="standout">0x04</b> +</pre><pre> + login { + "protocol" : 1, + "username" : "ayo" + } + <b class="standout">0x04</b> +</pre> +The 0x04 byte will be ommitted in the other examples in this document. It is +however still required.<br /> + +<br /> +<b>Filter string syntax</b> +<p> + Some commands accept a filter string as argument. This argument is formatted + similar to boolean expressions in most programming languages. A filter consists + of one or more <i>expressions</i>, separated by the boolean operators "and" and + "or" (lowercase). Each filter expression can be surrounded by parentheses to + indicate precedence, the filter argument itself must be surrounded by parentheses. + <br /> + An <i>expression</i> consists of a <i>field name</i>, followed by an + <i>operator</i> and a <i>value</i>. The field name must consist entirely of + lowercase alphanumeric characters and can also contain an underscore. The + operator must be one of the following characters: =, !=, <, <=, >, + >= or ~. The <i>value</i> can be any valid JSON value. Whitespace + characters are allowed, but not required, between all expressions, field names, + operators and values.<br /> + The following two filters are equivalent: +</p> +<pre> + (title~"osananajimi"or(id=2)) +</pre><pre> + ( + id = 2 + or + title ~ "osananajimi" + ) +</pre> +<p>More complex things are also possible:</p> +<pre> + ((platforms = ["win", "ps2"] or languages = "ja") and released > "2009-01-10") +</pre> +<p>See the individual commands for more details.</p> + + +:SUB:The 'login' command +<pre> + login {"protocol":1,"client":"test","clientver":0.1,"username":"ayo","password":"hi-mi-tsu!"} +</pre> +<p> + Every client is required to login before issuing other commands. The login + command accepts a JSON object as argument. This object must have the following members: +</p> +<dl> + <dt>protocol</dt><dd>An integer that indicates which protocol version the client implements. Must be 1.</dd> + <dt>client</dt><dd> + A string identifying the client application. Between the 3 and 50 characters, + must contain only alphanumeric ASCII characters, space, underscore and hyphens. + When writing a client, think of a funny (unique) name and hardcode it into + your application. + </dd><dt>clientver</dt><dd>A positive number indicating the software version of the client.</dd> + <dt>username</dt><dd>String containing the username of the person using the client.</dd> + <dt>password</dt><dd>String, password of that user in plain text.</dd> +</dl> +<p> + The server replies with either 'ok' (no arguments), or 'error' (see below). +</p> + + + +:SUB:The 'error' response +<p> + Every command to the server can receive an 'error' response, this response has one + argument: a JSON object containing at least a member named "id", which identifies + the error, and a "msg", which contains a human readable message explaining what + went wrong. Other members are also possible, depending on the value of "id". + Example error message: +</p> +<pre> + error {"id":"parse", "msg":"Invalid command or argument"} +</pre> +<p> + Note that the value of "msg" is not directly linked to the error identifier: + the message explains what went wrong in more detail, there are several + different messages for the same id. The following error identifiers are currently + defined: +</p> +<dl> + <dt>parse</dt><dd>Syntax error or unknown command.</dd> + <dt>missing</dt><dd>A JSON object argument is missing a required member. The name of which is given in the additional "field" member.</dd> + <dt>badarg</dt><dd>A JSON value is of the wrong type or in the wrong format. The name of the incorrect field is given in a "field" member.</dd> + <dt>needlogin</dt><dd>Need to be logged in to issue this command.</dd> + <dt>auth</dt><dd>(login) Incorrect username/password combination.</dd> + <dt>loggedin</dt><dd>(login) Already logged in. Only one successful login command can be issues on one connection.</dd> + <dt>sesslimit</dt><dd>(login) Too many open sessions for the current user.</dd> + <dt>gettype</dt><dd>(get) Unknown type argument to the 'get' command.</dd> + <dt>getinfo</dt><dd>(get) Unknown info flag to the 'get' command. The name of the unrecognised flag is given in an additional "flag" member.</dd> + <dt>filterop</dt><dd>(get) Bad operator in filter expression for the field/value combination. Includes three additional members: "field", "op" and "value" of the incorrect expression.</dd> + <dt>filterval</dt><dd>(get) Wrong type or format for the value in a filter expression. Adds same three members as the 'filterop' error.</dd> + <dt>filterfield</dt><dd>(get) Unknown field in filter expression. Adds same three members as the 'filterop' error.</dd> +</dl> + + + diff --git a/data/global.pl b/data/global.pl index 6dae908f..8edd25fa 100644 --- a/data/global.pl +++ b/data/global.pl @@ -117,6 +117,7 @@ our %S = (%S, our %M = ( log_dir => $ROOT.'/data/log', modules => { + #API => {}, # disabled by default, not really needed RG => {}, Image => {}, Sitemap => {}, diff --git a/data/style.css b/data/style.css index ff0844ad..5dcd7643 100644 --- a/data/style.css +++ b/data/style.css @@ -120,6 +120,14 @@ b.spoiler_shown { font-weight: normal } text-align: left; } .linethrough { text-decoration: line-through } + pre { + padding:1px 5px; + margin: 5px 15px; + border: 1px dotted $border$; + border-right: none; + border-left: 1px solid $border$; + background: url($_boxbg$) repeat; +} diff --git a/lib/Multi/API.pm b/lib/Multi/API.pm new file mode 100644 index 00000000..1a00e565 --- /dev/null +++ b/lib/Multi/API.pm @@ -0,0 +1,318 @@ + +# +# Multi::API - The public VNDB API +# + +package Multi::API; + +use strict; +use warnings; +use Socket 'inet_ntoa', 'SO_KEEPALIVE', 'SOL_SOCKET', 'IPPROTO_TCP'; +use Errno 'ECONNABORTED', 'ECONNRESET'; +use POE 'Wheel::SocketFactory', 'Wheel::ReadWrite'; +use POE::Filter::VNDBAPI 'encode_filters'; +use Digest::SHA 'sha256_hex'; +use Encode 'encode_utf8'; + + +# not exported by Socket, taken from netinet/tcp.h (specific to Linux, AFAIK) +sub TCP_KEEPIDLE { 4 } +sub TCP_KEEPINTVL { 5 } +sub TCP_KEEPCNT { 6 } + + +sub spawn { + my $p = shift; + POE::Session->create( + package_states => [ + $p => [qw| + _start shutdown log server_error client_connect client_error client_input + login login_res + get_vn get_vn_res + |], + ], + heap => { + port => 19534, + logfile => "$VNDB::M{log_dir}/api.log", + conn_per_ip => 5, + sess_per_user => 3, + tcp_keepalive => [ 120, 60, 3 ], # time, intvl, probes + @_, + c => {}, + }, + ); +} + + +## Non-POE helper functions + +sub cerr { + my($c, $id, $msg, %o) = @_; + $c->{wheel}->put([ error => { id => $id, msg => $msg, %o }]); + # using $poe_kernel here isn't really a clean solution... + $poe_kernel->yield(log => $c, 'error: %s, %s', $id, $msg); + return undef; +} + + +sub formatdate { + return undef if $_[0] == 0; + (local $_ = sprintf '%08d', $_[0]) =~ + s/^(\d{4})(\d{2})(\d{2})$/$1 == 9999 ? 'tba' : $2 == 99 ? $1 : $3 == 99 ? "$1-$2" : "$1-$2-$3"/e; + return $_; +} + + +sub filtertosql { + my($c, $p, $t, $field, $op, $value) = ($_[1], $_[2], $_[3], @{$_[0]}); + my %e = ( field => $field, op => $op, value => $value ); + + $t = (grep $_->[0] eq $field, @$t)[0]; + return cerr $c, filterfield => "Unknown field '$field'", %e if !$t; + shift @$t; # field name + my $type = shift @$t; + my %o = @$t; + + # integer, options: dbfield + if($type eq 'int') { + if($value && ref $value eq 'ARRAY') { + return cerr $c, filterop => "Operator for '$field' must be either = or != for array values", %e if $op ne '=' && $op ne '!='; + return cerr $c, filterval => "Array elements for '$field' must be integers", %e if grep !defined($_) || !/^\d+$/, @$value; + push @$p, @$value; + return sprintf '%s %s(%s)', $o{dbfield}, $op eq '=' ? 'IN' : 'NOT IN', join ',', map '?', @$value; + } elsif(defined $value && !ref $value && $value =~ /^\d+$/) { + my @ops = qw(= != > >= < <=); + return cerr $c, filterop => "Operator for '$field' must be one of ".join(', ', @ops), %e if !grep $op eq $_, @ops; + push @$p, $value; + return sprintf '%s %s ?', $o{dbfield}, $op eq '!=' ? '<>' : $op; + } + return cerr $c, filterval => "Value for '$field' must be either an integer or an array of integers", %e; + } + + # string, options: dbfield, null + if($type eq 'str') { + if(!defined $value) { + return cerr $c, filterval => "null not allowed for '$field'", %e if !exists $o{null}; + return cerr $c, filterop => "Operator for '$field' must be either = or != for null", %e if $op ne '=' && $op ne '!='; + return sprintf '%s %s', $o{dbfield}, $op eq '=' ? 'IS NULL' : 'IS NOT NULL' if !defined $o{null}; + push @$p, $o{null}; + return sprintf '%s %s ?', $o{dbfield}, $op eq '=' ? '=' : '<>'; + } elsif(ref($value) eq 'ARRAY') { + return cerr $c, filterop => "Operator for '$field' must be either = or != for array values", %e if $op ne '=' && $op ne '!='; + return cerr $c, filterval => "Array elements for '$field' must be scalars", %e if grep !defined($_) || ref($_), @$value; + push @$p, @$value; + return sprintf '%s %s(%s)', $o{dbfield}, $op eq '=' ? 'IN' : 'NOT IN', join ',', map '?', @$value; + } elsif(!ref $value) { + my @ops = qw(= != ~); + if($op eq '=' || $op eq '!=') { + push @$p, $value; + return sprintf '%s %s ?', $o{dbfield}, $op eq '!=' ? '<>' : $op; + } elsif($op eq '~') { + $value =~ s/%//; + push @$p, "%$value%"; + return sprintf '%s ILIKE ?', $o{dbfield}; + } else { + return cerr $c, filterop => "Operator for '$field' must be =, != or ~", %e; + } + } else { + return cerr $c, filterval => "Value for '$field' must be a string or an array of strings.", %e; + } + } + + die "This shouldn't happen!"; +} + + +## POE handlers + +sub _start { + $_[KERNEL]->alias_set('api'); + $_[KERNEL]->sig(shutdown => 'shutdown'); + + # create listen socket + $_[HEAP]{listen} = POE::Wheel::SocketFactory->new( + BindPort => $_[HEAP]{port}, + Reuse => 1, + FailureEvent => 'server_error', + SuccessEvent => 'client_connect', + ); + $_[KERNEL]->yield(log => 0, 'API starting up on port %d', $_[HEAP]{port}); +} + + +sub shutdown { + $_[KERNEL]->alias_remove('api'); + $_[KERNEL]->yield(log => 0, 'API shutting down'); + delete $_[HEAP]{listen}; + delete $_[HEAP]{c}{$_}{wheel} for (keys %{$_[HEAP]{c}}); +} + + +sub log { + my($c, $msg, @args) = @_[ARG0..$#_]; + if(open(my $F, '>>', $_[HEAP]{logfile})) { + printf $F "[%s] %s: %s\n", scalar localtime, + $c ? sprintf '%d %s', $c->{wheel}->ID(), $c->{ip} : 'global', + @args ? sprintf $msg, @args : $msg; + close $F; + } +} + + +sub server_error { + return if $_[ARG0] eq 'accept' && $_[ARG1] == ECONNABORTED; + $_[KERNEL]->yield(log => 0, 'Server socket failed on %s: (%s) %s', @_[ ARG0..ARG2 ]); + $_[KERNEL]->call(core => log => 'API shutting down due to error.'); + $_[KERNEL]->yield('shutdown'); +} + + +sub client_connect { + my $ip = inet_ntoa($_[ARG1]); + my $sock = $_[ARG0]; + + if($_[HEAP]{conn_per_ip} <= grep $ip eq $_[HEAP]{c}{$_}{ip}, keys %{$_[HEAP]{c}}) { + $_[KERNEL]->yield(log => 0, + 'Connect from %s denied, limit of %d connections per IP reached', $ip, $_[HEAP]{conn_per_ip}); + close $sock; + return; + } + + # set TCP keepalive (silently ignoring errors, it's not really important) + my $keep = $_[HEAP]{tcp_keepalive}; + $keep && eval { + setsockopt($sock, SOL_SOCKET, SO_KEEPALIVE, 1); + setsockopt($sock, IPPROTO_TCP, TCP_KEEPIDLE, $keep->[0]); + setsockopt($sock, IPPROTO_TCP, TCP_KEEPINTVL, $keep->[1]); + setsockopt($sock, IPPROTO_TCP, TCP_KEEPCNT, $keep->[2]); + }; + + # the wheel + my $w = POE::Wheel::ReadWrite->new( + Handle => $sock, + Filter => POE::Filter::VNDBAPI->new(type => 'server'), + ErrorEvent => 'client_error', + InputEvent => 'client_input', + ); + $_[HEAP]{c}{ $w->ID() } = { + wheel => $w, + ip => $ip, + }; + $_[KERNEL]->yield(log => $_[HEAP]{c}{ $w->ID() }, 'Connected'); +} + + +sub client_error { # func, errno, errmsg, wheelid + my $c = $_[HEAP]{c}{$_[ARG3]}; + if($_[ARG0] eq 'read' && ($_[ARG1] == 0 || $_[ARG1] == ECONNRESET)) { + $_[KERNEL]->yield(log => $c, 'Disconnected'); + } else { + $_[KERNEL]->yield(log => $c, 'SOCKET ERROR on operation %s: (%s) %s', @_[ARG0..ARG2]); + } + delete $_[HEAP]{c}{$_[ARG3]}; +} + + +sub client_input { + my($arg, $id) = @_[ARG0,ARG1]; + my $cmd = shift @$arg; + my $c = $_[HEAP]{c}{$id}; + + return cerr $c, $arg->[0]{id}, $arg->[0]{msg} if !defined $cmd; + + # when we're here, we can assume that $cmd contains a valid command + # and the arguments are syntactically valid + + # login + return $_[KERNEL]->yield(login => $c, @$arg) if $cmd eq 'login'; + + return cerr $c, needlogin => 'Not logged in.' if !$c->{username}; + # TODO: throttling + + # get + return cerr $c, 'parse', "Unkown command '$cmd'" if $cmd ne 'get'; + my $type = shift @$arg; + return cerr $c, 'gettype', "Unknown get type: '$type'" if $type ne 'vn'; + $_[KERNEL]->yield("get_$type", $c, @$arg); +} + + +sub login { + my($c, $arg) = @_[ARG0,ARG1]; + + # validation (bah) + return cerr $c, loggedin => 'Already logged in, please reconnect to start a new session' if $c->{username}; + for (qw|protocol client clientver username password|) { + !exists $arg->{$_} && return cerr $c, missing => "Required field '$_' is missing", field => $_; + !defined $arg->{$_} && return cerr $c, badarg => "Field '$_' cannot be null", field => $_; + # note that 'true' and 'false' are also refs + ref $arg->{$_} && return cerr $c, badarg => "Field '$_' must be a scalar", field => $_; + } + return cerr $c, badarg => 'Unkonwn protocol version', field => 'protocol' if $arg->{protocol} ne '1'; + return cerr $c, badarg => 'Invalid client name', field => 'client' if $arg->{client} !~ /^[a-zA-Z0-9 _-]{3,50}$/; + return cerr $c, badarg => 'Invalid client version', field => 'clientver' if $arg->{clientver} !~ /^\d+(\.\d+)?$/; + return cerr $c, sesslimit => "Too many open sessions for user '$arg->{username}'", max_allowed => $_[HEAP]{sess_per_user} + if $_[HEAP]{sess_per_user} <= grep $arg->{username} eq $_[HEAP]{c}{$_}{username}, keys %{$_[HEAP]{c}}; + + # fetch user info + $_[KERNEL]->post(pg => query => "SELECT rank, salt, encode(passwd, 'hex') as passwd FROM users WHERE username = ?", + [ $arg->{username} ], 'login_res', [ $c, $arg ]); +} + + +sub login_res { # num, res, [ c, arg ] + my($num, $res, $c, $arg) = (@_[ARG0, ARG1], $_[ARG2][0], $_[ARG2][1]); + + return cerr $c, auth => "No user with the name '$arg->{username}'" if $num == 0; + return cerr $c, auth => "Outdated password format, please relogin on $VNDB::S{url}/ and try again" if $res->[0]{salt} =~ /^ +$/; + + my $encrypted = sha256_hex($VNDB::S{global_salt}.encode_utf8($arg->{password}).encode_utf8($res->[0]{salt})); + return cerr $c, auth => "Wrong password for user '$arg->{username}'" if lc($encrypted) ne lc($res->[0]{passwd}); + + $c->{wheel}->put(['ok']); + $c->{username} = $arg->{username}; + $_[KERNEL]->yield(log => $c, + 'Successful login by %s using client "%s" ver. %s', $arg->{username}, $arg->{client}, $arg->{clientver}); +} + + +sub get_vn { + my($c, $info, $filters) = @_[ARG0..$#_]; + + return cerr $c, getinfo => "Unkown info flag '$_'", flag => $_ for (grep $_ ne 'basic', @$info); + + my $select = 'v.id, vr.title, vr.original, v.c_released, v.c_languages, v.c_platforms'; + + my @placeholders; + my $where = encode_filters $filters, \&filtertosql, $c, \@placeholders, [ + [ id => 'int', dbfield => 'v.id' ], + [ title => 'str', dbfield => 'vr.title' ], + [ original => 'str', dbfield => 'vr.original', null => '' ], + ]; + warn $where; + return if !$where; + + $_[KERNEL]->post(pg => query => + qq|SELECT $select FROM vn v JOIN vn_rev vr ON v.latest = vr.id WHERE NOT v.hidden AND $where LIMIT 10|, + \@placeholders, 'get_vn_res', [ $c, $info, $filters ]); +} + + +sub get_vn_res { + my($num, $res, $c, $info, $filters, $time) = (@_[ARG0, ARG1], @{$_[ARG2]}, $_[ARG3]); + + for (@$res) { + $_->{id}*=1; + $_->{original} ||= undef; + $_->{platforms} = [ split /\//, delete $_->{c_platforms} ]; + $_->{languages} = [ split /\//, delete $_->{c_languages} ]; + $_->{released} = formatdate delete $_->{c_released}; + } + + $c->{wheel}->put([ results => { num => $#$res+1, items => $res }]); + $_[KERNEL]->yield(log => $c, "%4.0fms %2d get vn %s %s", $time*1000, $#$res+1, join (',', @$info), encode_filters $filters); +} + + +1; + diff --git a/lib/POE/Filter/VNDBAPI.pm b/lib/POE/Filter/VNDBAPI.pm new file mode 100644 index 00000000..0eca3eb2 --- /dev/null +++ b/lib/POE/Filter/VNDBAPI.pm @@ -0,0 +1,303 @@ +# Implements a POE::Filter for the VNDB API, and includes basic error checking +# +# Currently recognised commands and their mapping between Perl and strings +# (this is just a simple overview, actual implementation is more advanced) +# +# C: login <json-object> +# [ 'login', {object} ] +# +# C: get <type> <info> <filters> +# [ 'get', <type>, <info>[ split ',', $2 ], [ filters ] ] +# <type> must match /[a-z\/_]+/ +# <info> as string: /[a-z_]+(,[a-z_]+)*/, in perl: [ /[a-z_]+/, .. ] +# +# S: ok +# [ 'ok' ] +# +# S: results <json-object> +# [ 'results', {object} ] +# +# S: error <json-object> +# [ 'error', {object} ] +# +# <filters>: +# string: ((<field> <op> <json-value>) <bool-op> (<field> <op> <json-value> )) +# perl: [ [ 'field', 'op', value ], 'bool-op', [ 'field', 'op', value ] ] +# <field> must match /[a-z_]+/ +# <op> must be one of =, !=, <, >, >=, <= or ~ +# whitespace around fields/ops/json-values/bool-ops are ignored. +# +# When type='server', put() will accept the objects marked by 'S' and get() will accept the strings marked by 'C' +# When type='client', put() will accept the objects marked by 'C' and get() will accept the strings marked by 'S' +# +# When invalid data is given to put(), ...don't do that, seriously. +# When invalid data is given to get(), it will return the following arrayref: +# [ undef, { id => 'parse', msg => 'error message' } ] +# When type='server', a valid error response can be sent back simply by +# changing the undef to 'error' and forwarding the arrayref to put() +# +# See the POE::Filter documentation for information on how to use this module. +# This module supports filter switching (which will be required to implement +# gzip compression or backwards compatibility on API changes) +# Note that this module is also suitable for use outside of the POE framework. + + +package POE::Filter::VNDBAPI; + +use strict; +use warnings; +use JSON::XS; +use Encode 'decode_utf8', 'encode_utf8'; +use Exporter 'import'; + +our @EXPORT_OK = qw|decode_filters encode_filters|; + + +my $EOT = "\x04"; # End Of Transmission, this string is searched in the binary data using index() +my $WS = qr/[\x20\x09\x0a\x0d]/; # witespace as defined by RFC4627 +my $GET_TYPE = qr/(?:[a-z\/_]+)/; # get <type> +my $GET_INFO = qr/(?:[a-z_]+(?:,[a-z_]+)*)/; # get <info> +my $FILTER_FIELD = qr/(?:[a-z_]+)/; # <field> in the filters +my $FILTER_OP = qr/(?:=|!=|<|>|>=|<=|~)/; # <op> in the filters +my $FILTER_BOOL = qr/(?:and|or)/; # <boolean-op> in the filters + + +sub new { + my($class, %o) = @_; + my $type = ($o{type}||'') eq 'server' ? 'server' : 'client'; + return bless { + type => $type, + buffer => '' + }, $class; +} + + +sub clone { + my $self = shift; + return bless { + type => $self->{type}, + }, ref $self; +} + + +sub get { + my ($self, $data) = @_; + my @r; + + $self->get_one_start($data); + my $d; + do { + $d = $self->get_one(); + push @r, @$d if @$d; + } while(@$d); + + return \@r; +} + + +sub get_one_start { + my($self, $data) = @_; + $self->{buffer} .= join '', @$data; +} + + +sub get_pending { + my $self = shift; + return $self->{buffer} ne '' ? [ $self->{buffer} ] : undef; +} + + +sub _err($) { [ [ undef, { id => 'parse', msg => $_[0] } ] ] }; + +sub get_one { + my $self = shift; + # look for EOT + my $end = index $self->{buffer}, $EOT; + return [] if $end < 0; + my $str = substr $self->{buffer}, 0, $end; + $self->{buffer} = substr $self->{buffer}, $end+1; + + # $str now contains our request/response encoded in UTF8, time to decode + $str = eval { decode_utf8($str, Encode::FB_CROAK); }; + return _err "Encoding error: $@" if !defined $str; + + # C: login + # S: error, results + if($str =~ /^$WS*(login|error|results)$WS+(.+)$/s && ($self->{type} eq 'server' && $1 eq 'login' || $self->{type} eq 'client' && $1 ne 'login')) { + my($cmd, $json) = ($1, $2); + $json = eval { JSON::XS->new->decode($json) }; + if(!defined $json) { + my $err = $@; + $err =~ s/,? at .+ line [0-9]+[\.\r\n ]*$//; + return _err "JSON-decode: $err" if !defined $json; + } + return _err qq|"$cmd" command requires a JSON object| if ref($json) ne 'HASH'; + return [[ $cmd, $json ]]; + } + + # C: get + if($self->{type} eq 'server' && $str =~ /^$WS*get$WS+($GET_TYPE)$WS+($GET_INFO)$WS+(.+)$/s) { + my($type, $info) = ($1, $2); + my($filters, $rest) = decode_filters($3); + return _err $filters if !ref $filters; + return _err 'Leading characters' if length $rest && $rest !~ /^$WS+$/; + return [[ 'get', $type, [ split /,/, $info ], $filters ]]; + } + + # S: ok + if($self->{type} eq 'client' && $str =~ /^$WS*ok$WS*$/) { + return [[ 'ok' ]]; + } + + # if we're here, we've received something strange + return _err 'Invalid command or argument'; +} + + +# arguments come from the application and are assumed to be correct, +# passing incorrect arguments will result in undefined behaviour. +sub put { + my($self, $cmds) = @_; + my @r; + for my $p (@$cmds) { + my $cmd = shift @$p; + + # C: login + push @r, 'login '.JSON::XS->new->encode($p->[0]) + if $self->{type} eq 'client' && $cmd eq 'login'; + + # C: get + push @r, sprintf 'get %s %s %s', $p->[0], join(',',@{$p->[1]}), encode_filters($p->[2]) + if $self->{type} eq 'client' && $cmd eq 'get'; + + # S: ok + push @r, 'ok' + if $self->{type} eq 'server' && $cmd eq 'ok'; + + # S: error, results + push @r, "$cmd ".JSON::XS->new->encode($p->[0]) + if $self->{type} eq 'server' && ($cmd eq 'error' || $cmd eq 'results'); + } + # the $EOT can also be passed through encode_utf8(), the result is the same. + return [ map encode_utf8($_).$EOT, @r ]; +} + + +# decodes "<field> <op> <value>", and returns the arrayref and the remaining (unparsed) string after <value> +sub decode_filter_expr { + my $str = shift; + return ('Invalid filter expression') if $str !~ /^$WS*($FILTER_FIELD)$WS*($FILTER_OP)([^=].*)$/s; + my($field, $op, $val) = ($1, $2, $3); + my($value, $chars) = eval { JSON::XS->new->allow_nonref->decode_prefix($val) }; + if(!defined $chars) { + my $err = $@; + $err =~ s/,? at .+ line [0-9]+[\.\r\n ]*$//; + return ("Invalid JSON value in filter expression: $err"); + } + $str = substr $val, $chars; + return ([ $field, $op, $value ], $str); +} + + +sub decode_filters { + my($str, $sub) = @_; + $sub ||= 0; + my @r; + return ('Too many nested filter expressions') if $sub > 10; + return ('Filter must start with a (') if !$sub && $str !~ s/^$WS*\(//; + while(length $str && $str !~ /^$WS*\)/) { + my $ret; + $str =~ s/^$WS+//; + # AND/OR + if(@r%2 == 1 && $str =~ s/^($FILTER_BOOL)//) { + push @r, $1; + next; + } + # sub-expression () + if($str =~ s/^\(//) { + ($ret, $str) = decode_filters($str, $sub+1); + return ($ret) if !ref $ret; + return ('Unterminated ( in filter expression') if $str !~ s/^$WS*\)//; + push @r, $ret; + next; + } + # <expr> + ($ret, $str) = decode_filter_expr($str); + return ($ret) if !ref $ret; + push @r, $ret; + } + return ('Unterminated ( in filter expression') if !$sub && $str !~ s/^$WS*\)//; + # validate what we have parsed + return ('Empty filter expression') if !@r; + return ('Invalid filter expression') if @r % 2 != 1 || grep ref $r[$_] eq ($_%2 ? 'ARRAY' : ''), 0..$#r; + return (@r == 1 ? @r : \@r, $str); +} + + +# arguments: arrayref returned by decode_filters and an optional serialize function, +# this function is called for earch filter expression, in the same order the expressions +# are serialized. Should return the serialized string. This can be used to easily +# convert the filters into SQL. +sub encode_filters { + my($fil, $func, @extra) = @_; + return '('.join('', map { + if(!ref $_) { # and/or + " $_ " + } elsif(ref $_->[0]) { # sub expression + my $v = encode_filters($_, $func, @extra); + return undef if !defined $v; + $v + } else { # expression + my $v = $func ? $func->($_, @extra) : "$_->[0] $_->[1] ".JSON::XS->new->allow_nonref->encode($_->[2]); + return undef if !defined $v; + $v + } + } ref($fil->[0]) ? @$fil : $fil).')'; +} + + +1; + + +__END__ + +# and here is a relatively comprehensive test suite for the above implementation of decode_filter() + +use lib '/home/yorhel/dev/vndb/lib'; +use POE::Filter::VNDBAPI 'decode_filters'; +require Test::More; +use utf8; +my @tests = ( + # these should all parse fine + [q|(test = 1)|, ['test', '=', '1'], ''], + [q|((vn_name ~ "20") and length > 2)|, [['vn_name', '~', '20'], 'and', ['length', '>', 2]], ''], + [q|(padding < ["val1", 4]) padding|, ['padding', '<', ['val1', 4]], ' padding' ], + [q|(s=nulland_f<3)()|, [['s', '=', undef], 'and', ['_f', '<', 3]], '()'], + [qq|\r(p\r\t=\n \t\r3\n\n)|, ['p', '=', 3], ''], + [q|(s=4and((m="3"ort="str")or_g_={}))|, [['s','=',4],'and',[[['m','=','3'],'or',['t','=','str']],'or',['_g_','=',{}]]], ''], + [q|(z = ")\"){})")|, ['z', '=', ')"){})'], '' ], + [q| (name ~ "月姫") |, ['name', '~', '月姫'], ' ' ], + [q| (id >= 2) |, ['id', '>=', 2], ' '], + [q|(original = null)|, ['original', '=', undef], ''], + # and these should fail + [q|(name = true|], + [q|(and (f=1) or g=1)|], + [q|name = null|], + [q|(invalid-field > 6)|], + [q|(invalid ~ "JSON)|], + [q|()|], + [q|(v = 2 and ())|], +); +import Test::More tests => ($#tests+1)*2; +for (@tests) { + my @ret = decode_filters($_->[0]); + if($_->[1]) { + is_deeply($ret[0], $_->[1]); + is($ret[1], $_->[2]); + } else { + ok(ref $ret[0] eq '', "nonref: $_->[0]"); + is($ret[1], undef, "rest: $_->[0]"); + } +} + + |