diff options
author | Yorhel <git@yorhel.nl> | 2014-08-16 15:28:44 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2014-08-16 15:29:11 +0200 |
commit | 31e505ca87059dd62a3ab20e8f3f635ee66f0837 (patch) | |
tree | 1d9669cfbe58643251b79f2fc5b6fd3009d46479 | |
parent | d4d667434ef692e718a4a1a0a1cb61735fbe7109 (diff) |
Add value parsing function + some hex test fixes
-rw-r--r-- | Makefile.am | 5 | ||||
-rw-r--r-- | doc/ypc.pod | 13 | ||||
-rw-r--r-- | lib/val/ypc_val_parse.c | 165 | ||||
-rw-r--r-- | lib/ypc.h | 16 | ||||
-rw-r--r-- | test/hex.c | 14 | ||||
-rw-r--r-- | test/val_parse.c | 81 |
6 files changed, 287 insertions, 7 deletions
diff --git a/Makefile.am b/Makefile.am index e441eb5..2d14a29 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,6 +27,7 @@ libypc_la_SOURCES=\ lib/util/str2sockaddr_ip.c\ lib/util/str2sockaddr_port.c\ lib/util/tweetnacl.c\ + lib/val/ypc_val_parse.c\ lib/ypc_init.c\ lib/ypc_msg_free.c\ lib/ypc_msg_new.c @@ -38,12 +39,14 @@ EXTRA_DIST=\ include_HEADERS=lib/ypc.h -TESTS=test/str2sockaddr test/hex +TESTS=test/str2sockaddr test/hex test/val_parse check_PROGRAMS=$(TESTS) test_str2sockaddr_SOURCES=test/str2sockaddr.c lib/util/str2sockaddr.c lib/util/str2sockaddr_ip.c lib/util/str2sockaddr_port.c test_str2sockaddr_CFLAGS=$(AM_CFLAGS) test_hex_SOURCES=test/hex.c lib/util/hex2bin.c lib/util/bin2hex.c test_hex_CFLAGS=$(AM_CFLAGS) +test_val_parse_SOURCES=test/val_parse.c +test_val_parse_LDADD=libypc.la MOSTLYCLEANFILES=ypc.dll ypc.lib ypc.exp ypc.pdb ypc.ilk *.obj ACLOCAL_AMFLAGS=-I ac diff --git a/doc/ypc.pod b/doc/ypc.pod index 26c487b..21fc707 100644 --- a/doc/ypc.pod +++ b/doc/ypc.pod @@ -37,6 +37,19 @@ can be called from multiple threads, too. =over +=item void ypc_val_parse_init(ypc_val_parser *p) + +Initialize a value parsing object for use with ypc_val_parse(). + +=item int ypc_val_parse(ypc_val_parser *p, const uint8_t *buf, size_t *len) + +Parse (or, actually, validate) a serialized value. Returns -1 on error, 0 if +the buffer parsed correctly, but does not finish a complete value, and 1 if the +buffer completes a value. C<len> is updated to reflect the number of bytes that +have been verified in C<buf>. On error, this indicates the offset into the +buffer where the error occurred, and on a complete value this indicates the end +of the value. 
+ =item ypc_msg *ypc_msg_new(const char *, size_t) =item void ypc_msg_free(ypc_msg *) diff --git a/lib/val/ypc_val_parse.c b/lib/val/ypc_val_parse.c new file mode 100644 index 0000000..22124b0 --- /dev/null +++ b/lib/val/ypc_val_parse.c @@ -0,0 +1,165 @@ +#include "../internal.h" + +typedef enum { + YPCP_TYPE, /* Expecting a type byte */ + YPCP_SKIP, /* Skipping over 'len' bytes */ + YPCP_BIN1, /* Expecting first length byte of binary string */ + YPCP_BIN2, /* ^ second */ + YPCP_BIN3, /* ^ third */ + YPCP_TEXT, /* Expecting UTF-8 text */ +} ypc__parse_state; + + +#define YPCT_NULL 0 +#define YPCT_FALSE 1 +#define YPCT_TRUE 2 +#define YPCT_INT8 3 +#define YPCT_INT16 4 +#define YPCT_INT32 5 +#define YPCT_INT64 6 +#define YPCT_FLOAT 7 +#define YPCT_BIN 8 +#define YPCT_TEXT 9 +#define YPCT_ARRAY 10 +#define YPCT_MAP 11 +#define YPCT_CLOSE 12 + + +static int ypc__parse_endval(ypc_val_parser *p) { + p->state = YPCP_TYPE; + return p->depth == 0; +} + + +static int ypc__parse_type(ypc_val_parser *p, uint8_t type) { + if(p->depth && p->bitmap & 1 && !p->wantval) { + if(!(type == YPCT_INT8 || type == YPCT_INT16 || type == YPCT_INT32 || type == YPCT_INT64 + || type == YPCT_BIN || type == YPCT_TEXT || type == YPCT_CLOSE)) + return -1; + p->wantval = true; + } else if(p->wantval) + p->wantval = false; + + switch(type) { + + case YPCT_NULL: + case YPCT_FALSE: + case YPCT_TRUE: + return ypc__parse_endval(p); + + case YPCT_INT8: + p->len = 1; + p->state = YPCP_SKIP; + return 0; + + case YPCT_INT16: + p->len = 2; + p->state = YPCP_SKIP; + return 0; + + case YPCT_INT32: + p->len = 4; + p->state = YPCP_SKIP; + return 0; + + case YPCT_INT64: + case YPCT_FLOAT: + p->len = 8; + p->state = YPCP_SKIP; + return 0; + + case YPCT_BIN: + p->state = YPCP_BIN1; + return 0; + + case YPCT_TEXT: + p->state = YPCP_TEXT; + return 0; + + case YPCT_ARRAY: + case YPCT_MAP: + if(p->depth == 32) + return -1; + p->depth++; + p->bitmap <<= 1; + if(type == YPCT_MAP) + p->bitmap |= 1; + p->state = YPCP_TYPE; 
+ return 0; + + case YPCT_CLOSE: + if(!p->depth) + return -1; + p->bitmap >>= 1; + p->depth--; + return ypc__parse_endval(p); + } + return -1; +} + +YPC_EXPORT int ypc_val_parse(ypc_val_parser *p, const uint8_t *buf, size_t *lenp) { + size_t len = *lenp; + int r = 0; + + while(r == 0 && len) { + + switch(p->state) { + + case YPCP_TYPE: + r = ypc__parse_type(p, *buf); + break; + + case YPCP_SKIP: + { + size_t skip = p->len > len ? len : p->len; + /* -1 because the outer loop we're in already 'consumes' one byte */ + len -= skip-1; + buf += skip-1; + p->len -= skip; + } + if(!p->len) + r = ypc__parse_endval(p); + break; + + case YPCP_BIN1: + p->len = (uint32_t)*buf << 16; + p->state = YPCP_BIN2; + break; + + case YPCP_BIN2: + p->len |= (uint32_t)*buf << 8; + p->state = YPCP_BIN3; + break; + + case YPCP_BIN3: + p->len |= (uint32_t)*buf; + if(p->len) + p->state = YPCP_SKIP; + else + r = ypc__parse_endval(p); + break; + + case YPCP_TEXT: + /* TODO: Validate UTF-8 */ + if(*buf == 0) + r = ypc__parse_endval(p); + break; + } + + len--; + buf++; + } + + *lenp -= len; + return r; +} + + +YPC_EXPORT void ypc_val_parse_init(ypc_val_parser *p) { + p->bitmap = 0; + p->len = 0; + p->depth = 0; + p->wantval = 0; + p->state = YPCP_TYPE; +} + @@ -37,12 +37,28 @@ typedef struct ypc_serv ypc_serv; #define YPC_EFAIL -1 /* Generic fail code, for when no specific error is available or useful */ +/* I don't like having this struct part of the public ABI, but it is the kind + * of struct one might want to embed. Let's just pray that 4x32bit will suffice + * for eternity. 
*/ +typedef struct { + uint32_t len; + uint32_t bitmap; + uint32_t depth : 6; + uint32_t state : 3; + bool wantval : 1; + uint32_t pad; +} ypc_val_parser; + + #ifdef __cplusplus extern "C" { #endif YPC_IMPORT int ypc_init(); +YPC_IMPORT void ypc_val_parse_init(ypc_val_parser *); +YPC_IMPORT int ypc_val_parse(ypc_val_parser *, const uint8_t *, size_t *); + YPC_IMPORT ypc_msg *ypc_msg_new(const char *, size_t); YPC_IMPORT void ypc_msg_free(ypc_msg *); @@ -22,17 +22,19 @@ int main(int argc, char **argv) { char buf2[len*2+1];\ assert(ypc__hex2bin(buf, hex, len) == 0);\ assert(memcmp(buf, bin, len) == 0);\ - ypc__bin2hex(buf2, bin, len);\ + ypc__bin2hex(buf2, (const uint8_t *)bin, len);\ assert(buf2[len*2] == 0);\ assert(strcasecmp(buf2, hex) == 0);\ } while(0) T("", "", 0); T("00", "\0", 1); - T("FF", "\777", 1); - T("ff", "\777", 1); - T("fF", "\777", 1); - T("FF00", "\777\0", 2); + T("FF", "\377", 1); + T("ff", "\377", 1); + T("fF", "\377", 1); + T("FF00", "\377\0", 2); T("000102030405060708090A0B0C0D0E0F", "\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17", 16); - T("102030405060708090A0B0C0D0E0F0", "\20\40\60\100\120\140\160\200\220\240\260\300\320\340\360\400", 15); + T("102030405060708090A0B0C0D0E0F0", "\20\40\60\100\120\140\160\200\220\240\260\300\320\340\360", 15); #undef T + + return 0; } diff --git a/test/val_parse.c b/test/val_parse.c new file mode 100644 index 0000000..9b72e00 --- /dev/null +++ b/test/val_parse.c @@ -0,0 +1,81 @@ +#include <assert.h> +#include "../lib/ypc.h" + +#define T(str, ret) do {\ + /* one-shot validation */\ + ypc_val_parser p[1];\ + ypc_val_parse_init(p);\ + size_t l = sizeof str - 1;\ + const uint8_t *buf = (const uint8_t *)str;\ + assert(1 && ypc_val_parse(p, buf, &l) == ret);\ + if(ret == 1)\ + assert(1 && l == sizeof str - 1);\ + /* one-shot validation with trailing garbage */\ + const uint8_t bin[] = str "garbage";\ + l = sizeof bin - 1;\ + assert(2 && ypc_val_parse(p, bin, &l) == ret);\ + if(ret == 1)\ + assert(2 && l == sizeof str - 
1);\ + /* one byte at a time */\ + ypc_val_parse_init(p);\ + int r = 0;\ + size_t i;\ + l = 0;\ + while(r == 0) {\ + i = 1;\ + r = ypc_val_parse(p, buf, &i);\ + assert(i == 1);\ + l++;\ + buf++;\ + }\ + assert(r == ret);\ + if(ret == 1)\ + assert(l == sizeof str - 1);\ + } while(0) + +int main(int argc, char **argv) { + /* null */ + T("\0", 1); + /* false */ + T("\1", 1); + /* true */ + T("\2", 1); + /* int8, 0 */ + T("\3\0", 1); + /* int16, 0 */ + T("\4\0\0", 1); + /* int32, 0 */ + T("\5\0\0\0\0", 1); + /* int64, 0 */ + T("\6\0\0\0\0\0\0\0\0", 1); + /* float, 0 */ + T("\7\0\0\0\0\0\0\0\0", 1); + /* binary stuff (TODO: Test longer strings) */ + T("\10\0\0\0", 1); + T("\10\0\0\5\1\2\3\4\5", 1); + /* text strings */ + T("\11\0", 1); + T("\11Hey\0", 1); + T("\11オリジナルサウンドトラック\0", 1); + T("\11\xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\0", 1); /* UTF-8 examples from Wikipedia */ + T("\11\xC2\0", 1); /* XXX: WRONG! */ + T("\11\xC0\x80\0", 1); /* XXX: WRONG! */ + T("\11\xF0\x82\xAC\0", 1); /* XXX: WRONG! Overlong (Example from wikipedia) */ + /* Arrays */ + T("\12\0\14", 1); + T("\12\0\1\11abc\0\14", 1); + T("\12\12\12\14\14\14", 1); + T("\14", -1); + /* Maximum nesting is 32 */ + T("\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14\14", 1); + T("\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12\12", -1); + /* Maps */ + T("\13\14", 1); + T("\13\0\14", -1); + T("\13\0\0\14", -1); + T("\13\3\0\0\14", 1); + T("\13\10\0\0\0\10\0\0\1\1\14", 1); + T("\13\3\0\0\3\1\0\14", 1); + T("\12\13\14\14", 1); + return 0; +} |