diff options
author | Yorhel <git@yorhel.nl> | 2012-03-14 19:16:02 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2012-03-14 19:16:02 +0100 |
commit | 86e20cd6c361cb43a9e2cf24cd9d8b3afddb1437 (patch) | |
tree | 37b303fe085c2628254c99764970032d2063aafc | |
parent | d17d94cefafd25471234307c3b4a67c276d0b651 (diff) |
Added JSON parser + fixed some bugs in formatter + tuple handling
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | tanja.c | 396 | ||||
-rw-r--r-- | tanja.h | 1 | ||||
-rw-r--r-- | test.c | 65 |
4 files changed, 444 insertions, 19 deletions
@@ -1,5 +1,6 @@ CC=gcc CFLAGS=-Wall -Wextra -g -O3 -DTANJA_THREADSAFE +LDFLAGS=-lm tanja.o: Makefile tanja.c tanja.h $(CC) $(CFLAGS) -c tanja.c -o tanja.o @@ -4,6 +4,7 @@ #include <stdio.h> #include <assert.h> #include <string.h> +#include <math.h> #include "khash.h" #include "tanja.h" @@ -90,6 +91,8 @@ typedef struct { + + // Generic tuple management @@ -159,25 +162,29 @@ tn_element tn_array_new(char *lst, ...) { int i; for(i=0; i<a.count; i++) a.v.a[i] = create_element(lst[i], va); - } + } else + a.v.a = NULL; va_end(va); return a; } +static inline void tn_array_grow(tn_element *a, int new) { + if(a->count+new > a->size) { + a->size *= 2; + if(a->size < new+a->count) + a->size = new+a->count; + a->v.a = realloc(a->v.a, a->size*sizeof(tn_element)); + } +} + + void tn_array_append(tn_element *a, char *lst, ...) { assert(a && a->type == TN_VT_AR); - // Grow, if necessary int n = strlen(lst); - if(n+a->count > a->size) { - a->size *= 2; - if(a->size < n+a->count) - a->size = n+a->count; - a->v.a = realloc(a->v.a, a->size*sizeof(tn_element)); - } + tn_array_grow(a, n); - // And add va_list va; va_start(va, lst); int i; @@ -200,27 +207,31 @@ tn_element tn_map_new(char *lst, ...) { a.v.m[i].key = va_arg(va, char *); a.v.m[i].val = create_element(lst[i], va); } - } + } else + a.v.m = NULL; va_end(va); return a; } +static inline void tn_map_grow(tn_element *m, int new) { + if(m->count+new > m->size) { + m->size *= 2; + if(m->size < m->count+new) + m->size = m->count+new; + m->v.m = realloc(m->v.m, m->size*sizeof(tn_map_element)); + } +} + + // Appends key/values to the map. Currently doesn't overwrite existing keys, // though it probably should. void tn_map_set(tn_element *m, char *lst, ...) { assert(m && m->type == TN_VT_MAP); - // Grow, if necessary int n = strlen(lst); - if(n+m->count > m->size) { - m->size *= 2; - if(m->size < n+m->count) - m->size = n+m->count; - m->v.m = realloc(m->v.m, m->size*sizeof(tn_map_element)); - } + tn_map_grow(m, n); - // And add va_list va; va_start(va, lst); int i; @@ -317,10 +328,13 @@ static inline void json_fmt_string(char *str, lbuf *buf) { switch(*str) { case '\n': ac('\\'); ac('n'); break; case '\r': ac('\\'); ac('r'); break; + case '\b': ac('\\'); ac('b'); break; + case '\t': ac('\\'); ac('t'); break; + case '\f': ac('\\'); ac('f'); break; case '\\': ac('\\'); ac('\\'); break; case '"': ac('\\'); ac('"'); break; default: - if(*str <= 31 || *str == 127) { + if((unsigned char)*str <= 31 || (unsigned char)*str == 127) { char b[8] = {}; snprintf(b, 8, "\\u00%02x", *str); as(b); @@ -405,6 +419,350 @@ char *tn_json_fmt(tn_tuple *tup) { +// Json-to-tuple parser +// Error handling should be correct, but informative error messages are completely absent. +// TODO: limit max. recursion +// TODO: validate that it's correct UTF-8? + + +#define con(n) do { *buf += n; *len -= n; } while(0) +#define cons() do { while(*len > 0 && (**buf == 0x20 || **buf == 0x09 || **buf == 0x0A || **buf == 0x0D)) { con(1); } } while(0) + +static tn_element tn_json_invalid; // global vars are initialized to zero by default + +static inline tn_element tn_json_parse_val(char **buf, int *len); + + +static int tn_json_parse_string_esc(char **buf, int *len, lbuf *b) { +#define ap(c) do { lbuf_append_c((*b), c); con(1); } while(0) + switch(**buf) { + case '"': ap('"'); break; + case '\\': ap('\\'); break; + case '/': ap('/'); break; + case 'b': ap(0x08); break; + case 'f': ap(0x0C); break; + case 'n': ap(0x0A); break; + case 'r': ap(0x0D); break; + case 't': ap(0x09); break; + case 'u': + if(*len < 5) + return 0; +#define hn(n) (n >= '0' && n <= '9' ? n-'0' : n >= 'A' && n <= 'F' ? n-'A'+10 : n >= 'a' && n <= 'f' ? n-'a'+10 : 1<<16) + uint32_t n = (hn((*buf)[1])<<12) + (hn((*buf)[2])<<8) + (hn((*buf)[3])<<4) + hn((*buf)[4]); +#undef hn + if(n <= 0x007F) + lbuf_append_c((*b), n); + else if(n <= 0x07FF) { + lbuf_append_c((*b), 0xC0 | (n>>6)); + lbuf_append_c((*b), 0x80 | (n & 0x3F)); + } else if(n <= 0xFFFF) { + lbuf_append_c((*b), 0xE0 | (n>>12)); + lbuf_append_c((*b), 0x80 | ((n>>6) & 0x3F)); + lbuf_append_c((*b), 0x80 | (n & 0x3F)); + } else // this happens if there was an invalid character (n >= (1<<16)) + return 0; + con(5); + break; + default: + return 0; + } +#undef ap + return 1; +} + + +static tn_element tn_json_parse_string(char **buf, int *len) { + con(1); // consume " character + lbuf b; + lbuf_init(b); + while(1) { + if(!*len) + goto err; + // Complete string, return + if(**buf == '"') { + con(1); + lbuf_append_c(b, 0); + tn_element el; + el.type = TN_VT_STR; + el.v.s = b.dat; + return el; + } + // Control characters are not allowed + if((unsigned char)**buf <= 0x1F || (unsigned char)**buf == 0x7F) + goto err; + // backspace + if(**buf == '\\') { + con(1); + if(!*len || !tn_json_parse_string_esc(buf, len, &b)) + goto err; + } else { + // Normal character + lbuf_append_c(b, **buf); + con(1); + } + } +err: + lbuf_free(b); + return tn_json_invalid; +} + + +// flags: +// 1: allow a leading zero. (i.e. !1 allows '0' but not '0xxx') +// 2: allow a leading '-' sign +// 4: allow a leading '+' sign +// Result is stored in *res, number of digits used in *dig, number of (least +// significant) digits ignored in *ign. (So actual value is in the order of +// res*(10^ign)). Returns 0 on error, 1 otherwise. +static inline int tn_json_parse_int(char **buf, int *len, int flags, int64_t *res, int *dig, int *ign) { + // Special-case -2^63, it's not handled by the code below. (And luckily it + // only has a single representation in JSON. Unless allowzero is true... + // but you don't want -2^63 as an exponent and allowsign is disabled for + // the fractional part.) + if((flags & 2) && *len >= 20 && strncmp(*buf, "-9223372036854775808", 20) == 0) { + con(20); + *res = INT64_MIN; + *dig = 19; + *ign = 0; + return 1; + } + *res = 0; + *ign = *dig = 0; + int hassign = 0; // 1 == -, 2 == + + int haszero = 0; + while(1) { + if(!*len) + break; + if(**buf == '+') { + if(hassign || *dig || !(flags & 4)) + break; + hassign = 2; + } else if(**buf == '-') { + if(hassign || *dig || !(flags & 2)) + break; + hassign = 1; + } else if(**buf >= '0' && **buf <= '9') { + if(haszero && !(flags & 1)) + break; + if(!*dig && **buf == '0') + haszero = 1; + if(*ign) + (*ign)++; + else { + int64_t n = ((*res)*10) + **buf-'0'; + if(n < *res) // overflow + (*ign)++; + else { + (*dig)++; + *res = n; + } + } + } else + break; + con(1); + } + if(hassign == 1) + *res = -1*(*res); + return *dig > 0; +} + + +static tn_element tn_json_parse_num(char **buf, int *len) { + int64_t i, d = 0, e = 0; + int id, ii, dd = 0, di, ed, ei; + // Integer part + if(!tn_json_parse_int(buf, len, 2, &i, &id, &ii)) + return tn_json_invalid; + // Decimal + if(len && **buf == '.') { + con(1); + if(!tn_json_parse_int(buf, len, 1, &d, &dd, &di)) + return tn_json_invalid; + } + // Exponent + if(len && (**buf == 'e' || **buf == 'E')) { + con(1); + if(!tn_json_parse_int(buf, len, 1|2|4, &e, &ed, &ei)) + return tn_json_invalid; + } + // Create element + tn_element el; + if(!d && !e && !ii) { // fits into a regular int64 + el.type = TN_VT_INT; + el.v.i = i; + } else { + el.type = TN_VT_NUM; + el.v.n = (double)i; + // Can these operations be done more efficiently by playing around with the bits instead of using pow()? + if(e || ii) { // exponent + el.v.n *= pow(10.0, (double)(e + ii)); + if(el.v.n == HUGE_VAL) + return tn_json_invalid; + } + if(d && !ii) // decimal (don't bother if ii>0) + el.v.n += (double)(i < 0 ? -d : d) * pow(10.0, (double)(e - dd)); + } + return el; +} + + +static tn_element tn_json_parse_array(char **buf, int *len) { + con(1); // consume '[' character + tn_element el = tn_array_new(""); + int first = 1; + while(1) { + cons(); + if(!*len) + goto err; + // end of array, return. + if(**buf == ']') { + con(1); + return el; + } + // consume comma + if(first) + first = 0; + else { + if(**buf != ',') + goto err; + con(1); + cons(); + if(!*len) + goto err; + } + // get next value + tn_array_grow(&el, 1); + el.v.a[el.count] = tn_json_parse_val(buf, len); + if(!el.v.a[el.count].type) + goto err; + el.count++; + } +err: + tn_element_free(el); + return tn_json_invalid; +} + + +static tn_element tn_json_parse_map(char **buf, int *len) { + con(1); // consume '{' character + tn_element el = tn_map_new(""); + int first = 1; + while(1) { + cons(); + if(!*len) + goto err; + // end of map, return. + if(**buf == '}') { + con(1); + return el; + } + // consume comma + if(first) + first = 0; + else { + if(**buf != ',') + goto err; + con(1); + cons(); + if(!*len) + goto err; + } + // get key + tn_map_grow(&el, 1); + if(**buf != '"') + goto err; + tn_element key = tn_json_parse_string(buf, len); + if(!key.type) + goto err; + el.v.m[el.count].key = key.v.s; + // consume separator + cons(); + if(!*len || **buf != ':') + goto err; + con(1); + cons(); + if(!*len) + goto err; + // get value + el.v.m[el.count].val = tn_json_parse_val(buf, len); + if(!el.v.m[el.count].val.type) { + free(key.v.s); + goto err; + } + el.count++; + } +err: + tn_element_free(el); + return tn_json_invalid; +} + + +static inline tn_element tn_json_parse_val(char **buf, int *len) { + if(*len >= 2 && **buf == '[') + return tn_json_parse_array(buf, len); + else if(*len >= 2 && **buf == '"') + return tn_json_parse_string(buf, len); + else if(*len >= 2 && **buf == '{') + return tn_json_parse_map(buf, len); + else if(*len >= 1 && (**buf == '-' || (**buf >= '0' && **buf <= '9'))) + return tn_json_parse_num(buf, len); + else if(*len >= 4 && strncmp(*buf, "true", 4) == 0) { + con(4); + tn_element el; + el.type = TN_VT_INT; + el.v.i = 1; + return el; + } else if(*len >= 5 && strncmp(*buf, "false", 5) == 0) { + con(5); + tn_element el; + el.type = TN_VT_INT; + el.v.i = 0; + return el; + } else if(*len >= 4 && strncmp(*buf, "null", 4) == 0) { + con(4); + tn_element el; + el.type = TN_VT_WC; + return el; + } + return tn_json_invalid; +} + + +// Returns NULL on error. If not NULL, *rd will indicate the number of bytes +// that have been read from the buffer. If there was an error, this points to +// the problem. +tn_tuple *tn_json_parse(const char *_buf, int _len, int *rd) { + int l = _len; + int *len = &l; + char *b = (char *)_buf; + char **buf = &b; + cons(); + if(*len < 2 || **buf != '[') + return NULL; + tn_element el = tn_json_parse_array(buf, len); + if(rd) + *rd = _len - l; + // Convert array into tuple + if(el.type) { + tn_tuple *tup = malloc(offsetof(tn_tuple, e) + el.count*sizeof(tn_element)); + tup->n = el.count; + tup->ref = 1; + memcpy(&tup->e, el.v.a, el.count*sizeof(tn_element)); + if(el.size) + free(el.v.a); + return tup; + } + return NULL; +} + +#undef con +#undef cons + + + + + + // Return-path struct tn_returnpath { @@ -82,6 +82,7 @@ tn_tuple *tn_tuple_new(char *, ...); int tn_tuple_match(tn_tuple *, tn_tuple *); char *tn_json_fmt(tn_tuple *); +tn_tuple *tn_json_parse(const char *, int, int *); void tn_reply(tn_returnpath *, tn_tuple *); void tn_reply_close(tn_returnpath *rp); @@ -24,7 +24,72 @@ void print_error(tn_link *l, int code, char *msg) { } +// Not really a "test", as it still requires human verification. Definitely useful for finding bugs, though. +static void test_tuples() { + puts("==> Start of tuple/JSON test"); +#define t(s) do {\ + int rd = 0;\ + printf("\n%3d: %s\n", (int)strlen(s), s);\ + tn_tuple *u = tn_json_parse(s, strlen(s), &rd);\ + printf("%3d: ", rd);\ + if(!u)\ + puts("(invalid)");\ + else {\ + char *f = tn_json_fmt(u);\ + if(!f)\ + puts("(format error!)");\ + else {\ + puts(f);\ + free(f);\ + }\ + tn_tuple_unref(u);\ + }\ + } while(0) + // TODO: also test invalid JSON, of course + t("[]"); + t("[null]"); + t("[true]"); + t("[false]"); + t("[1]"); + t("[1.0]"); + t("[0.23]"); + t("[1.2]"); + t("[-1.5]"); + t("[10e-1]"); + t("[10E-1]"); + t("[10e+1]"); + t("[10e10]"); + t("[10e0010]"); + t("[10.001e0010]"); + t("[-9223372036854775808]"); + t("[123456789012345678901234567890]"); + t("[0.123456789012345678901234567890]"); + t("[123456789012345678901234567890.123456789012345678901234567890]"); + t("[1e-9223372036854775808]"); + t("[0.0e0]"); + t("[1.5e1]"); + t("[ null ,\t0 ]"); + t("[\"a\"]"); + t("[\"a¿月\"]"); + t("[\"\\u0061\\u00BF\\u6708\"]"); // same as above + t("[\"\\\\ \\t \\b \\r \\n \\f \\/ '\\\"\"]"); + t("[[[]]]"); + t("[ [ 1 ] ]"); + t("[{}]"); + t("[ { } ]"); + t("[{\"\":true}]"); + t("[{\"a\":null,\"b\":\"str\"}]"); + t("[{\"1\" : \"1\" , \"\\t\" \t : 0 } ]"); +#undef t + puts(""); + puts("==> End of tuple/JSON test"); + puts(""); +} + + int main() { + test_tuples(); + return 0; tn_tuple *t = tn_tuple_new("i*sam", 1293, strdup("som\\\"e_strin\01g"), tn_array_new("i", INT64_MIN), tn_map_new("sn", strdup("k\ney"), strdup("va\rlue"), strdup("number\""), 1.5e10) |