summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.am10
-rw-r--r--deps/yxml.c1024
-rw-r--r--deps/yxml.h134
-rw-r--r--src/fl_load.c412
-rw-r--r--src/xmlread.c537
5 files changed, 1360 insertions, 757 deletions
diff --git a/Makefile.am b/Makefile.am
index 7cf3459..c9186c0 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,7 @@
EXTRA_DIST=ChangeLog
noinst_PROGRAMS=
AM_CFLAGS=$(GLIB_CFLAGS) $(GNUTLS_CFLAGS) $(SQLITE_CFLAGS)
-AM_CPPFLAGS=-I$(builddir)/src -I$(srcdir)/deps/ylib
+AM_CPPFLAGS=-I$(builddir)/src -I$(srcdir)/deps -I$(srcdir)/deps/ylib
@@ -37,8 +37,8 @@ makeheaders_SOURCES=deps/makeheaders.c
noinst_LIBRARIES=libdeps.a
-libdeps_a_SOURCES=deps/ylib/yuri.c
-EXTRA_DIST+=deps/ylib/yuri.h
+libdeps_a_SOURCES=deps/ylib/yuri.c deps/yxml.c
+EXTRA_DIST+=deps/ylib/yuri.h deps/yxml.h
bin_PROGRAMS=ncdc
@@ -72,8 +72,7 @@ ncdc_SOURCES=\
src/uit_userlist.c\
src/ui_util.c\
src/util.c\
- src/vars.c\
- src/xmlread.c
+ src/vars.c
auto_headers=$(ncdc_SOURCES:.c=.h)
noinst_HEADERS=src/doc.h src/ncdc.h
@@ -138,4 +137,3 @@ src/uit_userlist.$(OBJEXT): src/uit_userlist.h
src/ui_util.$(OBJEXT): src/ui_util.h
src/util.$(OBJEXT): src/util.h
src/vars.$(OBJEXT): src/vars.h
-src/xmlread.$(OBJEXT): src/xmlread.h
diff --git a/deps/yxml.c b/deps/yxml.c
new file mode 100644
index 0000000..17c2dd2
--- /dev/null
+++ b/deps/yxml.c
@@ -0,0 +1,1024 @@
+/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */
+
+/* Copyright (c) 2013 Yoran Heling
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <yxml.h>
+#include <string.h>
+
+typedef enum { /* States of the yxml_parse() state machine; the current one is kept in x->state. */
+ YXMLS_string, /* consume the fixed text x->string, then switch to x->nextstate */
+ YXMLS_attr0, /* reading an attribute name */
+ YXMLS_attr1, /* after an attribute name, waiting for '=' */
+ YXMLS_attr2, /* after '=', waiting for the opening quote */
+ YXMLS_attr3, /* inside a quoted attribute value */
+ YXMLS_attr4, /* inside a '&...;' reference within an attribute value */
+ YXMLS_cd0, /* inside a CDATA section */
+ YXMLS_cd1, /* CDATA: one ']' seen */
+ YXMLS_cd2, /* CDATA: "]]" seen */
+ YXMLS_comment0,
+ YXMLS_comment1,
+ YXMLS_comment2, /* inside the body of a comment */
+ YXMLS_comment3,
+ YXMLS_comment4,
+ YXMLS_dt0,
+ YXMLS_dt1,
+ YXMLS_dt2,
+ YXMLS_dt3,
+ YXMLS_dt4,
+ YXMLS_elem0, /* reading an element name */
+ YXMLS_elem1,
+ YXMLS_elem2,
+ YXMLS_elem3, /* '/' of a self-closing tag seen, waiting for '>' */
+ YXMLS_enc0,
+ YXMLS_enc1,
+ YXMLS_enc2,
+ YXMLS_enc3,
+ YXMLS_etag0, /* after "</", waiting for the first byte of the name */
+ YXMLS_etag1, /* matching the close tag name against the open tag name */
+ YXMLS_etag2,
+ YXMLS_init, /* initial state, set by yxml_init() */
+ YXMLS_le0,
+ YXMLS_le1,
+ YXMLS_le2,
+ YXMLS_le3,
+ YXMLS_lee1,
+ YXMLS_lee2,
+ YXMLS_leq0,
+ YXMLS_misc0,
+ YXMLS_misc1,
+ YXMLS_misc2, /* element content */
+ YXMLS_misc2a, /* inside a '&...;' reference within element content */
+ YXMLS_misc3, /* after the root element was closed; the only state yxml_eof() accepts */
+ YXMLS_pi0,
+ YXMLS_pi1, /* reading the name of a processing instruction */
+ YXMLS_pi2, /* inside the content of a processing instruction */
+ YXMLS_pi3, /* PI content: one '?' seen */
+ YXMLS_pi4,
+ YXMLS_std0,
+ YXMLS_std1,
+ YXMLS_std2,
+ YXMLS_std3,
+ YXMLS_ver0,
+ YXMLS_ver1,
+ YXMLS_ver2,
+ YXMLS_ver3,
+ YXMLS_xmldecl0,
+ YXMLS_xmldecl1,
+ YXMLS_xmldecl2,
+ YXMLS_xmldecl3,
+ YXMLS_xmldecl4,
+ YXMLS_xmldecl5,
+ YXMLS_xmldecl6,
+ YXMLS_xmldecl7
+} yxml_state_t;
+
+
+#define yxml_isChar(c) 1
+/* Character-class tests. c must be unsigned: the range checks rely on wraparound. 0xd should be part of SP, too, but yxml_parse() already normalizes that into 0xa */
+#define yxml_isSP(c) ((c) == 0x20 || (c) == 0x09 || (c) == 0x0a)
+#define yxml_isAlpha(c) (((c)|32)-'a' < 26)
+#define yxml_isNum(c) ((c)-'0' < 10)
+#define yxml_isHex(c) (yxml_isNum(c) || ((c)|32)-'a' < 6)
+#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || (c) == '.' || (c) == '_' || (c) == '-')
+#define yxml_isNameStart(c) (yxml_isAlpha(c) || (c) == ':' || (c) == '_' || (c) >= 128)
+#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || (c) == '-' || (c) == '.')
+/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
+#define yxml_isAttValue(c) (yxml_isChar(c) && (c) != x->quote && (c) != '<' && (c) != '&')
+/* Anything between '&' and ';', the yxml_ref* functions will do further
+ * validation. Strictly speaking, this is "yxml_isName(c) || c == '#'", but
+ * this parser doesn't understand entities with '.', ':', etc, anyway. */
+#define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || (c) == '#')
+
+
+/* Store the byte value ch (0<=ch<=255) into *dest.
+ * A plain assignment is avoided because char may be a signed type, and the
+ * result of converting an out-of-range unsigned value to a signed type is
+ * implementation defined in C. The value is therefore narrowed to unsigned
+ * char first and that single byte is copied into place. */
+static inline void yxml_setchar(char *dest, unsigned ch) {
+	const unsigned char byte = (unsigned char)ch;
+	memcpy(dest, &byte, sizeof byte);
+}
+
+
+/* Encode ch (any valid Unicode code point) as UTF-8 into dest, followed by a
+ * terminating '\0'. dest must have room for at least 5 bytes: up to four
+ * encoded bytes plus the terminator. */
+static void yxml_setutf8(char *dest, unsigned ch) {
+	/* Lead-byte prefix for a sequence with 0, 1, 2 or 3 continuation bytes. */
+	static const unsigned lead[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+	/* Number of continuation bytes that follow the lead byte. */
+	unsigned cont;
+	if(ch <= 0x007F)
+		cont = 0;
+	else if(ch <= 0x07FF)
+		cont = 1;
+	else
+		cont = ch <= 0xFFFF ? 2 : 3;
+	/* The lead byte carries the top bits, each continuation byte the next 6. */
+	yxml_setchar(dest++, lead[cont] | (ch >> (6*cont)));
+	while(cont--)
+		yxml_setchar(dest++, 0x80 | ((ch >> (6*cont)) & 0x3F));
+	*dest = 0;
+}
+
+
+static inline int yxml_datacontent(yxml_t *x, unsigned ch) { /* report ch as one byte of element content */
+	x->data[1] = '\0';
+	yxml_setchar(&x->data[0], ch);
+	return YXML_CONTENT;
+}
+
+
+static inline int yxml_datapi1(yxml_t *x, unsigned ch) { /* report ch as one byte of PI content */
+	x->data[1] = '\0';
+	yxml_setchar(&x->data[0], ch);
+	return YXML_PICONTENT;
+}
+
+
+static inline int yxml_datapi2(yxml_t *x, unsigned ch) { /* PI content: a '?' that was not followed by '>', plus ch */
+	yxml_setchar(&x->data[1], ch);
+	x->data[0] = '?';
+	x->data[2] = '\0';
+	return YXML_PICONTENT;
+}
+
+
+static inline int yxml_datacd1(yxml_t *x, unsigned ch) { /* CDATA content: a lone ']' followed by ch */
+	yxml_setchar(&x->data[1], ch);
+	x->data[0] = ']';
+	x->data[2] = '\0';
+	return YXML_CONTENT;
+}
+
+
+static inline int yxml_datacd2(yxml_t *x, unsigned ch) {
+	/* CDATA content: "]]" that was not followed by '>', plus ch. */
+	memcpy(x->data, "]]", 2);
+	yxml_setchar(&x->data[2], ch);
+	x->data[3] = '\0';
+	return YXML_CONTENT;
+}
+
+
+static inline int yxml_dataattr(yxml_t *x, unsigned ch) {
+	/* Tab and newline are reported as a space (XML spec section 3.3.3). */
+	x->data[1] = '\0';
+	yxml_setchar(&x->data[0], (ch != 0x9 && ch != 0xa) ? ch : 0x20);
+	return YXML_ATTRVAL;
+}
+
+
+static int yxml_pushstack(yxml_t *x, char **res, unsigned ch) { /* start a new name on the stack; *res points at it */
+	size_t pos = x->stacklen + 1; /* skip the '\0' that ends the previous name */
+	if(pos+1 >= x->stacksize)
+		return YXML_ESTACK;
+	*res = (char *)x->stack+pos;
+	x->stack[pos] = ch;
+	x->stack[pos+1] = 0;
+	x->stacklen = pos+1;
+	return YXML_OK;
+}
+
+
+static int yxml_pushstackc(yxml_t *x, unsigned ch) {
+	/* Append ch to the name on top of the stack, keeping it NUL-terminated. */
+	if(x->stacklen+1 >= x->stacksize)
+		return YXML_ESTACK;
+	x->stack[x->stacklen++] = ch;
+	x->stack[x->stacklen] = 0;
+	return YXML_OK;
+}
+
+
+static void yxml_popstack(yxml_t *x) {
+	/* Drop the top name: step back until the '\0' that ends the previous name. */
+	for(x->stacklen--; x->stack[x->stacklen]; x->stacklen--)
+		;
+}
+
+
+static inline int yxml_elemstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->elem, ch); } /* first byte of an element name; x->elem points at the new name */
+static inline int yxml_elemname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } /* further byte of the element name */
+static inline int yxml_elemnameend(yxml_t *x, unsigned ch) { return YXML_ELEMSTART; } /* name complete: emit the ELEMSTART token */
+
+
+/* Pop the innermost element name and report ELEMEND; also used by yxml_elemcloseend(). */
+static int yxml_selfclose(yxml_t *x, unsigned ch) {
+	yxml_popstack(x);
+	if(!x->stacklen) {
+		/* That was the root element: continue in the post-root state. */
+		x->elem = (char *)x->stack;
+		x->state = YXMLS_misc3;
+		return YXML_ELEMEND;
+	}
+	/* Point x->elem at the first byte of the enclosing element's name. */
+	for(x->elem = (char *)x->stack+x->stacklen-1; *(x->elem-1); x->elem--)
+		;
+	return YXML_ELEMEND;
+}
+
+
+static inline int yxml_elemclose(yxml_t *x, unsigned ch) { /* check one byte of a close tag's name */
+	unsigned char want = *(unsigned char *)x->elem;
+	if(want == ch)
+		x->elem++; /* advance along the open tag's name */
+	return want == ch ? YXML_OK : YXML_ECLOSE;
+}
+
+
+static inline int yxml_elemcloseend(yxml_t *x, unsigned ch) {
+	/* The close tag's name ended. If part of the open tag's name is still
+	 * unmatched the names differ; otherwise pop the element. */
+	return *x->elem ? YXML_ECLOSE : yxml_selfclose(x, ch);
+}
+
+
+static inline int yxml_attrstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->attr, ch); } /* first byte of an attribute name; x->attr points at the new name */
+static inline int yxml_attrname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } /* further byte of the attribute name */
+static inline int yxml_attrnameend(yxml_t *x, unsigned ch) { return YXML_ATTRSTART; } /* name complete: emit the ATTRSTART token */
+static inline int yxml_attrvalend (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_ATTREND; } /* closing quote: remove the attribute name from the stack */
+
+
+static inline int yxml_pistart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); } /* first byte of a PI name; x->pi points at the new name */
+static inline int yxml_piname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } /* further byte of the PI name */
+static inline int yxml_pinameend(yxml_t *x, unsigned ch) {
+ return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART;
+}
+static inline int yxml_pivalend (yxml_t *x, unsigned ch) { yxml_popstack(x); x->pi = (char *)x->stack; return YXML_PIEND; } /* end of PI: drop its name and reset x->pi to the empty string at the stack base */
+
+
+static inline int yxml_refstart(yxml_t *x, unsigned ch) { /* '&' seen: start collecting a reference in x->data */
+	x->reflen = 0;
+	memset(x->data, 0, sizeof x->data); /* yxml_refend() relies on the zero padding */
+	return YXML_OK;
+}
+
+
+static int yxml_ref(yxml_t *x, unsigned ch) { /* append one byte of the reference; the last byte of x->data stays '\0' */
+	if(x->reflen < sizeof(x->data)-1) {
+		yxml_setchar(&x->data[x->reflen++], ch);
+		return YXML_OK;
+	}
+	return YXML_EREF;
+}
+
+
+static int yxml_refend(yxml_t *x, int ret) {
+	/* Decode the reference collected in x->data into UTF-8 (in place); returns ret, or YXML_EREF if invalid. */
+	unsigned char *r = (unsigned char *)x->data;
+	unsigned ch = 0;
+	if(*r == '#') {
+		if(r[1] == 'x')
+			for(r += 2; yxml_isHex((unsigned)*r); r++)
+				ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10);
+		else
+			for(r++; yxml_isNum((unsigned)*r); r++)
+				ch = (ch*10) + (*r-'0');
+		if(*r) /* trailing bytes that are not digits invalidate the reference */
+			ch = 0;
+	} else {
+		/* Compare all 8 zero-padded bytes with memcmp(); reading a string literal through uint64_t* breaks aliasing and alignment rules. */
+		if(!memcmp(r, "lt\0\0\0\0\0", 8))
+			ch = '<';
+		else if(!memcmp(r, "gt\0\0\0\0\0", 8))
+			ch = '>';
+		else if(!memcmp(r, "amp\0\0\0\0", 8))
+			ch = '&';
+		else if(!memcmp(r, "apos\0\0\0", 8))
+			ch = '\'';
+		else if(!memcmp(r, "quot\0\0\0", 8))
+			ch = '"';
+	}
+
+	/* Codepoints not allowed in the XML 1.1 definition of a Char; the last test rejects the surrogates 0xD800..0xDFFF. */
+	if(!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch-0xD800) < 0x800)
+		return YXML_EREF;
+	yxml_setutf8(x->data, ch);
+	return ret;
+}
+
+
+static inline int yxml_refcontent(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_CONTENT); } /* ';' ended a reference in element content */
+static inline int yxml_refattrval(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_ATTRVAL); } /* ';' ended a reference in an attribute value */
+
+
+void yxml_init(yxml_t *x, char *stack, size_t stacksize) { /* reset x for a new document; stack is the caller-provided name buffer */
+	memset(x, 0, sizeof *x);
+	x->state = YXMLS_init;
+	x->line = 1;
+	x->stacksize = stacksize;
+	x->stack = (unsigned char *)stack;
+	x->stack[0] = 0; /* the stack starts with an empty name */
+	x->elem = x->pi = (char *)x->stack;
+}
+
+
+yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
+ /* Ensure that characters are in the range of 0..255 rather than -128..127.
+ * All character comparisons are done with positive integers. */
+ unsigned ch = (unsigned)(_ch+256) & 0xff;
+ if(!ch)
+ return YXML_ESYN;
+ x->total++;
+
+ /* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and
+ * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds
+ * some non-ASCII character sequences to this list, but we can only handle
+ * ASCII here without making assumptions about the input encoding. */
+ if(x->ignore == ch) {
+ x->ignore = 0;
+ return YXML_OK;
+ }
+ x->ignore = (ch == 0xd) * 0xa;
+ if(ch == 0xa || ch == 0xd) {
+ ch = 0xa;
+ x->line++;
+ x->byte = 0;
+ }
+ x->byte++;
+
+ switch((yxml_state_t)x->state) {
+ case YXMLS_string:
+ if(ch == *x->string) {
+ x->string++;
+ if(!*x->string)
+ x->state = x->nextstate;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_attr0:
+ if(yxml_isName(ch))
+ return yxml_attrname(x, ch);
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_attr1;
+ return yxml_attrnameend(x, ch);
+ }
+ if(ch == (unsigned char)'=') {
+ x->state = YXMLS_attr2;
+ return yxml_attrnameend(x, ch);
+ }
+ break;
+ case YXMLS_attr1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'=') {
+ x->state = YXMLS_attr2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_attr2:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+ x->state = YXMLS_attr3;
+ x->quote = ch;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_attr3:
+ if(yxml_isAttValue(ch))
+ return yxml_dataattr(x, ch);
+ if(ch == (unsigned char)'&') {
+ x->state = YXMLS_attr4;
+ return yxml_refstart(x, ch);
+ }
+ if(x->quote == ch) {
+ x->state = YXMLS_elem2;
+ return yxml_attrvalend(x, ch);
+ }
+ break;
+ case YXMLS_attr4:
+ if(yxml_isRef(ch))
+ return yxml_ref(x, ch);
+ if(ch == (unsigned char)'\x3b') {
+ x->state = YXMLS_attr3;
+ return yxml_refattrval(x, ch);
+ }
+ break;
+ case YXMLS_cd0:
+ if(ch == (unsigned char)']') {
+ x->state = YXMLS_cd1;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return yxml_datacontent(x, ch);
+ break;
+ case YXMLS_cd1:
+ if(ch == (unsigned char)']') {
+ x->state = YXMLS_cd2;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_cd0;
+ return yxml_datacd1(x, ch);
+ }
+ break;
+ case YXMLS_cd2:
+ if(ch == (unsigned char)']')
+ return yxml_datacontent(x, ch);
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_cd0;
+ return yxml_datacd2(x, ch);
+ }
+ break;
+ case YXMLS_comment0:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_comment1:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_comment2:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment3;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return YXML_OK;
+ break;
+ case YXMLS_comment3:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment4;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_comment2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_comment4:
+ if(ch == (unsigned char)'>') {
+ x->state = x->nextstate;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_dt0:
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc1;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+ x->state = YXMLS_dt1;
+ x->quote = ch;
+ x->nextstate = YXMLS_dt0;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'<') {
+ x->state = YXMLS_dt2;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return YXML_OK;
+ break;
+ case YXMLS_dt1:
+ if(x->quote == ch) {
+ x->state = x->nextstate;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return YXML_OK;
+ break;
+ case YXMLS_dt2:
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi0;
+ x->nextstate = YXMLS_dt0;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'!') {
+ x->state = YXMLS_dt3;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_dt3:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment1;
+ x->nextstate = YXMLS_dt0;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_dt4;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_dt4:
+ if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+ x->state = YXMLS_dt1;
+ x->quote = ch;
+ x->nextstate = YXMLS_dt4;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_dt0;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return YXML_OK;
+ break;
+ case YXMLS_elem0:
+ if(yxml_isName(ch))
+ return yxml_elemname(x, ch);
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_elem1;
+ return yxml_elemnameend(x, ch);
+ }
+ if(ch == (unsigned char)'/') {
+ x->state = YXMLS_elem3;
+ return yxml_elemnameend(x, ch);
+ }
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return yxml_elemnameend(x, ch);
+ }
+ break;
+ case YXMLS_elem1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'/') {
+ x->state = YXMLS_elem3;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return YXML_OK;
+ }
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_attr0;
+ return yxml_attrstart(x, ch);
+ }
+ break;
+ case YXMLS_elem2:
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_elem1;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'/') {
+ x->state = YXMLS_elem3;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_elem3:
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return yxml_selfclose(x, ch);
+ }
+ break;
+ case YXMLS_enc0:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'=') {
+ x->state = YXMLS_enc1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_enc1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+ x->state = YXMLS_enc2;
+ x->quote = ch;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_enc2:
+ if(yxml_isAlpha(ch)) {
+ x->state = YXMLS_enc3;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_enc3:
+ if(yxml_isEncName(ch))
+ return YXML_OK;
+ if(x->quote == ch) {
+ x->state = YXMLS_xmldecl4;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_etag0:
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_etag1;
+ return yxml_elemclose(x, ch);
+ }
+ break;
+ case YXMLS_etag1:
+ if(yxml_isName(ch))
+ return yxml_elemclose(x, ch);
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_etag2;
+ return yxml_elemcloseend(x, ch);
+ }
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return yxml_elemcloseend(x, ch);
+ }
+ break;
+ case YXMLS_etag2:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_init:
+ if(ch == (unsigned char)'\xef') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_misc0;
+ x->string = (unsigned char *)"\xbb\xbf";
+ return YXML_OK;
+ }
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_misc0;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'<') {
+ x->state = YXMLS_le0;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_le0:
+ if(ch == (unsigned char)'!') {
+ x->state = YXMLS_lee1;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_leq0;
+ return YXML_OK;
+ }
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_elem0;
+ return yxml_elemstart(x, ch);
+ }
+ break;
+ case YXMLS_le1:
+ if(ch == (unsigned char)'!') {
+ x->state = YXMLS_lee1;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi0;
+ x->nextstate = YXMLS_misc1;
+ return YXML_OK;
+ }
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_elem0;
+ return yxml_elemstart(x, ch);
+ }
+ break;
+ case YXMLS_le2:
+ if(ch == (unsigned char)'!') {
+ x->state = YXMLS_lee2;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi0;
+ x->nextstate = YXMLS_misc2;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'/') {
+ x->state = YXMLS_etag0;
+ return YXML_OK;
+ }
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_elem0;
+ return yxml_elemstart(x, ch);
+ }
+ break;
+ case YXMLS_le3:
+ if(ch == (unsigned char)'!') {
+ x->state = YXMLS_comment0;
+ x->nextstate = YXMLS_misc3;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi0;
+ x->nextstate = YXMLS_misc3;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_lee1:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment1;
+ x->nextstate = YXMLS_misc1;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'D') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_dt0;
+ x->string = (unsigned char *)"OCTYPE";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_lee2:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment1;
+ x->nextstate = YXMLS_misc2;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'[') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_cd0;
+ x->string = (unsigned char *)"CDATA[";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_leq0:
+ if(ch == (unsigned char)'x') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_xmldecl0;
+ x->string = (unsigned char *)"ml";
+ return YXML_OK;
+ }
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_pi1;
+ x->nextstate = YXMLS_misc1;
+ return yxml_pistart(x, ch);
+ }
+ break;
+ case YXMLS_misc0:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'<') {
+ x->state = YXMLS_le0;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_misc1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'<') {
+ x->state = YXMLS_le1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_misc2:
+ if(ch == (unsigned char)'<') {
+ x->state = YXMLS_le2;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'&') {
+ x->state = YXMLS_misc2a;
+ return yxml_refstart(x, ch);
+ }
+ if(yxml_isChar(ch))
+ return yxml_datacontent(x, ch);
+ break;
+ case YXMLS_misc2a:
+ if(yxml_isRef(ch))
+ return yxml_ref(x, ch);
+ if(ch == (unsigned char)'\x3b') {
+ x->state = YXMLS_misc2;
+ return yxml_refcontent(x, ch);
+ }
+ break;
+ case YXMLS_misc3:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'<') {
+ x->state = YXMLS_le3;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_pi0:
+ if(yxml_isNameStart(ch)) {
+ x->state = YXMLS_pi1;
+ return yxml_pistart(x, ch);
+ }
+ break;
+ case YXMLS_pi1:
+ if(yxml_isName(ch))
+ return yxml_piname(x, ch);
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi4;
+ return yxml_pinameend(x, ch);
+ }
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_pi2;
+ return yxml_pinameend(x, ch);
+ }
+ break;
+ case YXMLS_pi2:
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi3;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return yxml_datapi1(x, ch);
+ break;
+ case YXMLS_pi3:
+ if(ch == (unsigned char)'>') {
+ x->state = x->nextstate;
+ return yxml_pivalend(x, ch);
+ }
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_pi2;
+ return yxml_datapi2(x, ch);
+ }
+ break;
+ case YXMLS_pi4:
+ if(ch == (unsigned char)'>') {
+ x->state = x->nextstate;
+ return yxml_pivalend(x, ch);
+ }
+ break;
+ case YXMLS_std0:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'=') {
+ x->state = YXMLS_std1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_std1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+ x->state = YXMLS_std2;
+ x->quote = ch;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_std2:
+ if(ch == (unsigned char)'y') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_std3;
+ x->string = (unsigned char *)"es";
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'n') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_std3;
+ x->string = (unsigned char *)"o";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_std3:
+ if(x->quote == ch) {
+ x->state = YXMLS_xmldecl6;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_ver0:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'=') {
+ x->state = YXMLS_ver1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_ver1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+ x->state = YXMLS_string;
+ x->quote = ch;
+ x->nextstate = YXMLS_ver2;
+ x->string = (unsigned char *)"1.";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_ver2:
+ if(yxml_isNum(ch)) {
+ x->state = YXMLS_ver3;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_ver3:
+ if(yxml_isNum(ch))
+ return YXML_OK;
+ if(x->quote == ch) {
+ x->state = YXMLS_xmldecl2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl0:
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_xmldecl1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl1:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'v') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_ver0;
+ x->string = (unsigned char *)"ersion";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl2:
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_xmldecl3;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_xmldecl7;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl3:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_xmldecl7;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'e') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_enc0;
+ x->string = (unsigned char *)"ncoding";
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'s') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_std0;
+ x->string = (unsigned char *)"tandalone";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl4:
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_xmldecl5;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_xmldecl7;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl5:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_xmldecl7;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'s') {
+ x->state = YXMLS_string;
+ x->nextstate = YXMLS_std0;
+ x->string = (unsigned char *)"tandalone";
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl6:
+ if(yxml_isSP(ch))
+ return YXML_OK;
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_xmldecl7;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_xmldecl7:
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc1;
+ return YXML_OK;
+ }
+ break;
+ }
+ return YXML_ESYN;
+}
+
+
+yxml_ret_t yxml_eof(yxml_t *x) {
+	/* YXMLS_misc3 is entered when the root element is closed; any other
+	 * state means the input stopped before the document was complete. */
+	return x->state == YXMLS_misc3 ? YXML_OK : YXML_EEOF;
+}
+
+
+/* vim: set noet sw=4 ts=4: */
diff --git a/deps/yxml.h b/deps/yxml.h
new file mode 100644
index 0000000..f1ebaeb
--- /dev/null
+++ b/deps/yxml.h
@@ -0,0 +1,134 @@
+/* Copyright (c) 2013 Yoran Heling
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <stdint.h>
+#include <stddef.h>
+
+
+typedef enum { /* Result of yxml_parse() and yxml_eof(): negative = error, 0 = no token, positive = token */
+ YXML_EEOF = -6, /* Unexpected EOF */
+ YXML_EREF = -5, /* Invalid character or entity reference (&whatever;) */
+ YXML_ECLOSE = -4, /* Close tag does not match open tag (<Tag> .. </OtherTag>) */
+ YXML_ESTACK = -3, /* Stack overflow (too deeply nested tags or too long element/attribute name) */
+ YXML_EATTR = -2, /* Too long attribute name */
+ YXML_ESYN = -1, /* Syntax error (unexpected byte) */
+ YXML_OK = 0, /* Character consumed, no new token present */
+ YXML_ELEMSTART = 1, /* Start of an element: '<Tag ..' */
+ YXML_CONTENT = 2, /* Element content */
+ YXML_ELEMEND = 3, /* End of an element: '.. />' or '</Tag>' */
+ YXML_ATTRSTART = 4, /* Attribute: 'Name=..' */
+ YXML_ATTRVAL = 5, /* Attribute value */
+ YXML_ATTREND = 6, /* End of attribute '.."' */
+ YXML_PISTART = 7, /* Start of a processing instruction */
+ YXML_PICONTENT = 8, /* Content of a PI */
+ YXML_PIEND = 9 /* End of a processing instruction */
+} yxml_ret_t;
+
+/* When, exactly, are tokens returned?
+ *
+ * <TagName
+ * '>' ELEMSTART
+ * '/' ELEMSTART, '>' ELEMEND
+ * ' ' ELEMSTART
+ * '>'
+ * '/', '>' ELEMEND
+ * Attr
+ * '=' ATTRSTART
+ * "X ATTRVAL
+ * 'Y' ATTRVAL
+ * 'Z' ATTRVAL
+ * '"' ATTREND
+ * '>'
+ * '/', '>' ELEMEND
+ *
+ * </TagName
+ * '>' ELEMEND
+ */
+
+
+typedef struct {
+ /* PUBLIC (read-only) */
+
+ /* Name of the current element, zero-length if not in any element. Changed
+ * after YXML_ELEMSTART. The pointer will remain valid up to and including
+ * the next non-YXML_ATTR* token, the pointed-to buffer will remain valid
+ * up to and including the YXML_ELEMEND for the corresponding element. */
+ char *elem;
+
+ /* The last read character(s) of an attribute value (YXML_ATTRVAL), element
+ * data (YXML_CONTENT), or processing instruction (YXML_PICONTENT). Changed
+ * after one of the respective YXML_ values is returned, and only valid
+ * until the next yxml_parse() call. Usually, this string only consists of
+ * a single byte, but multiple bytes are returned in the following cases:
+ * - "<?SomePI ?x ?>": The two characters "?x"
+ * - "<![CDATA[ ]x ]]>": The two characters "]x"
+ * - "<![CDATA[ ]]x ]]>": The three characters "]]x"
+ * - "&#N;" and "&#xN;", where the value of N is greater than 127. The referenced Unicode
+ * character is then encoded in multiple UTF-8 bytes.
+ */
+ char data[8];
+
+ /* Name of the current attribute. Changed after YXML_ATTRSTART, valid up to
+ * and including the next YXML_ATTREND. */
+ char *attr;
+
+ /* Name/target of the current processing instruction, zero-length if not in
+ * a PI. Changed after YXML_PISTART, valid up to (but excluding)
+ * the next YXML_PIEND. */
+ char *pi;
+
+ /* Line number, byte offset within that line, and total bytes read. These
+ * values refer to the position _after_ the last byte given to
+ * yxml_parse(). These are useful for debugging and error reporting. */
+ uint64_t byte;
+ uint64_t total;
+ uint32_t line;
+
+
+ /* PRIVATE */
+ int state;
+ unsigned char *stack; /* Stack of element names + attribute/PI name, separated by \0. Also starts with a \0. */
+ size_t stacksize, stacklen;
+ unsigned reflen;
+ unsigned quote;
+ int nextstate; /* Used for '@' state remembering and for the "string" consuming state */
+ unsigned ignore;
+ unsigned char *string;
+} yxml_t;
+
+
+void yxml_init(yxml_t *x, char *stack, size_t stacksize);
+
+
+yxml_ret_t yxml_parse(yxml_t *x, int ch);
+
+
+/* May be called after the last character has been given to yxml_parse().
+ * Returns YXML_OK if the XML document is valid, YXML_EEOF otherwise. Using
+ * this function isn't really necessary, but can be used to detect documents
+ * that don't end correctly. In particular, an error is returned when the XML
+ * document did not contain a (complete) root element, or when the document
+ * ended while in a comment or processing instruction. */
+yxml_ret_t yxml_eof(yxml_t *x);
+
+
+/* vim: set noet sw=4 ts=4: */
diff --git a/src/fl_load.c b/src/fl_load.c
index c1a4825..f86846d 100644
--- a/src/fl_load.c
+++ b/src/fl_load.c
@@ -26,6 +26,16 @@
#include "ncdc.h"
#include "fl_load.h"
+#include <yxml.h>
+
+
+#define STACKSIZE (8*1024)
+#define READBUFSIZE (32*1024)
+
+// Only used for attributes that we care about, and those tend to be short,
+// file names being the longest possible values. I am unaware of a filesystem
+// that allows filenames longer than 256 bytes, so this should be a safe value.
+#define MAXATTRVAL 1024
#define S_START 0 // waiting for <FileListing>
@@ -34,97 +44,42 @@
#define S_INDIR 3 // In a <Directory>..</Directory> or <FileListing>..</FileListing>
#define S_FILEOPEN 4 // In a <File ..>
#define S_INFILE 5 // In a <File>..</File>
-#define S_UNKNOWN 6 // In some tag we didn't recognize
-#define S_END 7 // Received </FileListing>
-typedef struct ctx_t {
- BZFILE *fh_bz;
- FILE *fh_f;
- gboolean eof;
+typedef struct ctx_t {
gboolean local;
int state;
- char *name;
char filetth[24];
gboolean filehastth;
guint64 filesize;
- gboolean dirincomplete;
+ char *name;
fl_list_t *root;
fl_list_t *cur;
int unknown_level;
-} ctx_t;
-
-static int readcb(void *context, char *buf, int len, GError **err) {
- ctx_t *x = context;
-
- if(x->fh_bz) {
- if(x->eof)
- return 0;
- int bzerr;
- int r = BZ2_bzRead(&bzerr, x->fh_bz, buf, len);
- if(bzerr != BZ_OK && bzerr != BZ_STREAM_END) {
- g_set_error(err, 1, 0, "bzip2 decompression error (%d): %s", bzerr, g_strerror(errno));
- return -1;
- }
- if(bzerr == BZ_STREAM_END)
- x->eof = TRUE;
- return r;
+ int consume;
+ char *attrp;
+ char attr[MAXATTRVAL];
- }
+ yxml_t x;
+ char stack[STACKSIZE];
+ char buf[READBUFSIZE];
+} ctx_t;
- int r = fread(buf, 1, len, x->fh_f);
- if(r < 0 && feof(x->fh_f))
- r = 0;
- if(r < 0)
- g_set_error(err, 1, 0, "Read error: %s", g_strerror(errno));
- return r;
-}
#define isvalidfilename(x) (\
!(((x)[0] == '.' && (!(x)[1] || ((x)[1] == '.' && !(x)[2])))) && !strchr((x), '/'))
-static int entitycb(void *context, int type, const char *arg1, const char *arg2, GError **err) {
- ctx_t *x = context;
- //printf("%d,%d: %s, %s\n", x->state, type, arg1, arg2);
- switch(x->state) {
-
- // The first token must always be a <FileListing>
- case S_START:
- if(type == XMLT_OPEN && g_ascii_strcasecmp(arg1, "FileListing") == 0) {
- x->state = S_FLOPEN;
- return 0;
- }
- break;
-
- // Any attributes in a <FileListing> are currently ignored.
- case S_FLOPEN:
- if(type == XMLT_ATTR)
- return 0;
- if(type == XMLT_ATTDONE) {
- x->state = S_INDIR;
- return 0;
- }
- break;
-
- // Handling the attributes of a Directory element.
- case S_DIROPEN:
- if(type == XMLT_ATTR && g_ascii_strcasecmp(arg1, "Name") == 0 && !x->name) {
- x->name = g_utf8_validate(arg2, -1, NULL) ? g_strdup(arg2) : str_convert("UTF-8", "UTF-8", arg2);
- if(!isvalidfilename(x->name)) {
- g_set_error(err, 1, 0, "Invalid directory name");
- return -1;
- }
- return 0;
- }
- if(type == XMLT_ATTDONE) {
+static void fl_load_token(ctx_t *x, yxml_ret_t r, GError **err) {
+ // Detect the end of the attributes for an open XML element.
+ if(r != YXML_ATTRSTART && r != YXML_ATTRVAL && r != YXML_ATTREND) {
+ if(x->state == S_DIROPEN) {
if(!x->name) {
- g_set_error(err, 1, 0, "Missing Name attribute in Directory element");
- return -1;
+ g_set_error_literal(err, 1, 0, "Missing Name attribute in Directory element");
+ return;
}
- // Create the directory entry
fl_list_t *new = fl_list_create(x->name, FALSE);
new->isfile = FALSE;
new->sub = g_ptr_array_new_with_free_func(fl_list_free);
@@ -134,77 +89,12 @@ static int entitycb(void *context, int type, const char *arg1, const char *arg2,
g_free(x->name);
x->name = NULL;
x->state = S_INDIR;
- return 0;
- }
- // Ignore unknown or duplicate attributes.
- if(type == XMLT_ATTR)
- return 0;
- break;
-
- // In a directory listing.
- case S_INDIR:
- if(type == XMLT_OPEN && g_ascii_strcasecmp(arg1, "Directory") == 0) {
- x->state = S_DIROPEN;
- return 0;
- }
- if(type == XMLT_OPEN && g_ascii_strcasecmp(arg1, "File") == 0) {
- x->state = S_FILEOPEN;
- return 0;
- }
- if(type == XMLT_OPEN) {
- x->state = S_UNKNOWN;
- x->unknown_level = 1;
- return 0;
- }
- if(type == XMLT_CLOSE) {
- char *expect = x->root == x->cur ? "FileListing" : "Directory";
- if(arg1 && g_ascii_strcasecmp(arg1, expect) != 0) {
- g_set_error(err, 1, 0, "Invalid close tag, expected </%s> but got </%s>", expect, arg1);
- return -1;
- }
- fl_list_sort(x->cur);
- if(x->cur == x->root)
- x->state = S_END;
- else
- x->cur = x->cur->parent;
- return 0;
- }
- break;
- // Handling the attributes of a File element. (If there are multiple
- // attributes with the same name, only the first is used.)
- case S_FILEOPEN:
- if(type == XMLT_ATTR && g_ascii_strcasecmp(arg1, "Name") == 0 && !x->name) {
- x->name = g_utf8_validate(arg2, -1, NULL) ? g_strdup(arg2) : str_convert("UTF-8", "UTF-8", arg2);
- if(!isvalidfilename(x->name)) {
- g_set_error(err, 1, 0, "Invalid file name");
- return -1;
- }
- return 0;
- }
- if(type == XMLT_ATTR && g_ascii_strcasecmp(arg1, "TTH") == 0 && !x->filehastth) {
- if(!istth(arg2)) {
- g_set_error(err, 1, 0, "Invalid TTH");
- return -1;
- }
- base32_decode(arg2, x->filetth);
- x->filehastth = TRUE;
- return 0;
- }
- if(type == XMLT_ATTR && g_ascii_strcasecmp(arg1, "Size") == 0 && x->filesize == G_MAXUINT64) {
- char *end = NULL;
- x->filesize = g_ascii_strtoull(arg2, &end, 10);
- if(!end || *end) {
- g_set_error(err, 1, 0, "Invalid file size");
- return -1;
- }
- return 0;
- }
- if(type == XMLT_ATTDONE) {
+ } else if(x->state == S_FILEOPEN) {
if(!x->name || !x->filehastth || x->filesize == G_MAXUINT64) {
g_set_error(err, 1, 0, "Missing %s attribute in File element",
!x->name ? "Name" : !x->filehastth ? "TTH" : "Size");
- return -1;
+ return;
}
// Create the file entry
fl_list_t *new = fl_list_create(x->name, x->local);
@@ -219,115 +109,209 @@ static int entitycb(void *context, int type, const char *arg1, const char *arg2,
g_free(x->name);
x->name = NULL;
x->state = S_INFILE;
- return 0;
- }
- // Ignore unknown or duplicate attributes.
- if(type == XMLT_ATTR)
- return 0;
- break;
- // In a File element. Nothing is allowed here exept a close of the File
- // element. (Really?)
- case S_INFILE:
- if(type == XMLT_CLOSE && (!arg1 || g_ascii_strcasecmp(arg1, "File") == 0)) {
+ } else if(x->state == S_FLOPEN)
x->state = S_INDIR;
- return 0;
+ }
+
+ switch(r) {
+ case YXML_ELEMSTART:
+ if(x->unknown_level)
+ x->unknown_level++;
+ else if(x->state == S_START) {
+ if(g_ascii_strcasecmp(x->x.elem, "FileListing") == 0)
+ x->state = S_FLOPEN;
+ else
+ g_set_error_literal(err, 1, 0, "XML root element is not <FileListing>");
+ } else {
+ if(g_ascii_strcasecmp(x->x.elem, "File") == 0)
+ x->state = S_FILEOPEN;
+ else if(g_ascii_strcasecmp(x->x.elem, "Directory") == 0)
+ x->state = S_DIROPEN;
+ else
+ x->unknown_level++;
}
break;
- // No idea in what kind of tag we are, just count start/end tags so we can
- // continue parsing when we're out of this unknown tag.
- case S_UNKNOWN:
- if(type == XMLT_OPEN)
- x->unknown_level++;
- else if(type == XMLT_CLOSE && !--x->unknown_level)
+ case YXML_ELEMEND:
+ if(x->unknown_level)
+ x->unknown_level--;
+ else if(x->state == S_INFILE)
x->state = S_INDIR;
- return 0;
- }
-
- g_set_error(err, 1, 0, "Unexpected token in state %s: %s, %s",
- x->state == S_START ? "START" :
- x->state == S_FLOPEN ? "FLOPEN" :
- x->state == S_DIROPEN ? "DIROPEN" :
- x->state == S_INDIR ? "INDIR" :
- x->state == S_FILEOPEN ? "FILEOPEN" :
- x->state == S_INFILE ? "INFILE" :
- x->state == S_END ? "END" : "UNKNOWN",
- type == XMLT_OPEN ? "OPEN" :
- type == XMLT_CLOSE ? "CLOSE" :
- type == XMLT_ATTR ? "ATTR" :
- type == XMLT_ATTDONE ? "ATTDONE" : "???",
- arg1 ? arg1 : "<NULL>");
- return -1;
-}
-
+ else {
+ fl_list_sort(x->cur);
+ x->cur = x->cur->parent;
+ }
+ break;
-static int ctx_open(ctx_t *x, const char *file, GError **err) {
- memset(x, 0, sizeof(ctx_t));
+ case YXML_ATTRSTART:
+ x->consume = !x->unknown_level && (
+ (x->state == S_DIROPEN && g_ascii_strcasecmp(x->x.attr, "Name") == 0) ||
+ (x->state == S_FILEOPEN && (
+ g_ascii_strcasecmp(x->x.attr, "Name") == 0 ||
+ g_ascii_strcasecmp(x->x.attr, "Size") == 0 ||
+ g_ascii_strcasecmp(x->x.attr, "TTH") == 0
+ ))
+ );
+ x->attrp = x->attr;
+ break;
- // open file
- x->fh_f = fopen(file, "r");
- if(!x->fh_f) {
- g_set_error_literal(err, 1, 0, g_strerror(errno));
- return -1;
- }
+ case YXML_ATTRVAL:
+ if(!x->consume)
+ break;
+ if(x->attrp-x->attr > sizeof(x->attr)-5) {
+ g_set_error_literal(err, 1, 0, "Too long XML attribute");
+ return;
+ }
+ char *v = x->x.data;
+ while(*v)
+ *(x->attrp++) = *(v++);
+ break;
- // open BZ2 decompression
- if(strlen(file) > 4 && strcmp(file+(strlen(file)-4), ".bz2") == 0) {
- int bzerr;
- x->fh_bz = BZ2_bzReadOpen(&bzerr, x->fh_f, 0, 0, NULL, 0);
- if(bzerr != BZ_OK) {
- g_set_error(err, 1, 0, "Unable to open bzip2 file (%d): %s", bzerr, g_strerror(errno));
- return -1;
+ case YXML_ATTREND:
+ if(!x->consume)
+ break;
+ *x->attrp = 0;
+ // Name, for either file or directory
+ if((*x->x.attr|32) == 'n' && !x->name) {
+ x->name = g_utf8_validate(x->attr, -1, NULL) ? g_strdup(x->attr) : str_convert("UTF-8", "UTF-8", x->attr);
+ if(!isvalidfilename(x->name))
+ g_set_error_literal(err, 1, 0, "Invalid file name");
}
- }
+ // TTH, for files
+ if((*x->x.attr|32) == 't' && !x->filehastth) {
+ if(!istth(x->attr))
+ g_set_error_literal(err, 1, 0, "Invalid TTH");
+ else {
+ base32_decode(x->attr, x->filetth);
+ x->filehastth = TRUE;
+ }
+ }
+ // Size, for files
+ if((*x->x.attr|32) == 's' && x->filesize == G_MAXUINT64) {
+ char *end = NULL;
+ x->filesize = g_ascii_strtoull(x->attr, &end, 10);
+ if(!end || *end)
+ g_set_error_literal(err, 1, 0, "Invalid file size");
+ }
+ break;
- return 0;
+ default:
+ break;
+ }
}
-static void ctx_close(ctx_t *x) {
- if(x->fh_bz) {
- int bzerr;
- BZ2_bzReadClose(&bzerr, x->fh_bz);
+static fl_list_t *fl_load_parse(FILE *fh, BZFILE *bzfh, gboolean local, GError **err) {
+ ctx_t *x = g_new(ctx_t, 1);
+ x->state = S_START;
+ x->root = fl_list_create("", FALSE);
+ x->root->sub = g_ptr_array_new_with_free_func(fl_list_free);
+ x->cur = x->root;
+ x->filesize = G_MAXUINT64;
+ x->local = local;
+ x->unknown_level = 0;
+ x->filehastth = FALSE;
+ x->name = NULL;
+
+ yxml_init(&x->x, x->stack, STACKSIZE);
+ int buflen = 0;
+ int bzeof = 0;
+
+ while(1) {
+ // Fill buffer
+ if(bzfh) {
+ if(bzeof)
+ break;
+ int bzerr;
+ buflen = BZ2_bzRead(&bzerr, bzfh, x->buf, READBUFSIZE);
+ if(bzerr == BZ_STREAM_END)
+ bzeof = 1;
+ else if(bzerr != BZ_OK) {
+ g_set_error(err, 1, 0, "bzip2 decompression error (%d): %s", bzerr, g_strerror(errno));
+ break;
+ }
+ } else {
+ buflen = fread(x->buf, 1, READBUFSIZE, fh);
+ if(buflen < 0 && feof(fh))
+ break;
+ if(buflen < 0) {
+ g_set_error(err, 1, 0, "Read error: %s", g_strerror(errno));
+ break;
+ }
+ }
+
+ // And parse
+ char *pbuf = x->buf;
+ while(!*err && buflen > 0) {
+ yxml_ret_t r = yxml_parse(&x->x, *pbuf);
+ pbuf++;
+ buflen--;
+ if(r == YXML_OK)
+ continue;
+ if(r < 0) {
+ g_set_error_literal(err, 1, 0, "XML parsing error");
+ break;
+ }
+ fl_load_token(x, r, err);
+ }
+ if(*err) {
+ g_prefix_error(err, "Line %"G_GUINT32_FORMAT":%"G_GUINT64_FORMAT": ", x->x.line, x->x.byte);
+ break;
+ }
}
- if(x->fh_f)
- fclose(x->fh_f);
+ if(!*err && yxml_eof(&x->x) < 0)
+ g_set_error_literal(err, 1, 0, "XML document did not end correctly");
- if(x->name)
- g_free(x->name);
+ fl_list_t *root = x->root;
+ g_free(x->name);
+ g_free(x);
+ return root;
}
fl_list_t *fl_load(const char *file, GError **err, gboolean local) {
g_return_val_if_fail(err == NULL || *err == NULL, NULL);
- ctx_t x;
+ fl_list_t *root = NULL;
+ FILE *fh;
+ BZFILE *bzfh = NULL;
GError *ierr = NULL;
- if(ctx_open(&x, file, &ierr))
+
+ // open file
+ fh = fopen(file, "r");
+ if(!fh) {
+ g_set_error_literal(&ierr, 1, 0, g_strerror(errno));
goto end;
+ }
- x.state = S_START;
- x.root = fl_list_create("", FALSE);
- x.root->sub = g_ptr_array_new_with_free_func(fl_list_free);
- x.cur = x.root;
- x.filesize = G_MAXUINT64;
- x.local = local;
+ // open BZ2 decompression
+ if(strlen(file) > 4 && strcmp(file+(strlen(file)-4), ".bz2") == 0) {
+ int bzerr;
+ bzfh = BZ2_bzReadOpen(&bzerr, fh, 0, 0, NULL, 0);
+ if(bzerr != BZ_OK) {
+ g_set_error(&ierr, 1, 0, "Unable to open bzip2 file (%d): %s", bzerr, g_strerror(errno));
+ goto end;
+ }
+ }
- if(xml_parse(entitycb, readcb, &x, &ierr))
- goto end;
+ root = fl_load_parse(fh, bzfh, local, &ierr);
end:
- g_return_val_if_fail(ierr || x.state == S_END, NULL);
- ctx_close(&x);
+ if(bzfh) {
+ int bzerr;
+ BZ2_bzReadClose(&bzerr, bzfh);
+ }
+ if(fh)
+ fclose(fh);
if(ierr) {
g_propagate_error(err, ierr);
- if(x.root)
- fl_list_free(x.root);
- x.root = NULL;
+ if(root)
+ fl_list_free(root);
+ root = NULL;
}
- return x.root;
+ return root;
}
diff --git a/src/xmlread.c b/src/xmlread.c
deleted file mode 100644
index 9c0c716..0000000
--- a/src/xmlread.c
+++ /dev/null
@@ -1,537 +0,0 @@
-/* ncdc - NCurses Direct Connect client
-
- Copyright (c) 2011-2013 Yoran Heling
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-*/
-
-/* This is a minimal XML stream parser designed for parsing ADC files.xml
- * documents. As these documents don't tend to use the full XML specification,
- * this parser lacks a few features:
- *
- * - Character entities (&#...;) are validated to be syntactically correct, but
- * are otherwise ignored.
- * - Only ASCII characters are allowed in element and attribute names, Unicode
- * characters in these constructs result in an error.
- * - The contents of attribute values are not validated to contain only
- * characters in the allowed ranges. These values are passed to the
- * application even if they don't form a valid UTF-8 sequence. The only
- * exception to this is the 0 byte, which will result in an error.
- * - Element contents (<Tag> ..contents.. </Tag>) are validated but otherwise
- * ignored.
- * - An element may have multiple attributes with the same name, it is assumed
- * that the application handles this situation.
- * - No validation is performed that open tags are properly closed. E.g.
- * "<a></b>" is valid. The application is responsible for this validation.
- * - The 'encoding' information in the <?xml ..> tag is ignored.
- * - The following features are not supported, and will result in a parse error
- * when present in the XML document:
- * - CDATA sections (<![CDATA ..)
- * - Processing instructions (<? .. ?>
- * - Document type declaration (<!DOCTYPE ..>)
- * - Attribute-list declarations (<!ATTLIST ..>)
- * - Element type declarations (<!ELEMENT ..>)
- * - Entity declarations (<!ENTITY ..>)
- * - Conditional sections (<![IGNORE .. or <![INCLUDE ..)
- * - Notation declarations (<!NOTATION ..>)
- *
- * (To my knowledge, the parser in DC++ and derivatives behave similarly).
- *
- * TODO: Since this parser is recursive, figure out some maximum bound on the
- * stack space used. (There should be a maximum, limited by MAX_DEPTH)
- */
-
-#include "ncdc.h"
-#include "xmlread.h"
-
-
-#if INTERFACE
-
-#define XMLT_OPEN 1 // arg1 = tag name
-#define XMLT_CLOSE 2 // arg1 = tag name or NULL for self-closing tags
-#define XMLT_ATTR 3 // arg1 = name, arg2 = value (not validated to be correct UTF-8)
-#define XMLT_ATTDONE 4 // no args, indicates that there are no more attributes for the last opened tag
-
-// Called whenever an XMLT_ entity has been found. Should return 0 to
-// continue processing, anything else to abort.
-typedef int (*xml_cb_t)(void *, int, const char *, const char *, GError **);
-
-// Read callback. Should return -1 on error, 0 on EOF, number of bytes read
-// otherwise.
-typedef int (*xml_read_t)(void *, char *, int, GError **);
-
-#endif
-
-
-#define MAX_NAME 128
-#define MAX_ATTRVAL (8*1024) // this is more than enough for file lists.
-#define MAX_DEPTH 50
-#define READ_BUF_SIZE (32*1024)
-
-typedef struct ctx_t {
- xml_cb_t cb;
- xml_read_t read;
- void *dat;
-
- char name[MAX_NAME];
- char val[MAX_ATTRVAL];
- char readbuf[READ_BUF_SIZE];
- char *buf;
- gboolean readeof;
- int len;
-
- int level;
- int line;
- int byte;
- GError *err;
- jmp_buf jmp;
-} ctx_t;
-
-
-
-// Helper functions
-
-
-static void err(ctx_t *x, const char *fmt, ...) {
- va_list arg;
- va_start(arg, fmt);
- if(!x->err) {
- char *msg = g_strdup_vprintf(fmt, arg);
- g_set_error(&x->err, 1, 0, "Line %d:%d: %s", x->line, x->byte, msg);
- g_free(msg);
- }
- va_end(arg);
- longjmp(x->jmp, 1);
-}
-
-
-static void callcb(ctx_t *x, int type, const char *arg1, const char *arg2) {
- if(x->cb(x->dat, type, arg1, arg2, &x->err)) {
- g_prefix_error(&x->err, "Line %d:%d: ", x->line, x->byte);
- err(x, "Processing aborted by the application");
- }
-}
-
-
-// Make sure we have more than n bytes in the buffer. Returns the buffer
-// length, which may be smaller on EOF. Also validates that the XML data does
-// not contain the 0 byte (this simplifies error checking a bit).
-static int fill(ctx_t *x, int n) {
- if(G_LIKELY(x->len >= n))
- return x->len;
- if(x->readeof)
- return x->len;
-
- if(x->len > 0)
- memmove(x->readbuf, x->buf, x->len);
- x->buf = x->readbuf;
-
- do {
- int r = x->read(x->dat, x->readbuf + x->len, READ_BUF_SIZE - x->len, &x->err);
- if(r < 0)
- err(x, "Parse error");
- if(!r) {
- x->readeof = TRUE;
- break;
- }
- if(memchr(x->readbuf + x->len, 0, r) != NULL)
- err(x, "Invalid zero byte in XML data");
- x->len += r;
- } while(x->len < n);
-
- return x->len;
-}
-
-
-// Require n bytes to be present, set error otherwise.
-static void rfill(ctx_t *x, int n) {
- if(G_UNLIKELY(n >= x->len) && fill(x, n) < n)
- err(x, "Unexpected EOF");
-}
-
-
-// consume some characters (also updates ->bytes and ->lines)
-static void con(ctx_t *x, int n) {
- int i = 0;
- while(i < n) {
- if(x->buf[i++] == '\n') {
- x->line++;
- x->byte = 0;
- }
- x->byte++;
- }
- x->buf += n;
- x->len -= n;
-}
-
-
-// Validate and consume a string literal
-static void lit(ctx_t *x, const char *str) {
- int len = strlen(str);
- rfill(x, len);
- if(strncmp(x->buf, str, len) != 0)
- err(x, "Expected '%s'", str);
- con(x, len);
-}
-
-
-
-
-// Language definition
-
-
-#define isWhiteSpace(x) (x == 0x20 || x == 0x09 || x == 0x0d || x == 0x0a)
-#define isDecimal(x) ('0' <= x && x <= '9')
-#define isHex(x) (isDecimal(x) || ('a' <= x && x <= 'f') || ('A' <= x && x <= 'F'))
-#define isNameStartChar(x) (x == ':' || ('A' <= x && x <= 'Z') || x == '_' || ('a' <= x && x <= 'z'))
-#define isNameChar(x) (isNameStartChar(x) || x == '-' || x == '.' || isDecimal(x))
-#define isCharData(x) (x != '&' && x != '<')
-
-
-// Consumes whitespace until an other character or EOF was found. If req, then
-// there must be at least one whitespace character, otherwise it's optional.
-static void S(ctx_t *x, int req) {
- if(req) {
- rfill(x, 1);
- if(!isWhiteSpace(*x->buf))
- err(x, "White space expected, got '%c'", *x->buf);
- }
- while((x->len > 0 || fill(x, 1) > 0) && isWhiteSpace(*x->buf))
- con(x, 1);
-}
-
-
-static void Eq(ctx_t *x) {
- S(x, 0);
- lit(x, "=");
- S(x, 0);
-}
-
-
-// Parses a CharRef or EntityRef and writes the result to x->val+n, returning
-// the number of bytes written (either 0 or 1).
-// Note: CharRef's are parsed but ignored. This is what DC++ does, and
-// simplifies things a bit. Custom EntityRefs are not supported, only those
-// predefined in the XML standard can be used.
-static int Reference(ctx_t *x, int n) {
- con(x, 1); // Assuming the caller has already verified that this is indeed a Reference.
-
- // We're currently parsing [^;]* here, while the standard requires a (more
- // strict) 'Name' token or a CharRef. This doesn't really matter, since we
- // validate the contents of name later on.
- char name[16] = {};
- int i = 0;
- rfill(x, 1);
- while(i < 15 && *x->buf != ';') {
- name[i++] = *x->buf;
- con(x, 1);
- rfill(x, 1);
- }
- if(i >= 15)
- err(x, "Entity name too long");
- con(x, 1);
-
- // Predefined entities
-#define p(s, c) if(strcmp(name, s) == 0) {x->val[n] = c; return 1;}
- p("lt", '<');
- p("gt", '>');
- p("amp", '&');
- p("apos", '\'');
- p("quot", '"');
-#undef p
-
- // CharRefs
- if(name[0] == '#' && name[1] == 'x') {
- i = 2;
- do
- if(!isHex(name[i]))
- err(x, "Invalid character reference '&%s;'", name);
- while(++i < strlen(name));
- return 0;
- }
-
- // decimal CharRef
- if(name[0] == '#') {
- i = 1;
- do
- if(!isDecimal(name[i]))
- err(x, "Invalid character reference '&%s;'", name);
- while(++i < strlen(name));
- return 0;
- }
-
- // Anything else is an error
- err(x, "Unknown entity reference '&%s;'", name);
- return 0;
-}
-
-
-// Parses an attribute value and writes its (decoded) contents to x->val.
-static void AttValue(ctx_t *x) {
- rfill(x, 2);
- char esc = *x->buf;
- if(esc != '"' && esc != '\'')
- err(x, "' or \" expected, got '%c'", *x->buf);
- con(x, 1);
-
- int n = 0;
- while(*x->buf != esc) {
- if(*x->buf == '<')
- err(x, "Invalid '<' in attribute value");
- if(n >= MAX_ATTRVAL-4)
- err(x, "Too long attribute value.");
- if(*x->buf == '&')
- n += Reference(x, n);
- else {
- x->val[n++] = *x->buf;
- con(x, 1);
- }
- rfill(x, 1);
- }
- x->val[n] = 0;
-
- if(*x->buf != esc)
- err(x, "%c expected, got %c", esc, *x->buf);
- con(x, 1);
-}
-
-
-static void comment(ctx_t *x) {
- lit(x, "<!--");
- while(1) {
- rfill(x, 3);
- if(x->buf[0] == '-' && x->buf[1] == '-') {
- if(x->buf[2] != '>')
- err(x, "'--' not allowed in XML comment");
- con(x, 3);
- break;
- }
- con(x, 1);
- }
-}
-
-
-// Consumes any number of whitespace and comments. (So it's actually Misc*)
-static void Misc(ctx_t *x) {
- while(fill(x, 4) >= 4) {
- if(strncmp(x->buf, "<!--", 4) == 0) {
- comment(x);
- continue;
- }
- if(!isWhiteSpace(*x->buf))
- break;
- S(x, 0);
- }
- S(x, 0);
-}
-
-
-// Consumes a name and stores it in x->name.
-static void Name(ctx_t *x) {
- rfill(x, 1);
- int n = 0;
- if(!isNameStartChar(*x->buf))
- err(x, "Invalid character in element or attribute name");
- x->name[n++] = *x->buf;
- con(x, 1);
- while(n < MAX_NAME-1 && fill(x, 1) > 0 && isNameChar(*x->buf)) {
- x->name[n++] = *x->buf;
- con(x, 1);
- }
- if(n >= MAX_NAME-1)
- err(x, "Too long element or attribute name");
- x->name[n] = 0;
-}
-
-
-// Returns the number of bytes consumed.
-static int CharData(ctx_t *x) {
- int r = 0;
- while(fill(x, 3) >= 3) {
- if(!isCharData(*x->buf))
- return r;
- if(strncmp(x->buf, "]]>", 3) == 0)
- err(x, "']]>' not allowed in content");
- r++;
- con(x, 1);
- }
-
- while(fill(x, 1) >= 1) {
- if(!isCharData(*x->buf))
- return r;
- r++;
- con(x, 1);
- }
- return r;
-}
-
-
-static void element(ctx_t *x);
-
-static void content(ctx_t *x) {
- CharData(x);
- while(1) {
- // Getting an EOF 2 bytes after content is always an error regardless of
- // the content (since content always follows a close tag), so this rfill
- // usage is safe.
- rfill(x, 2);
- if(x->buf[0] == '<' && x->buf[1] == '/')
- return;
- else if(x->buf[0] == '<' && x->buf[1] == '!')
- comment(x);
- else if(x->buf[0] == '<')
- element(x);
- else if(x->buf[0] == '&')
- Reference(x, 0);
- else if(!CharData(x)) // shouldn't happen, actually.
- err(x, "Invalid character in content");
- }
-}
-
-
-static void element(ctx_t *x) {
- if(x->level <= 0)
- err(x, "Maximum element depth exceeded");
-
- lit(x, "<");
- Name(x);
- callcb(x, XMLT_OPEN, x->name, NULL);
-
- while(1) {
- // Is this tag ending yet?
- rfill(x, 1);
- if(*x->buf == '>' || *x->buf == '/')
- break;
- S(x, 1);
- if(*x->buf == '>' || *x->buf == '/')
- break;
-
- // Otherwise, we have an attribute
- Name(x);
- Eq(x);
- AttValue(x);
- callcb(x, XMLT_ATTR, x->name, x->val);
- }
-
- callcb(x, XMLT_ATTDONE, NULL, NULL);
-
- // EmptyElementTag
- if(*x->buf == '/') {
- lit(x, "/>");
- callcb(x, XMLT_CLOSE, NULL, NULL);
- return;
- }
-
- // Otherwise, this was an STag
- lit(x, ">");
- x->level--;
- content(x);
- x->level++;
- lit(x, "</");
- Name(x);
- lit(x, ">");
- callcb(x, XMLT_CLOSE, x->name, NULL);
-}
-
-
-static void XMLDecl(ctx_t *x) {
- if(fill(x, 5) < 5 || strncmp(x->buf, "<?xml", 5) != 0)
- return;
-
- con(x, 5);
- S(x, 1);
-
- // version
- lit(x, "version");
- Eq(x);
- AttValue(x);
- if(x->val[0] != '1' || x->val[1] != '.')
- err(x, "Invalid XML version");
- int i = 2;
- do
- if(!isDecimal(x->val[i]))
- err(x, "Invalid XML version");
- while(++i < strlen(x->val));
-
- // Accepts either whitespace or a '?' to signal the end of this XML
- // declaration.
-#define se rfill(x, 1); if(x->buf[0] == '?') goto end; S(x, 1); rfill(x, 1); if(x->buf[0] == '?') goto end;
-
- // encoding
- se
- if(x->buf[0] == 'e') {
- lit(x, "encoding");
- Eq(x);
- AttValue(x);
- se
- }
-
- // standalone
- lit(x, "standalone");
- Eq(x);
- AttValue(x);
- if(strcmp(x->val, "yes") != 0 && strcmp(x->val, "no") != 0)
- err(x, "Invalid value for \"standalone\"");
- S(x, 0);
-#undef se
-
-end:
- lit(x, "?>");
-}
-
-
-
-
-// Parses the complete XML document, returns 0 on success or -1 on error.
-int xml_parse(xml_cb_t cb, xml_read_t read, void *dat, GError **e) {
- // Don't allocate this the stack, it's fairly large.
- ctx_t *x = g_new(ctx_t, 1);
- x->dat = dat;
- x->cb = cb;
- x->read = read;
-
- x->buf = x->readbuf;
- x->readeof = FALSE;
- x->len = 0;
-
- x->line = x->byte = 1;
- x->level = MAX_DEPTH;
- x->err = NULL;
-
- if(!setjmp(x->jmp)) {
- // UTF-8 BOM
- if(fill(x, 3) >= 3 && strncmp(x->buf, "\xef\xbb\xbf", 3) == 0)
- con(x, 3);
- XMLDecl(x);
- Misc(x);
- element(x);
- Misc(x);
- // We should have consumed everything now.
- if(fill(x, 1))
- err(x, "Expected end-of-file");
- }
-
- if(x->err)
- g_propagate_error(e, x->err);
- g_free(x);
- return 0;
-}
-