diff options
author | Yorhel <git@yorhel.nl> | 2013-09-05 18:41:18 +0200 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2013-09-05 18:41:18 +0200 |
commit | 4d930b2cf81317b3bd1133c4621c0b712112c953 (patch) | |
tree | 410ea6043f117eb4248f92da438a5857b5183388 | |
parent | 3ebeb7a242b14f2613cd42a1cdc499bed77319e3 (diff) |
Rename some YXML_ tokens for consistency + add token for end-of-attr
Previously, reading the value of an attribute required reading all DATA
tokens until the next YXML_ATTR or the next YXML_EOA. The new
YXML_ATTRSTART and YXML_ATTREND tokens are clearer and provide a
tighter bound on where you can stop waiting for more DATA tokens for
the attribute value.
The YXML_EOA token has been renamed to YXML_CONTENT, because that's what
it actually signifies: The start of the contents of the element.
I've also documented the token ranges in which the elem, data and attr
fields in yxml_t remain valid.
-rw-r--r-- | yxml-states | 6 | ||||
-rw-r--r-- | yxml.c | 28 | ||||
-rw-r--r-- | yxml.c.in | 20 | ||||
-rw-r--r-- | yxml.h | 53 |
4 files changed, 55 insertions, 52 deletions
diff --git a/yxml-states b/yxml-states index b284546..7cee3a7 100644 --- a/yxml-states +++ b/yxml-states @@ -140,9 +140,9 @@ etag2 SP etag2; '>' misc2 # Element, after '<X', returns to misc2 -elem0 Name elemname elem0; SP elemnameend elem1; '/' elemnameend attrsend elem3; '>' elemnameend attrsend misc2 -elem1 SP elem1; '/' attrsend elem3; '>' attrsend misc2; NameStart attrstart attr0 -elem2 SP elem1; '/' attrsend elem3; '>' attrsend misc2 +elem0 Name elemname elem0; SP elemnameend elem1; '/' elemnameend content elem3; '>' elemnameend content misc2 +elem1 SP elem1; '/' content elem3; '>' content misc2; NameStart attrstart attr0 +elem2 SP elem1; '/' content elem3; '>' content misc2 elem3 '>' selfclose misc2 # Attribute, after NameStart, returns to elem2 @@ -171,22 +171,22 @@ static inline int yxml_elemname(yxml_t *x, unsigned ch) { static inline int yxml_elemnameend(yxml_t *x, unsigned ch) { - return YXML_OPEN; + return YXML_ELEMEND; } /* Also used in yxml_elemcloseend(), since this function just removes the last - * element from the stack and returns CLOSE and EOD when appropriate. */ + * element from the stack and returns ELEMEND and EOD when appropriate. 
*/ static int yxml_selfclose(yxml_t *x, unsigned ch) { yxml_popstack(x); if(x->stacklen) { x->elem = (char *)x->stack+x->stacklen-1; while(*(x->elem-1)) x->elem--; - return YXML_CLOSE; + return YXML_ELEMEND; } x->elem = (char *)x->stack; - return YXML_CLOSE | YXML_EOD; + return YXML_ELEMEND | YXML_EOD; } @@ -216,18 +216,18 @@ static inline int yxml_attrname(yxml_t *x, unsigned ch) { static inline int yxml_attrnameend(yxml_t *x, unsigned ch) { - return YXML_ATTR; + return YXML_ATTRSTART; } static inline int yxml_attrvalend(yxml_t *x, unsigned ch) { yxml_popstack(x); - return YXML_OK; + return YXML_ATTREND; } -static inline int yxml_attrsend(yxml_t *x, unsigned ch) { - return YXML_EOA; +static inline int yxml_content(yxml_t *x, unsigned ch) { + return YXML_CONTENT; } @@ -450,11 +450,11 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) { } if(ch == (unsigned char)'/') { x->state = YXMLS_elem3; - return yxml_elemnameend(x, ch)|yxml_attrsend(x, ch); + return yxml_elemnameend(x, ch)|yxml_content(x, ch); } if(ch == (unsigned char)'>') { x->state = YXMLS_misc2; - return yxml_elemnameend(x, ch)|yxml_attrsend(x, ch); + return yxml_elemnameend(x, ch)|yxml_content(x, ch); } break; case YXMLS_elem1: @@ -462,11 +462,11 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) { return YXML_OK; if(ch == (unsigned char)'/') { x->state = YXMLS_elem3; - return yxml_attrsend(x, ch); + return yxml_content(x, ch); } if(ch == (unsigned char)'>') { x->state = YXMLS_misc2; - return yxml_attrsend(x, ch); + return yxml_content(x, ch); } if(yxml_isNameStart(ch)) { x->state = YXMLS_attr0; @@ -480,11 +480,11 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) { } if(ch == (unsigned char)'/') { x->state = YXMLS_elem3; - return yxml_attrsend(x, ch); + return yxml_content(x, ch); } if(ch == (unsigned char)'>') { x->state = YXMLS_misc2; - return yxml_attrsend(x, ch); + return yxml_content(x, ch); } break; case YXMLS_elem3: @@ -114,22 +114,22 @@ static inline int yxml_elemname(yxml_t *x, unsigned ch) { static inline int 
yxml_elemnameend(yxml_t *x, unsigned ch) { - return YXML_OPEN; + return YXML_ELEMEND; } /* Also used in yxml_elemcloseend(), since this function just removes the last - * element from the stack and returns CLOSE and EOD when appropriate. */ + * element from the stack and returns ELEMEND and EOD when appropriate. */ static int yxml_selfclose(yxml_t *x, unsigned ch) { yxml_popstack(x); if(x->stacklen) { x->elem = (char *)x->stack+x->stacklen-1; while(*(x->elem-1)) x->elem--; - return YXML_CLOSE; + return YXML_ELEMEND; } x->elem = (char *)x->stack; - return YXML_CLOSE | YXML_EOD; + return YXML_ELEMEND | YXML_EOD; } @@ -159,30 +159,30 @@ static inline int yxml_attrname(yxml_t *x, unsigned ch) { static inline int yxml_attrnameend(yxml_t *x, unsigned ch) { - return YXML_ATTR; + return YXML_ATTRSTART; } static inline int yxml_attrvalend(yxml_t *x, unsigned ch) { yxml_popstack(x); - return YXML_OK; + return YXML_ATTREND; } -static inline int yxml_attrsend(yxml_t *x, unsigned ch) { - return YXML_EOA; +static inline int yxml_content(yxml_t *x, unsigned ch) { + return YXML_CONTENT; } static inline int yxml_refstart(yxml_t *x, unsigned ch) { - memset(x->ref, 0, YXML_MAX_REF+1); + memset(x->ref, 0, sizeof(x->ref)); x->reflen = 0; return YXML_OK; } static int yxml_ref(yxml_t *x, unsigned ch) { - if(x->reflen >= YXML_MAX_REF) + if(x->reflen >= sizeof(x->ref)-1) return YXML_EREF; x->ref[x->reflen] = ch; x->reflen++; @@ -25,38 +25,39 @@ typedef enum { - YXML_EREF = -5, /* Invalid character or entity reference (&whatever;) */ - YXML_ECLOSE = -4, /* Close tag does not match open tag (<Tag> .. </OtherTag>) */ - YXML_ESTACK = -3, /* Stack overflow (too deeply nested tags or too long element/attribute name) */ - YXML_EATTR = -2, /* Too long attribute name */ - YXML_ESYN = -1, /* Syntax error (unexpected byte) */ - YXML_OK = 0, /* Character consumed, no new token present */ - YXML_OPEN = 1, /* Start of an element: '<Tag ..' */ - YXML_CLOSE = 2, /* End of an element: '.. 
/>' or '</Tag>' */ - YXML_ATTR = 4, /* Attribute: 'Name=..' */ - YXML_DATA = 8, /* Attribute value or element contents */ - YXML_EOA = 16, /* End of attributes: '.. />' or '.. >' */ - YXML_EOD = 32 /* End of XML document */ + YXML_EREF = -5, /* Invalid character or entity reference (&whatever;) */ + YXML_ECLOSE = -4, /* Close tag does not match open tag (<Tag> .. </OtherTag>) */ + YXML_ESTACK = -3, /* Stack overflow (too deeply nested tags or too long element/attribute name) */ + YXML_EATTR = -2, /* Too long attribute name */ + YXML_ESYN = -1, /* Syntax error (unexpected byte) */ + YXML_OK = 0, /* Character consumed, no new token present */ + YXML_ELEMSTART = 1, /* Start of an element: '<Tag ..' */ + YXML_ELEMEND = 2, /* End of an element: '.. />' or '</Tag>' */ + YXML_ATTRSTART = 4, /* Attribute: 'Name=..' */ + YXML_ATTREND = 8, /* End of attribute '.."' */ + YXML_CONTENT = 16, /* Start of element content '.. />' or '.. >' */ + YXML_DATA = 32, /* Attribute value or element contents */ + YXML_EOD = 64 /* End of XML document */ } yxml_ret_t; /* When, exactly, are tokens returned? * * <TagName - * '>' OPEN | EOA - * '/' OPEN | EOA, '>' CLOSE (| EOD) - * ' ' OPEN - * '>' EOA - * '/' EOA, '>' CLOSE (| EOD) + * '>' ELEMSTART | CONTENT + * '/' ELEMSTART | CONTENT, '>' ELEMENT (| EOD) + * ' ' ELEMSTART + * '>' CONTENT + * '/' CONTENT, '>' ELEMEND (| EOD) * Attr - * '=' ATTR + * '=' ATTRSTART * "X DATA * 'Y' DATA * 'Z' DATA - * "> EOA - * "/ EOA, '>' CLOSE (| EOD) + * "> ATTREND + * "/ CONTENT, '>' ELEMEND (| EOD) * * </TagName - * '>' CLOSE (| EOD) + * '>' ELEMEND (| EOD) */ @@ -64,15 +65,17 @@ typedef struct { /* PUBLIC (read-only) */ /* Name of the current element, zero-length if not in any element. Changed - * after YXML_OPEN or YXML_CLOSE. */ + * after YXML_ELEMSTART. The pointer will remain valid up to and including + * the next YXML_CONTENT, the pointed-to buffer will remain valid up to and + * including the YXML_ELEMCLOSE for the corresponding element. 
*/ char *elem; /* The last read character of an attribute value or element data. Changed - * after YXML_DATA. */ + * after YXML_DATA and only valid until the next yxml_parse() call. */ char data; - /* Currently opened attribute name, zero-length if not in an attribute. - * Changed after YXML_ATTR. */ + /* Name of the current attribute. Changed after YXML_ATTRSTART, valid up to + * and including the next YXML_ATTREND. */ char *attr; /* Line number, byte offset within that line, and total bytes read. These |