summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2015-10-27 18:28:33 +0100
committer Yorhel <git@yorhel.nl> 2015-10-27 18:29:08 +0100
commit 05f8de1ea776419b1532d382737ca09df08dcae2 (patch)
tree ef45eae8b0c4d9ec9f56f7742932e523e4022376
parent 822e94e03a70d916e06dd43fa0e54a0571c82e12 (diff)
Fix parsing of PIs that start with xml
Bug #2: http://dev.yorhel.nl/yxml/bug/2
-rw-r--r-- test/pi05.out 7
-rw-r--r-- test/pi05.xml 1
-rw-r--r-- test/pi06.out 7
-rw-r--r-- test/pi06.xml (renamed from test/xmldecl_err07.xml) 0
-rw-r--r-- test/pi07.out 7
-rw-r--r-- test/pi07.xml 1
-rw-r--r-- test/xmldecl_err07.out 2
-rw-r--r-- test/xmldecl_err08.out 2
-rw-r--r-- test/xmldecl_err08.xml 1
-rw-r--r-- yxml-states 41
-rw-r--r-- yxml.c 88
-rw-r--r-- yxml.c.in 1
12 files changed, 111 insertions, 47 deletions
diff --git a/test/pi05.out b/test/pi05.out
new file mode 100644
index 0000000..0998791
--- /dev/null
+++ b/test/pi05.out
@@ -0,0 +1,7 @@
+
+pistart xml-stylesheet
+picontent href="mystyle.css" type="text/css"
+piend
+elemstart a
+elemend
+ok
diff --git a/test/pi05.xml b/test/pi05.xml
new file mode 100644
index 0000000..b12b519
--- /dev/null
+++ b/test/pi05.xml
@@ -0,0 +1 @@
+<?xml-stylesheet href="mystyle.css" type="text/css"?><a/>
diff --git a/test/pi06.out b/test/pi06.out
new file mode 100644
index 0000000..1a4f531
--- /dev/null
+++ b/test/pi06.out
@@ -0,0 +1,7 @@
+
+pistart xwhatever
+picontent version="1.0"
+piend
+elemstart a
+elemend
+ok
diff --git a/test/xmldecl_err07.xml b/test/pi06.xml
index b95486c..b95486c 100644
--- a/test/xmldecl_err07.xml
+++ b/test/pi06.xml
diff --git a/test/pi07.out b/test/pi07.out
new file mode 100644
index 0000000..0998791
--- /dev/null
+++ b/test/pi07.out
@@ -0,0 +1,7 @@
+
+pistart xml-stylesheet
+picontent href="mystyle.css" type="text/css"
+piend
+elemstart a
+elemend
+ok
diff --git a/test/pi07.xml b/test/pi07.xml
new file mode 100644
index 0000000..d2e3976
--- /dev/null
+++ b/test/pi07.xml
@@ -0,0 +1 @@
+<?xml version="1.0"?><?xml-stylesheet href="mystyle.css" type="text/css"?><a/>
diff --git a/test/xmldecl_err07.out b/test/xmldecl_err07.out
deleted file mode 100644
index 9456d41..0000000
--- a/test/xmldecl_err07.out
+++ /dev/null
@@ -1,2 +0,0 @@
-
-error
diff --git a/test/xmldecl_err08.out b/test/xmldecl_err08.out
deleted file mode 100644
index 9456d41..0000000
--- a/test/xmldecl_err08.out
+++ /dev/null
@@ -1,2 +0,0 @@
-
-error
diff --git a/test/xmldecl_err08.xml b/test/xmldecl_err08.xml
deleted file mode 100644
index 650eaab..0000000
--- a/test/xmldecl_err08.xml
+++ /dev/null
@@ -1 +0,0 @@
-<?xwhatever version="1.0"><a/>
diff --git a/yxml-states b/yxml-states
index fada42f..dd84cac 100644
--- a/yxml-states
+++ b/yxml-states
@@ -75,38 +75,41 @@ le3 '!' @misc3 comment0; '?' @misc3 pi0
lee1 '-' @misc1 comment1; 'D' "OCTYPE" dt0
lee2 '-' @misc2 comment1; '[' "CDATA[" cd0
-leq0 'x' "ml" xmldecl0; NameStart @misc1 pistart pi1
-
-
-# XMLDecl, starting from '<?xml', returns to misc1
-xmldecl0 SP xmldecl1
-xmldecl1 SP xmldecl1; 'v' "ersion" ver0
-xmldecl2 SP xmldecl3; '?' xmldecl7
-xmldecl3 SP xmldecl3; '?' xmldecl7; 'e' "ncoding" enc0; 's' "tandalone" std0
-xmldecl4 SP xmldecl5; '?' xmldecl7
-xmldecl5 SP xmldecl5; '?' xmldecl7; 's' "tandalone" std0
-xmldecl6 SP xmldecl6; '?' xmldecl7
-xmldecl7 '>' misc1
-
-# VersionInfo, after 'version', returns to xmldecl2
+leq0 'x' @misc1 pistart xmldecl0; NameStart @misc1 pistart pi1
+
+
+# XMLDecl, starting from '<?x', returns to misc1.
+# Interleaves some parts with pi because a non-<?xml is just a PI rather than an XMLDecl.
+xmldecl0 'm' piname xmldecl1; Name piname pi1; '?' pinameend pi4; SP pinameend pi2
+xmldecl1 'l' piname xmldecl2; Name piname pi1; '?' pinameend pi4; SP pinameend pi2
+xmldecl2 SP piabort xmldecl3; Name piname pi1
+xmldecl3 SP xmldecl3; 'v' "ersion" ver0
+xmldecl4 SP xmldecl5; '?' xmldecl9
+xmldecl5 SP xmldecl5; '?' xmldecl9; 'e' "ncoding" enc0; 's' "tandalone" std0
+xmldecl6 SP xmldecl7; '?' xmldecl9
+xmldecl7 SP xmldecl7; '?' xmldecl9; 's' "tandalone" std0
+xmldecl8 SP xmldecl8; '?' xmldecl9
+xmldecl9 '>' misc1
+
+# VersionInfo, after 'version', returns to xmldecl4
ver0 SP ver0; '=' ver1
ver1 SP ver1; '\''|'"' $quote "1." ver2
ver2 Num ver3;
-ver3 Num ver3; $quote xmldecl2
+ver3 Num ver3; $quote xmldecl4
-# EncodingDecl, after 'e', returns to xmldecl4
+# EncodingDecl, after 'e', returns to xmldecl6
# TODO: Pass the encoding value to the application?
enc0 SP enc0; '=' enc1
enc1 SP enc1; '\''|'"' $quote enc2
enc2 Alpha enc3
-enc3 EncName enc3; $quote xmldecl4
+enc3 EncName enc3; $quote xmldecl6
-# SDDecl, after 'standalone', returns to xmldecl6
+# SDDecl, after 'standalone', returns to xmldecl8
# TODO: Pass the standalone flag to the application?
std0 SP std0; '=' std1
std1 SP std1; '\''|'"' $quote std2
std2 'y' "es" std3; 'n' "o" std3
-std3 $quote xmldecl6
+std3 $quote xmldecl8
# Comment, after '<!', returns to @
diff --git a/yxml.c b/yxml.c
index d8d9d4c..04dcd95 100644
--- a/yxml.c
+++ b/yxml.c
@@ -89,7 +89,9 @@ typedef enum {
YXMLS_xmldecl4,
YXMLS_xmldecl5,
YXMLS_xmldecl6,
- YXMLS_xmldecl7
+ YXMLS_xmldecl7,
+ YXMLS_xmldecl8,
+ YXMLS_xmldecl9
} yxml_state_t;
@@ -265,6 +267,7 @@ static inline int yxml_attrvalend (yxml_t *x, unsigned ch) { yxml_popstack(x); r
static inline int yxml_pistart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); }
static inline int yxml_piname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
+static inline int yxml_piabort (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_OK; }
static inline int yxml_pinameend(yxml_t *x, unsigned ch) {
return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART;
}
@@ -621,7 +624,7 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
if(yxml_isEncName(ch))
return YXML_OK;
if(x->quote == ch) {
- x->state = YXMLS_xmldecl4;
+ x->state = YXMLS_xmldecl6;
return YXML_OK;
}
break;
@@ -755,10 +758,9 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
break;
case YXMLS_leq0:
if(ch == (unsigned char)'x') {
- x->state = YXMLS_string;
- x->nextstate = YXMLS_xmldecl0;
- x->string = (unsigned char *)"ml";
- return YXML_OK;
+ x->state = YXMLS_xmldecl0;
+ x->nextstate = YXMLS_misc1;
+ return yxml_pistart(x, ch);
}
if(yxml_isNameStart(ch)) {
x->state = YXMLS_pi1;
@@ -885,7 +887,7 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
break;
case YXMLS_std3:
if(x->quote == ch) {
- x->state = YXMLS_xmldecl6;
+ x->state = YXMLS_xmldecl8;
return YXML_OK;
}
break;
@@ -918,17 +920,57 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
if(yxml_isNum(ch))
return YXML_OK;
if(x->quote == ch) {
- x->state = YXMLS_xmldecl2;
+ x->state = YXMLS_xmldecl4;
return YXML_OK;
}
break;
case YXMLS_xmldecl0:
- if(yxml_isSP(ch)) {
+ if(ch == (unsigned char)'m') {
x->state = YXMLS_xmldecl1;
- return YXML_OK;
+ return yxml_piname(x, ch);
+ }
+ if(yxml_isName(ch)) {
+ x->state = YXMLS_pi1;
+ return yxml_piname(x, ch);
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi4;
+ return yxml_pinameend(x, ch);
+ }
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_pi2;
+ return yxml_pinameend(x, ch);
}
break;
case YXMLS_xmldecl1:
+ if(ch == (unsigned char)'l') {
+ x->state = YXMLS_xmldecl2;
+ return yxml_piname(x, ch);
+ }
+ if(yxml_isName(ch)) {
+ x->state = YXMLS_pi1;
+ return yxml_piname(x, ch);
+ }
+ if(ch == (unsigned char)'?') {
+ x->state = YXMLS_pi4;
+ return yxml_pinameend(x, ch);
+ }
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_pi2;
+ return yxml_pinameend(x, ch);
+ }
+ break;
+ case YXMLS_xmldecl2:
+ if(yxml_isSP(ch)) {
+ x->state = YXMLS_xmldecl3;
+ return yxml_piabort(x, ch);
+ }
+ if(yxml_isName(ch)) {
+ x->state = YXMLS_pi1;
+ return yxml_piname(x, ch);
+ }
+ break;
+ case YXMLS_xmldecl3:
if(yxml_isSP(ch))
return YXML_OK;
if(ch == (unsigned char)'v') {
@@ -938,21 +980,21 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
return YXML_OK;
}
break;
- case YXMLS_xmldecl2:
+ case YXMLS_xmldecl4:
if(yxml_isSP(ch)) {
- x->state = YXMLS_xmldecl3;
+ x->state = YXMLS_xmldecl5;
return YXML_OK;
}
if(ch == (unsigned char)'?') {
- x->state = YXMLS_xmldecl7;
+ x->state = YXMLS_xmldecl9;
return YXML_OK;
}
break;
- case YXMLS_xmldecl3:
+ case YXMLS_xmldecl5:
if(yxml_isSP(ch))
return YXML_OK;
if(ch == (unsigned char)'?') {
- x->state = YXMLS_xmldecl7;
+ x->state = YXMLS_xmldecl9;
return YXML_OK;
}
if(ch == (unsigned char)'e') {
@@ -968,21 +1010,21 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
return YXML_OK;
}
break;
- case YXMLS_xmldecl4:
+ case YXMLS_xmldecl6:
if(yxml_isSP(ch)) {
- x->state = YXMLS_xmldecl5;
+ x->state = YXMLS_xmldecl7;
return YXML_OK;
}
if(ch == (unsigned char)'?') {
- x->state = YXMLS_xmldecl7;
+ x->state = YXMLS_xmldecl9;
return YXML_OK;
}
break;
- case YXMLS_xmldecl5:
+ case YXMLS_xmldecl7:
if(yxml_isSP(ch))
return YXML_OK;
if(ch == (unsigned char)'?') {
- x->state = YXMLS_xmldecl7;
+ x->state = YXMLS_xmldecl9;
return YXML_OK;
}
if(ch == (unsigned char)'s') {
@@ -992,15 +1034,15 @@ yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
return YXML_OK;
}
break;
- case YXMLS_xmldecl6:
+ case YXMLS_xmldecl8:
if(yxml_isSP(ch))
return YXML_OK;
if(ch == (unsigned char)'?') {
- x->state = YXMLS_xmldecl7;
+ x->state = YXMLS_xmldecl9;
return YXML_OK;
}
break;
- case YXMLS_xmldecl7:
+ case YXMLS_xmldecl9:
if(ch == (unsigned char)'>') {
x->state = YXMLS_misc1;
return YXML_OK;
diff --git a/yxml.c.in b/yxml.c.in
index d92d6d1..3ce0b0f 100644
--- a/yxml.c.in
+++ b/yxml.c.in
@@ -201,6 +201,7 @@ static inline int yxml_attrvalend (yxml_t *x, unsigned ch) { yxml_popstack(x); r
static inline int yxml_pistart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); }
static inline int yxml_piname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
+static inline int yxml_piabort (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_OK; }
static inline int yxml_pinameend(yxml_t *x, unsigned ch) {
return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART;
}