summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2020-03-07 17:25:10 +0100
committer Yorhel <git@yorhel.nl> 2020-03-07 17:25:10 +0100
commit f9438757fc49b9f86961ddb55ae430e36bb88ebb (patch)
tree 91b45ef5a36cda232dd44154509ac6dea233e3ef
parent 5d96bbbef09e7e97a3ec133d837141028788c675 (diff)
Minor doc fixes + encoding considerations section
-rwxr-xr-x yxml-gen.pl 4
-rw-r--r-- yxml.c 4
-rw-r--r-- yxml.h 2
-rw-r--r-- yxml.md 19
4 files changed, 26 insertions, 3 deletions
diff --git a/yxml-gen.pl b/yxml-gen.pl
index 589a445..ad86328 100755
--- a/yxml-gen.pl
+++ b/yxml-gen.pl
@@ -106,7 +106,9 @@ sub readmachine {
sub writeout {
local @ARGV = ($code_fn);
open my $F, '>', $out_fn or die $!;
- print $F "/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */\n\n";
+ print $F "/* This file is generated by yxml-gen.pl using yxml-states and yxml.c.in as input files.\n";
+ print $F " * It is preferable to edit those files instead of this one if you want to make a change.\n";
+ print $F " * The source files can be found through the homepage: https://dev.yorhel.nl/yxml */\n\n";
while(<>) {
s#/\*=STATES=\*/#join ",\n", map "\tYXMLS_$_", sort keys %states#e;
s#/\*=SWITCH=\*/#join "\n", map $states{$_}, sort keys %states#e;
diff --git a/yxml.c b/yxml.c
index dbbc064..10f88dc 100644
--- a/yxml.c
+++ b/yxml.c
@@ -1,4 +1,6 @@
-/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */
+/* This file is generated by yxml-gen.pl using yxml-states and yxml.c.in as input files.
+ * It is preferable to edit those files instead of this one if you want to make a change.
+ * The source files can be found through the homepage: https://dev.yorhel.nl/yxml */
/* Copyright (c) 2013-2014 Yoran Heling
diff --git a/yxml.h b/yxml.h
index f4f323d..a0cc5f9 100644
--- a/yxml.h
+++ b/yxml.h
@@ -30,7 +30,7 @@
#define inline __inline
#endif
-/* Full API documentation for this library can be found in the "yxml.pod" file
+/* Full API documentation for this library can be found in the "yxml.md" file
* in the yxml git repository, or online at http://dev.yorhel.nl/yxml/man */
typedef enum {
diff --git a/yxml.md b/yxml.md
index c23e31d..f00236a 100644
--- a/yxml.md
+++ b/yxml.md
@@ -426,3 +426,22 @@ function should **ONLY** be used directly after the `YXML_ELEMSTART`,
`yxml_parse()`, calling this function at any other time may not give the
correct results. This function should **NOT** be used on strings other than
`x->elem`, `x->attr` or `x->pi`.
+
+
+# Encoding considerations
+
+Yxml operates on bytes and assumes that the input XML document has been encoded
+in an ASCII-compatible encoding (i.e. byte values below 128 are interpreted as
+ASCII). Beyond that, yxml does **not** validate that the input XML document
+conforms to any specific encoding - in this sense it follows the
+garbage-in-garbage-out principle. All strings returned by the yxml API will
+have the same encoding as the input XML document, with the exception of
+character references outside of the ASCII character range: those are encoded
+and returned as UTF-8 strings.
+
+If character encoding matters to your application (it likely does), you should
+either make sure that the input XML document as a whole is valid UTF-8 or you
+should perform UTF-8 validation on individual strings returned by the yxml API.
+The former approach is the safest and easiest, but the latter approach allows
+you to still extract useful information out of a document with an invalid
+encoding.