diff options
author | Yorhel <git@yorhel.nl> | 2020-03-07 17:25:10 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2020-03-07 17:25:10 +0100 |
commit | f9438757fc49b9f86961ddb55ae430e36bb88ebb (patch) | |
tree | 91b45ef5a36cda232dd44154509ac6dea233e3ef | |
parent | 5d96bbbef09e7e97a3ec133d837141028788c675 (diff) |
Minor doc fixes + encoding considerations section
-rwxr-xr-x | yxml-gen.pl | 4 | ||||
-rw-r--r-- | yxml.c | 4 | ||||
-rw-r--r-- | yxml.h | 2 | ||||
-rw-r--r-- | yxml.md | 19 |
4 files changed, 26 insertions, 3 deletions
diff --git a/yxml-gen.pl b/yxml-gen.pl index 589a445..ad86328 100755 --- a/yxml-gen.pl +++ b/yxml-gen.pl @@ -106,7 +106,9 @@ sub readmachine { sub writeout { local @ARGV = ($code_fn); open my $F, '>', $out_fn or die $!; - print $F "/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */\n\n"; + print $F "/* This file is generated by yxml-gen.pl using yxml-states and yxml.c.in as input files.\n"; + print $F " * It is preferable to edit those files instead of this one if you want to make a change.\n"; + print $F " * The source files can be found through the homepage: https://dev.yorhel.nl/yxml */\n\n"; while(<>) { s#/\*=STATES=\*/#join ",\n", map "\tYXMLS_$_", sort keys %states#e; s#/\*=SWITCH=\*/#join "\n", map $states{$_}, sort keys %states#e; @@ -1,4 +1,6 @@ -/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */ +/* This file is generated by yxml-gen.pl using yxml-states and yxml.c.in as input files. + * It is preferable to edit those files instead of this one if you want to make a change. + * The source files can be found through the homepage: https://dev.yorhel.nl/yxml */ /* Copyright (c) 2013-2014 Yoran Heling @@ -30,7 +30,7 @@ #define inline __inline #endif -/* Full API documentation for this library can be found in the "yxml.pod" file +/* Full API documentation for this library can be found in the "yxml.md" file * in the yxml git repository, or online at http://dev.yorhel.nl/yxml/man */ typedef enum { @@ -426,3 +426,22 @@ function should **ONLY** be used directly after the `YXML_ELEMSTART`, `yxml_parse()`, calling this function at any other time may not give the correct results. This function should **NOT** be used on strings other than `x->elem`, `x->attr` or `x->pi`. + + +# Encoding considerations + +Yxml operates on bytes and assumes that the input XML document has been encoded +in an ASCII-compatible encoding (i.e. byte values below 128 are interpreted as +ASCII). Beyond that, yxml does **not** validate that the input XML document +conforms to any specific encoding - in this sense it follows the +garbage-in-garbage-out principle. All strings returned by the yxml API will +have the same encoding as the input XML document, with the exception of +character references outside of the ASCII character range: those are encoded +and returned as UTF-8 strings. + +If character encoding matters to your application (it likely does), you should +either make sure that the input XML document as a whole is valid UTF-8 or you +should perform UTF-8 validation on individual strings returned by the yxml API. +The former approach is the safest and easiest, but the latter approach allows +you to still extract useful information out of a document with an invalid +encoding. |