Modify XML encoder to hex-encode invalid UTF-8 sequences

There are still some gaps: for example, surrogate pairs are left
as-is even though they are not valid UTF-8. This might be for the
better, though -- WTF-8 does support surrogate pairs inside
text.

Closes #1207
This commit is contained in:
Martin Hořeňovský
2018-03-25 20:44:30 +02:00
parent e11508b48a
commit 3b801c4fda
8 changed files with 923 additions and 39 deletions

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuitesloose text artifact
>
<testsuite name="<exe-name>" errors="17" failures="105" tests="1030" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
<testsuite name="<exe-name>" errors="17" failures="105" tests="1072" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
<testcase classname="<exe-name>.global" name="# A test name that starts with a #" time="{duration}"/>
<testcase classname="<exe-name>.global" name="#1005: Comparing pointer to int and long (NULL can be either on various systems)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="#1027" time="{duration}"/>
@@ -706,6 +706,12 @@ Exception.tests.cpp:<line number>
<testcase classname="<exe-name>.global" name="XmlEncode/string with quotes" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode/string with control char (1)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode/string with control char (x7F)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Valid utf-8 strings" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Various broken strings" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Overlong encodings" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Surrogate pairs" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Invalid start byte" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Missing continuation byte(s)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="array&lt;int, N> -> toString" time="{duration}"/>
<testcase classname="<exe-name>.global" name="atomic if" time="{duration}"/>
<testcase classname="<exe-name>.global" name="boolean member" time="{duration}"/>