Modify XML encoder to hex-encode invalid UTF-8 sequences

There are still some holes, e.g. we leave surrogate pairs be
even though they are not a part of valid UTF-8, but this might
be for the better -- WTF-8 does support surrogate pairs inside
text.

Closes #1207
This commit is contained in:
Martin Hořeňovský 2018-03-25 20:44:30 +02:00
parent e11508b48a
commit 3b801c4fda
8 changed files with 923 additions and 39 deletions

View File

@ -7,51 +7,145 @@
#include "catch_xmlwriter.h"
#include "catch_enforce.h"
#include <iomanip>
using uchar = unsigned char;
namespace Catch {
namespace {
// Classifies a UTF-8 lead byte by its high bits and returns the length of the
// sequence it introduces: 2 for 110x xxxx, 3 for 1110 xxxx, 4 for 1111 0xxx.
// Note that, despite the name, the returned count includes the lead byte
// itself. Any other bit pattern is reported through CATCH_INTERNAL_ERROR.
size_t trailingBytes(unsigned char c) {
    const unsigned lead = c;
    // Compare the top 3, 4 and 5 bits against the respective lead-byte markers.
    if ((lead >> 5) == 0x06) { return 2; }
    if ((lead >> 4) == 0x0E) { return 3; }
    if ((lead >> 3) == 0x1E) { return 4; }
    CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
}
// Extracts the payload bits carried by a UTF-8 lead byte, i.e. the bits left
// once the length marker is stripped: 5 bits for 110x xxxx, 4 bits for
// 1110 xxxx and 3 bits for 1111 0xxx. Any other bit pattern is reported
// through CATCH_INTERNAL_ERROR.
uint32_t headerValue(unsigned char c) {
    const unsigned lead = c;
    if ((lead >> 5) == 0x06) {
        return static_cast<uint32_t>(lead & 0x1Fu);
    }
    if ((lead >> 4) == 0x0E) {
        return static_cast<uint32_t>(lead & 0x0Fu);
    }
    if ((lead >> 3) == 0x1E) {
        return static_cast<uint32_t>(lead & 0x07u);
    }
    CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
}
// Writes `c` to `os` as a backslash, an 'x' and two uppercase hex digits
// (for example 0x1F is written as \x1F).
//
// std::uppercase, std::hex and std::setfill are sticky: they stay set on the
// stream after the insertion. The caller's formatting flags and fill character
// are therefore saved up front and restored afterwards, so that later integer
// output on the same stream is not silently switched to zero-filled hex.
void hexEscapeChar(std::ostream& os, unsigned char c) {
    const std::ios_base::fmtflags savedFlags = os.flags();
    const char savedFill = os.fill();
    os << "\\x"
       << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
       << static_cast<int>(c);
    os.fill(savedFill);
    os.flags(savedFlags);
}
} // anonymous namespace
// Initializes m_str from `str` and m_forWhat from `forWhat`.
// encodeTo() compares m_forWhat against ForAttributes to decide whether a
// double quote is written as &quot; or left as-is.
XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat )
: m_str( str ),
m_forWhat( forWhat )
{}
// Writes m_str to `os`, replacing '<' and '&' with entities, '"' with &quot;
// when encoding for attributes, '>' with &gt; when it follows "]]", and
// writing control characters and bytes that do not start a well-formed UTF-8
// sequence as "\xNN" hex escapes.
// NOTE(review): this span looks like a diff rendering with the +/- markers
// stripped -- the loop over `i` with `char c` and the loop over `idx` with
// `uchar c` appear to be the old and new versions of the same body
// interleaved. Confirm against the real file before editing the code.
void XmlEncode::encodeTo( std::ostream& os ) const {
// Apostrophe escaping not necessary if we always use " to write attributes
// (see: http://www.w3.org/TR/xml/#syntax)
for( std::size_t i = 0; i < m_str.size(); ++ i ) {
char c = m_str[i];
switch( c ) {
case '<': os << "&lt;"; break;
case '&': os << "&amp;"; break;
for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
uchar c = m_str[idx];
switch (c) {
case '<': os << "&lt;"; break;
case '&': os << "&amp;"; break;
case '>':
// See: http://www.w3.org/TR/xml/#syntax
if( i > 2 && m_str[i-1] == ']' && m_str[i-2] == ']' )
os << "&gt;";
else
os << c;
case '>':
// See: http://www.w3.org/TR/xml/#syntax
// NOTE(review): when the string starts with "]]>", the '>' sits at idx == 2,
// so `idx > 2` leaves it unescaped; `idx >= 2` looks intended -- confirm.
if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
os << "&gt;";
else
os << c;
break;
case '\"':
if (m_forWhat == ForAttributes)
os << "&quot;";
else
os << c;
break;
default:
// Check for control characters and invalid utf-8
// Escape control characters in standard ascii
// see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
// 0x09..0x0D (tab, LF, VT, FF, CR) are deliberately let through by this test.
if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
hexEscapeChar(os, c);
break;
}
case '\"':
if( m_forWhat == ForAttributes )
os << "&quot;";
else
os << c;
// Plain ASCII: Write it to stream
// (0x7F itself never reaches this point; it was escaped just above.)
if (c < 0x7F) {
os << c;
break;
}
default:
// Escape control chars - based on contribution by @espenalb in PR #465 and
// by @mrpi PR #588
if ( ( c >= 0 && c < '\x09' ) || ( c > '\x0D' && c < '\x20') || c=='\x7F' ) {
// see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
os << "\\x" << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
<< static_cast<int>( c );
}
else
os << c;
// UTF-8 territory
// Check if the encoding is valid and if it is not, hex escape bytes.
// Important: We do not check the exact decoded values for validity, only the encoding format
// First check that this byte is a valid lead byte:
// This means that it is not encoded as 1111 1XXX
// Or as 10XX XXXX
if (c < 0xC0 ||
c >= 0xF8) {
hexEscapeChar(os, c);
break;
}
// encBytes is the full length of the sequence, lead byte included
// (trailingBytes returns 2, 3 or 4).
auto encBytes = trailingBytes(c);
// Are there enough bytes left to avoid accessing out-of-bounds memory?
if (idx + encBytes - 1 >= m_str.size()) {
hexEscapeChar(os, c);
break;
}
// The header is valid, check data
// The next encBytes bytes must together be a valid utf-8
// This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
bool valid = true;
uint32_t value = headerValue(c);
// Fold the 6 payload bits of each following byte into the decoded value.
for (std::size_t n = 1; n < encBytes; ++n) {
uchar nc = m_str[idx + n];
valid &= ((nc & 0xC0) == 0x80);
value = (value << 6) | (nc & 0x3F);
}
if (
// Wrong bit pattern of following bytes
(!valid) ||
// Overlong encodings
(value < 0x80) ||
(0x80 <= value && value < 0x800 && encBytes > 2) ||
// NOTE(review): `0x800 < value` excludes value == 0x800 itself, so a 4-byte
// encoding of 0x800 is not rejected here; `0x800 <= value` looks intended.
(0x800 < value && value < 0x10000 && encBytes > 3) ||
// Encoded value out of range
(value >= 0x110000)
) {
// Only the lead byte is escaped here; idx is not advanced past the sequence,
// so the following bytes are examined again by later loop iterations.
hexEscapeChar(os, c);
break;
}
// If we got here, this is in fact a valid(ish) utf-8 sequence
for (std::size_t n = 0; n < encBytes; ++n) {
os << m_str[idx + n];
}
// Skip the bytes just written; the loop's own ++idx accounts for the last one.
idx += encBytes - 1;
break;
}
}
}

View File

@ -56,7 +56,7 @@ namespace Catch {
XmlWriter( std::ostream& os = Catch::cout() );
~XmlWriter();
XmlWriter( XmlWriter const& ) = delete;
XmlWriter& operator=( XmlWriter const& ) = delete;

View File

@ -905,6 +905,48 @@ Xml.tests.cpp:<line number>: passed: encode( stringWithQuotes, Catch::XmlEncode:
"don't &quot;quote&quot; me on that"
Xml.tests.cpp:<line number>: passed: encode( "[/x01]" ) == "[//x01]" for: "[/x01]" == "[/x01]"
Xml.tests.cpp:<line number>: passed: encode( "[/x7F]" ) == "[//x7F]" for: "[/x7F]" == "[/x7F]"
Xml.tests.cpp:<line number>: passed: encode(u8"Here be 👾") == u8"Here be 👾" for: "Here be 👾" == "Here be 👾"
Xml.tests.cpp:<line number>: passed: encode(u8"šš") == u8"šš" for: "šš" == "šš"
Xml.tests.cpp:<line number>: passed: encode("/xDF/xBF") == "/xDF/xBF" for: "߿" == "߿"
Xml.tests.cpp:<line number>: passed: encode("/xE0/xA0/x80") == "/xE0/xA0/x80" for: "ࠀ" == "ࠀ"
Xml.tests.cpp:<line number>: passed: encode("/xED/x9F/xBF") == "/xED/x9F/xBF" for: "퟿" == "퟿"
Xml.tests.cpp:<line number>: passed: encode("/xEE/x80/x80") == "/xEE/x80/x80" for: "" == ""
Xml.tests.cpp:<line number>: passed: encode("/xEF/xBF/xBF") == "/xEF/xBF/xBF" for: "￿" == "￿"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x90/x80/x80") == "/xF0/x90/x80/x80" for: "𐀀" == "𐀀"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x8F/xBF/xBF") == "/xF4/x8F/xBF/xBF" for: "􏿿" == "􏿿"
Xml.tests.cpp:<line number>: passed: encode("Here /xFF be 👾") == u8"Here //xFF be 👾" for: "Here /xFF be 👾" == "Here /xFF be 👾"
Xml.tests.cpp:<line number>: passed: encode("/xFF") == "//xFF" for: "/xFF" == "/xFF"
Xml.tests.cpp:<line number>: passed: encode("/xC5/xC5/xA0") == u8"//xC5Š" for: "/xC5Š" == "/xC5Š"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x90/x80/x80") == u8"//xF4//x90//x80//x80" for: "/xF4/x90/x80/x80" == "/xF4/x90/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xC0/x80") == u8"//xC0//x80" for: "/xC0/x80" == "/xC0/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x80/x80/x80") == u8"//xF0//x80//x80//x80" for: "/xF0/x80/x80/x80" == "/xF0/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xC1/xBF") == u8"//xC1//xBF" for: "/xC1/xBF" == "/xC1/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xE0/x9F/xBF") == u8"//xE0//x9F//xBF" for: "/xE0/x9F/xBF" == "/xE0/x9F/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x8F/xBF/xBF") == u8"//xF0//x8F//xBF//xBF" for: "/xF0/x8F/xBF/xBF" == "/xF0/x8F/xBF/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xED/xA0/x80") == "/xED/xA0/x80" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/xED/xAF/xBF") == "/xED/xAF/xBF" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/xED/xB0/x80") == "/xED/xB0/x80" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/xED/xBF/xBF") == "/xED/xBF/xBF" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/x80") == u8"//x80" for: "/x80" == "/x80"
Xml.tests.cpp:<line number>: passed: encode("/x81") == u8"//x81" for: "/x81" == "/x81"
Xml.tests.cpp:<line number>: passed: encode("/xBC") == u8"//xBC" for: "/xBC" == "/xBC"
Xml.tests.cpp:<line number>: passed: encode("/xBF") == u8"//xBF" for: "/xBF" == "/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xF5/x80/x80/x80") == u8"//xF5//x80//x80//x80" for: "/xF5/x80/x80/x80" == "/xF5/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF6/x80/x80/x80") == u8"//xF6//x80//x80//x80" for: "/xF6/x80/x80/x80" == "/xF6/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF7/x80/x80/x80") == u8"//xF7//x80//x80//x80" for: "/xF7/x80/x80/x80" == "/xF7/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xDE") == u8"//xDE" for: "/xDE" == "/xDE"
Xml.tests.cpp:<line number>: passed: encode("/xDF") == u8"//xDF" for: "/xDF" == "/xDF"
Xml.tests.cpp:<line number>: passed: encode("/xE0") == u8"//xE0" for: "/xE0" == "/xE0"
Xml.tests.cpp:<line number>: passed: encode("/xEF") == u8"//xEF" for: "/xEF" == "/xEF"
Xml.tests.cpp:<line number>: passed: encode("/xF0") == u8"//xF0" for: "/xF0" == "/xF0"
Xml.tests.cpp:<line number>: passed: encode("/xF4") == u8"//xF4" for: "/xF4" == "/xF4"
Xml.tests.cpp:<line number>: passed: encode("/xE0/x80") == u8"//xE0//x80" for: "/xE0/x80" == "/xE0/x80"
Xml.tests.cpp:<line number>: passed: encode("/xE0/xBF") == u8"//xE0//xBF" for: "/xE0/xBF" == "/xE0/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xE1/x80") == u8"//xE1//x80" for: "/xE1/x80" == "/xE1/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x80") == u8"//xF0//x80" for: "/xF0/x80" == "/xF0/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x80") == u8"//xF4//x80" for: "/xF4/x80" == "/xF4/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x80/x80") == u8"//xF0//x80//x80" for: "/xF0/x80/x80" == "/xF0/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x80/x80") == u8"//xF4//x80//x80" for: "/xF4/x80/x80" == "/xF4/x80/x80"
ToStringVector.tests.cpp:<line number>: passed: Catch::Detail::stringify( empty ) == "{ }" for: "{ }" == "{ }"
ToStringVector.tests.cpp:<line number>: passed: Catch::Detail::stringify( oneValue ) == "{ 42 }" for: "{ 42 }" == "{ 42 }"
ToStringVector.tests.cpp:<line number>: passed: Catch::Detail::stringify( twoValues ) == "{ 42, 250 }" for: "{ 42, 250 }" == "{ 42, 250 }"

View File

@ -1084,6 +1084,6 @@ due to unexpected exception with message:
Why would you throw a std::string?
===============================================================================
test cases: 202 | 149 passed | 49 failed | 4 failed as expected
assertions: 1015 | 887 passed | 107 failed | 21 failed as expected
test cases: 203 | 150 passed | 49 failed | 4 failed as expected
assertions: 1057 | 929 passed | 107 failed | 21 failed as expected

View File

@ -7112,6 +7112,305 @@ PASSED:
with expansion:
"[\x7F]" == "[\x7F]"
-------------------------------------------------------------------------------
XmlEncode: UTF-8
Valid utf-8 strings
-------------------------------------------------------------------------------
Xml.tests.cpp:<line number>
...............................................................................
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode(u8"Here be 👾") == u8"Here be 👾" )
with expansion:
"Here be 👾" == "Here be 👾"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode(u8"šš") == u8"šš" )
with expansion:
"šš" == "šš"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xDF\xBF") == "\xDF\xBF" )
with expansion:
"߿" == "߿"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xE0\xA0\x80") == "\xE0\xA0\x80" )
with expansion:
"ࠀ" == "ࠀ"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xED\x9F\xBF") == "\xED\x9F\xBF" )
with expansion:
"퟿" == "퟿"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xEE\x80\x80") == "\xEE\x80\x80" )
with expansion:
"" == ""
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xEF\xBF\xBF") == "\xEF\xBF\xBF" )
with expansion:
"￿" == "￿"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF0\x90\x80\x80") == "\xF0\x90\x80\x80" )
with expansion:
"𐀀" == "𐀀"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF4\x8F\xBF\xBF") == "\xF4\x8F\xBF\xBF" )
with expansion:
"􏿿" == "􏿿"
-------------------------------------------------------------------------------
XmlEncode: UTF-8
Invalid utf-8 strings
Various broken strings
-------------------------------------------------------------------------------
Xml.tests.cpp:<line number>
...............................................................................
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("Here \xFF be 👾") == u8"Here \\xFF be 👾" )
with expansion:
"Here \xFF be 👾" == "Here \xFF be 👾"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xFF") == "\\xFF" )
with expansion:
"\xFF" == "\xFF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xC5\xC5\xA0") == u8"\\xC5Š" )
with expansion:
"\xC5Š" == "\xC5Š"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF4\x90\x80\x80") == u8"\\xF4\\x90\\x80\\x80" )
with expansion:
"\xF4\x90\x80\x80" == "\xF4\x90\x80\x80"
-------------------------------------------------------------------------------
XmlEncode: UTF-8
Invalid utf-8 strings
Overlong encodings
-------------------------------------------------------------------------------
Xml.tests.cpp:<line number>
...............................................................................
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xC0\x80") == u8"\\xC0\\x80" )
with expansion:
"\xC0\x80" == "\xC0\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF0\x80\x80\x80") == u8"\\xF0\\x80\\x80\\x80" )
with expansion:
"\xF0\x80\x80\x80" == "\xF0\x80\x80\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xC1\xBF") == u8"\\xC1\\xBF" )
with expansion:
"\xC1\xBF" == "\xC1\xBF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xE0\x9F\xBF") == u8"\\xE0\\x9F\\xBF" )
with expansion:
"\xE0\x9F\xBF" == "\xE0\x9F\xBF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF0\x8F\xBF\xBF") == u8"\\xF0\\x8F\\xBF\\xBF" )
with expansion:
"\xF0\x8F\xBF\xBF" == "\xF0\x8F\xBF\xBF"
-------------------------------------------------------------------------------
XmlEncode: UTF-8
Invalid utf-8 strings
Surrogate pairs
-------------------------------------------------------------------------------
Xml.tests.cpp:<line number>
...............................................................................
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xED\xA0\x80") == "\xED\xA0\x80" )
with expansion:
"<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xED\xAF\xBF") == "\xED\xAF\xBF" )
with expansion:
"<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xED\xB0\x80") == "\xED\xB0\x80" )
with expansion:
"<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xED\xBF\xBF") == "\xED\xBF\xBF" )
with expansion:
"<22><><EFBFBD>" == "<22><><EFBFBD>"
-------------------------------------------------------------------------------
XmlEncode: UTF-8
Invalid utf-8 strings
Invalid start byte
-------------------------------------------------------------------------------
Xml.tests.cpp:<line number>
...............................................................................
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\x80") == u8"\\x80" )
with expansion:
"\x80" == "\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\x81") == u8"\\x81" )
with expansion:
"\x81" == "\x81"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xBC") == u8"\\xBC" )
with expansion:
"\xBC" == "\xBC"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xBF") == u8"\\xBF" )
with expansion:
"\xBF" == "\xBF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF5\x80\x80\x80") == u8"\\xF5\\x80\\x80\\x80" )
with expansion:
"\xF5\x80\x80\x80" == "\xF5\x80\x80\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF6\x80\x80\x80") == u8"\\xF6\\x80\\x80\\x80" )
with expansion:
"\xF6\x80\x80\x80" == "\xF6\x80\x80\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF7\x80\x80\x80") == u8"\\xF7\\x80\\x80\\x80" )
with expansion:
"\xF7\x80\x80\x80" == "\xF7\x80\x80\x80"
-------------------------------------------------------------------------------
XmlEncode: UTF-8
Invalid utf-8 strings
Missing continuation byte(s)
-------------------------------------------------------------------------------
Xml.tests.cpp:<line number>
...............................................................................
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xDE") == u8"\\xDE" )
with expansion:
"\xDE" == "\xDE"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xDF") == u8"\\xDF" )
with expansion:
"\xDF" == "\xDF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xE0") == u8"\\xE0" )
with expansion:
"\xE0" == "\xE0"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xEF") == u8"\\xEF" )
with expansion:
"\xEF" == "\xEF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF0") == u8"\\xF0" )
with expansion:
"\xF0" == "\xF0"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF4") == u8"\\xF4" )
with expansion:
"\xF4" == "\xF4"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xE0\x80") == u8"\\xE0\\x80" )
with expansion:
"\xE0\x80" == "\xE0\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xE0\xBF") == u8"\\xE0\\xBF" )
with expansion:
"\xE0\xBF" == "\xE0\xBF"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xE1\x80") == u8"\\xE1\\x80" )
with expansion:
"\xE1\x80" == "\xE1\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF0\x80") == u8"\\xF0\\x80" )
with expansion:
"\xF0\x80" == "\xF0\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF4\x80") == u8"\\xF4\\x80" )
with expansion:
"\xF4\x80" == "\xF4\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF0\x80\x80") == u8"\\xF0\\x80\\x80" )
with expansion:
"\xF0\x80\x80" == "\xF0\x80\x80"
Xml.tests.cpp:<line number>:
PASSED:
CHECK( encode("\xF4\x80\x80") == u8"\\xF4\\x80\\x80" )
with expansion:
"\xF4\x80\x80" == "\xF4\x80\x80"
-------------------------------------------------------------------------------
array<int, N> -> toString
-------------------------------------------------------------------------------
@ -8598,6 +8897,6 @@ Misc.tests.cpp:<line number>:
PASSED:
===============================================================================
test cases: 202 | 136 passed | 62 failed | 4 failed as expected
assertions: 1029 | 887 passed | 121 failed | 21 failed as expected
test cases: 203 | 137 passed | 62 failed | 4 failed as expected
assertions: 1071 | 929 passed | 121 failed | 21 failed as expected

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuitesloose text artifact
>
<testsuite name="<exe-name>" errors="17" failures="105" tests="1030" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
<testsuite name="<exe-name>" errors="17" failures="105" tests="1072" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
<testcase classname="<exe-name>.global" name="# A test name that starts with a #" time="{duration}"/>
<testcase classname="<exe-name>.global" name="#1005: Comparing pointer to int and long (NULL can be either on various systems)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="#1027" time="{duration}"/>
@ -706,6 +706,12 @@ Exception.tests.cpp:<line number>
<testcase classname="<exe-name>.global" name="XmlEncode/string with quotes" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode/string with control char (1)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode/string with control char (x7F)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Valid utf-8 strings" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Various broken strings" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Overlong encodings" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Surrogate pairs" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Invalid start byte" time="{duration}"/>
<testcase classname="<exe-name>.global" name="XmlEncode: UTF-8/Invalid utf-8 strings/Missing continuation byte(s)" time="{duration}"/>
<testcase classname="<exe-name>.global" name="array&lt;int, N> -> toString" time="{duration}"/>
<testcase classname="<exe-name>.global" name="atomic if" time="{duration}"/>
<testcase classname="<exe-name>.global" name="boolean member" time="{duration}"/>

View File

@ -7881,7 +7881,7 @@ Message from section two
<TestCase name="X/level/1/b" tags="[Tricky]" filename="projects/<exe-name>/UsageTests/Tricky.tests.cpp" >
<OverallResult success="true"/>
</TestCase>
<TestCase name="XmlEncode" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<TestCase name="XmlEncode" tags="[XML]" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="normal string" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="REQUIRE" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
@ -7994,6 +7994,378 @@ Message from section two
</Section>
<OverallResult success="true"/>
</TestCase>
<TestCase name="XmlEncode: UTF-8" tags="[UTF-8][XML]" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="Valid utf-8 strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode(u8"Here be 👾") == u8"Here be 👾"
</Original>
<Expanded>
"Here be 👾" == "Here be 👾"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode(u8"šš") == u8"šš"
</Original>
<Expanded>
"šš" == "šš"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xDF\xBF") == "\xDF\xBF"
</Original>
<Expanded>
"߿" == "߿"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xE0\xA0\x80") == "\xE0\xA0\x80"
</Original>
<Expanded>
"ࠀ" == "ࠀ"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xED\x9F\xBF") == "\xED\x9F\xBF"
</Original>
<Expanded>
"퟿" == "퟿"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xEE\x80\x80") == "\xEE\x80\x80"
</Original>
<Expanded>
"" == ""
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xEF\xBF\xBF") == "\xEF\xBF\xBF"
</Original>
<Expanded>
"￿" == "￿"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF0\x90\x80\x80") == "\xF0\x90\x80\x80"
</Original>
<Expanded>
"𐀀" == "𐀀"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF4\x8F\xBF\xBF") == "\xF4\x8F\xBF\xBF"
</Original>
<Expanded>
"􏿿" == "􏿿"
</Expanded>
</Expression>
<OverallResults successes="9" failures="0" expectedFailures="0"/>
</Section>
<Section name="Invalid utf-8 strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="Various broken strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("Here \xFF be 👾") == u8"Here \\xFF be 👾"
</Original>
<Expanded>
"Here \xFF be 👾" == "Here \xFF be 👾"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xFF") == "\\xFF"
</Original>
<Expanded>
"\xFF" == "\xFF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xC5\xC5\xA0") == u8"\\xC5Š"
</Original>
<Expanded>
"\xC5Š" == "\xC5Š"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF4\x90\x80\x80") == u8"\\xF4\\x90\\x80\\x80"
</Original>
<Expanded>
"\xF4\x90\x80\x80" == "\xF4\x90\x80\x80"
</Expanded>
</Expression>
<OverallResults successes="4" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="4" failures="0" expectedFailures="0"/>
</Section>
<Section name="Invalid utf-8 strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="Overlong encodings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xC0\x80") == u8"\\xC0\\x80"
</Original>
<Expanded>
"\xC0\x80" == "\xC0\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF0\x80\x80\x80") == u8"\\xF0\\x80\\x80\\x80"
</Original>
<Expanded>
"\xF0\x80\x80\x80" == "\xF0\x80\x80\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xC1\xBF") == u8"\\xC1\\xBF"
</Original>
<Expanded>
"\xC1\xBF" == "\xC1\xBF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xE0\x9F\xBF") == u8"\\xE0\\x9F\\xBF"
</Original>
<Expanded>
"\xE0\x9F\xBF" == "\xE0\x9F\xBF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF0\x8F\xBF\xBF") == u8"\\xF0\\x8F\\xBF\\xBF"
</Original>
<Expanded>
"\xF0\x8F\xBF\xBF" == "\xF0\x8F\xBF\xBF"
</Expanded>
</Expression>
<OverallResults successes="5" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="5" failures="0" expectedFailures="0"/>
</Section>
<Section name="Invalid utf-8 strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="Surrogate pairs" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xED\xA0\x80") == "\xED\xA0\x80"
</Original>
<Expanded>
"<22><><EFBFBD>" == "<22><><EFBFBD>"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xED\xAF\xBF") == "\xED\xAF\xBF"
</Original>
<Expanded>
"<22><><EFBFBD>" == "<22><><EFBFBD>"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xED\xB0\x80") == "\xED\xB0\x80"
</Original>
<Expanded>
"<22><><EFBFBD>" == "<22><><EFBFBD>"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xED\xBF\xBF") == "\xED\xBF\xBF"
</Original>
<Expanded>
"<22><><EFBFBD>" == "<22><><EFBFBD>"
</Expanded>
</Expression>
<OverallResults successes="4" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="4" failures="0" expectedFailures="0"/>
</Section>
<Section name="Invalid utf-8 strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="Invalid start byte" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\x80") == u8"\\x80"
</Original>
<Expanded>
"\x80" == "\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\x81") == u8"\\x81"
</Original>
<Expanded>
"\x81" == "\x81"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xBC") == u8"\\xBC"
</Original>
<Expanded>
"\xBC" == "\xBC"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xBF") == u8"\\xBF"
</Original>
<Expanded>
"\xBF" == "\xBF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF5\x80\x80\x80") == u8"\\xF5\\x80\\x80\\x80"
</Original>
<Expanded>
"\xF5\x80\x80\x80" == "\xF5\x80\x80\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF6\x80\x80\x80") == u8"\\xF6\\x80\\x80\\x80"
</Original>
<Expanded>
"\xF6\x80\x80\x80" == "\xF6\x80\x80\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF7\x80\x80\x80") == u8"\\xF7\\x80\\x80\\x80"
</Original>
<Expanded>
"\xF7\x80\x80\x80" == "\xF7\x80\x80\x80"
</Expanded>
</Expression>
<OverallResults successes="7" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="7" failures="0" expectedFailures="0"/>
</Section>
<Section name="Invalid utf-8 strings" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Section name="Missing continuation byte(s)" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xDE") == u8"\\xDE"
</Original>
<Expanded>
"\xDE" == "\xDE"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xDF") == u8"\\xDF"
</Original>
<Expanded>
"\xDF" == "\xDF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xE0") == u8"\\xE0"
</Original>
<Expanded>
"\xE0" == "\xE0"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xEF") == u8"\\xEF"
</Original>
<Expanded>
"\xEF" == "\xEF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF0") == u8"\\xF0"
</Original>
<Expanded>
"\xF0" == "\xF0"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF4") == u8"\\xF4"
</Original>
<Expanded>
"\xF4" == "\xF4"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xE0\x80") == u8"\\xE0\\x80"
</Original>
<Expanded>
"\xE0\x80" == "\xE0\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xE0\xBF") == u8"\\xE0\\xBF"
</Original>
<Expanded>
"\xE0\xBF" == "\xE0\xBF"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xE1\x80") == u8"\\xE1\\x80"
</Original>
<Expanded>
"\xE1\x80" == "\xE1\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF0\x80") == u8"\\xF0\\x80"
</Original>
<Expanded>
"\xF0\x80" == "\xF0\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF4\x80") == u8"\\xF4\\x80"
</Original>
<Expanded>
"\xF4\x80" == "\xF4\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF0\x80\x80") == u8"\\xF0\\x80\\x80"
</Original>
<Expanded>
"\xF0\x80\x80" == "\xF0\x80\x80"
</Expanded>
</Expression>
<Expression success="true" type="CHECK" filename="projects/<exe-name>/IntrospectiveTests/Xml.tests.cpp" >
<Original>
encode("\xF4\x80\x80") == u8"\\xF4\\x80\\x80"
</Original>
<Expanded>
"\xF4\x80\x80" == "\xF4\x80\x80"
</Expanded>
</Expression>
<OverallResults successes="13" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="13" failures="0" expectedFailures="0"/>
</Section>
<OverallResult success="true"/>
</TestCase>
<TestCase name="array&lt;int, N> -> toString" tags="[array][containers][toString]" filename="projects/<exe-name>/UsageTests/ToStringVector.tests.cpp" >
<Expression success="true" type="REQUIRE" filename="projects/<exe-name>/UsageTests/ToStringVector.tests.cpp" >
<Original>
@ -9469,7 +9841,7 @@ loose text artifact
</Section>
<OverallResult success="true"/>
</TestCase>
<OverallResults successes="887" failures="122" expectedFailures="21"/>
<OverallResults successes="929" failures="122" expectedFailures="21"/>
</Group>
<OverallResults successes="887" failures="121" expectedFailures="21"/>
<OverallResults successes="929" failures="121" expectedFailures="21"/>
</Catch>

View File

@ -1,5 +1,4 @@
#include "catch.hpp"
#include "internal/catch_xmlwriter.h"
#include <sstream>
@ -10,7 +9,7 @@ inline std::string encode( std::string const& str, Catch::XmlEncode::ForWhat for
return oss.str();
}
TEST_CASE( "XmlEncode" ) {
TEST_CASE( "XmlEncode", "[XML]" ) {
SECTION( "normal string" ) {
REQUIRE( encode( "normal string" ) == "normal string" );
}
@ -38,4 +37,76 @@ TEST_CASE( "XmlEncode" ) {
SECTION( "string with control char (x7F)" ) {
REQUIRE( encode( "[\x7F]" ) == "[\\x7F]" );
}
}
}
// Thanks to Peter Bindels (dascandy) for some of the tests
//
// Checks how the local encode() helper treats UTF-8 input.
// Each CHECK pins one of two expected outcomes:
//  * the bytes come back unchanged (well-formed sequences, and surrogates), or
//  * every offending byte comes back as the literal text "\xHH"
//    (backslash, 'x', two uppercase hex digits).
// NOTE(review): the text of these CHECK expressions is echoed by the
// reporters, so rewording them presumably requires regenerating the
// approval-test baselines -- confirm before restyling.
TEST_CASE("XmlEncode: UTF-8", "[XML][UTF-8]") {
// Well-formed input: expected to be passed through byte-for-byte.
SECTION("Valid utf-8 strings") {
CHECK(encode(u8"Here be 👾") == u8"Here be 👾");
CHECK(encode(u8"šš") == u8"šš");
// Boundary code points for each encoded length.
CHECK(encode("\xDF\xBF") == "\xDF\xBF"); // U+07FF, largest 2-byte code point
CHECK(encode("\xE0\xA0\x80") == "\xE0\xA0\x80"); // U+0800, smallest 3-byte code point
CHECK(encode("\xED\x9F\xBF") == "\xED\x9F\xBF"); // U+D7FF, last code point below the surrogate range
CHECK(encode("\xEE\x80\x80") == "\xEE\x80\x80"); // U+E000, first code point above the surrogate range
CHECK(encode("\xEF\xBF\xBF") == "\xEF\xBF\xBF"); // U+FFFF, largest 3-byte code point
CHECK(encode("\xF0\x90\x80\x80") == "\xF0\x90\x80\x80"); // U+10000, smallest 4-byte code point
CHECK(encode("\xF4\x8F\xBF\xBF") == "\xF4\x8F\xBF\xBF"); // U+10FFFF, largest Unicode code point
}
SECTION("Invalid utf-8 strings") {
SECTION("Various broken strings") {
// A stray 0xFF in otherwise valid text: only that byte is expected to be escaped.
CHECK(encode("Here \xFF be 👾") == u8"Here \\xFF be 👾");
CHECK(encode("\xFF") == "\\xFF");
// First 0xC5 is a lead byte followed by another lead byte instead of a
// continuation byte, so it is expected to be escaped on its own; the
// following C5 A0 is a well-formed 'Š' (U+0160) and is expected to survive.
CHECK(encode("\xC5\xC5\xA0") == u8"\\xC5Š");
CHECK(encode("\xF4\x90\x80\x80") == u8"\\xF4\\x90\\x80\\x80"); // 0x110000 -- out of unicode range
}
// Overlong encodings spend more bytes than the code point needs;
// every byte of such a sequence is expected to be escaped.
SECTION("Overlong encodings") {
CHECK(encode("\xC0\x80") == u8"\\xC0\\x80"); // \0 encoded in 2 bytes
CHECK(encode("\xF0\x80\x80\x80") == u8"\\xF0\\x80\\x80\\x80"); // Super-over-long \0 (4 bytes)
CHECK(encode("\xC1\xBF") == u8"\\xC1\\xBF"); // ASCII char as UTF-8 (0x7F) in 2 bytes
CHECK(encode("\xE0\x9F\xBF") == u8"\\xE0\\x9F\\xBF"); // 0x7FF encoded in 3 bytes
CHECK(encode("\xF0\x8F\xBF\xBF") == u8"\\xF0\\x8F\\xBF\\xBF"); // 0xFFFF encoded in 4 bytes
}
// Note that we actually don't modify surrogate pairs, as we do not do strict checking.
// These encoded surrogate halves are therefore expected to pass through unchanged.
SECTION("Surrogate pairs") {
CHECK(encode("\xED\xA0\x80") == "\xED\xA0\x80"); // Invalid surrogate half 0xD800
CHECK(encode("\xED\xAF\xBF") == "\xED\xAF\xBF"); // Invalid surrogate half 0xDBFF
CHECK(encode("\xED\xB0\x80") == "\xED\xB0\x80"); // Invalid surrogate half 0xDC00
CHECK(encode("\xED\xBF\xBF") == "\xED\xBF\xBF"); // Invalid surrogate half 0xDFFF
}
SECTION("Invalid start byte") {
// 0x80..0xBF are continuation bytes and cannot start a sequence.
CHECK(encode("\x80") == u8"\\x80");
CHECK(encode("\x81") == u8"\\x81");
CHECK(encode("\xBC") == u8"\\xBC");
CHECK(encode("\xBF") == u8"\\xBF");
// Out of range: lead bytes 0xF5..0xF7 could only encode values above 0x10FFFF
CHECK(encode("\xF5\x80\x80\x80") == u8"\\xF5\\x80\\x80\\x80");
CHECK(encode("\xF6\x80\x80\x80") == u8"\\xF6\\x80\\x80\\x80");
CHECK(encode("\xF7\x80\x80\x80") == u8"\\xF7\\x80\\x80\\x80");
}
// Sequences cut short by the end of the string; every byte that is
// present is expected to be escaped.
SECTION("Missing continuation byte(s)") {
// Missing first continuation byte (lone lead byte)
CHECK(encode("\xDE") == u8"\\xDE");
CHECK(encode("\xDF") == u8"\\xDF");
CHECK(encode("\xE0") == u8"\\xE0");
CHECK(encode("\xEF") == u8"\\xEF");
CHECK(encode("\xF0") == u8"\\xF0");
CHECK(encode("\xF4") == u8"\\xF4");
// Missing second continuation byte (lead byte plus one continuation byte)
CHECK(encode("\xE0\x80") == u8"\\xE0\\x80");
CHECK(encode("\xE0\xBF") == u8"\\xE0\\xBF");
CHECK(encode("\xE1\x80") == u8"\\xE1\\x80");
CHECK(encode("\xF0\x80") == u8"\\xF0\\x80");
CHECK(encode("\xF4\x80") == u8"\\xF4\\x80");
// Missing third continuation byte (4-byte lead plus two continuation bytes)
CHECK(encode("\xF0\x80\x80") == u8"\\xF0\\x80\\x80");
CHECK(encode("\xF4\x80\x80") == u8"\\xF4\\x80\\x80");
}
}
}