From 27fa7218ca68a60cf8e7d0407951e789f25bc006 Mon Sep 17 00:00:00 2001 From: Bronek Kozicki Date: Mon, 12 Mar 2018 20:34:13 +0000 Subject: [PATCH] Do not violate UTF-8 in XML output According to UTF-8 encoding rules, there are no valid one byte long codepoints which start with a high bit set (i.e. 0x80 or higher value). Hence such XML output needs to be escaped, same as a control character. --- include/internal/catch_xmlwriter.cpp | 8 +++++-- .../Baselines/compact.sw.approved.txt | 2 ++ .../Baselines/console.std.approved.txt | 2 +- .../Baselines/console.sw.approved.txt | 21 ++++++++++++++++- .../SelfTest/Baselines/junit.sw.approved.txt | 3 ++- .../SelfTest/Baselines/xml.sw.approved.txt | 23 +++++++++++++++++-- .../SelfTest/IntrospectiveTests/Xml.tests.cpp | 6 ++++- 7 files changed, 57 insertions(+), 8 deletions(-) diff --git a/include/internal/catch_xmlwriter.cpp b/include/internal/catch_xmlwriter.cpp index a3316f46..f23c796d 100644 --- a/include/internal/catch_xmlwriter.cpp +++ b/include/internal/catch_xmlwriter.cpp @@ -45,10 +45,14 @@ namespace Catch { default: // Escape control chars - based on contribution by @espenalb in PR #465 and // by @mrpi PR #588 - if ( ( c >= 0 && c < '\x09' ) || ( c > '\x0D' && c < '\x20') || c=='\x7F' ) { + // Also take care to avoid violation of UTF-8 encoding rules + if ( ( c >= 0 && c < '\x09' ) + || ( c > '\x0D' && c < '\x20') + || ( c & 0x80 ) + || c=='\x7F') { // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0 os << "\\x" << std::uppercase << std::hex << std::setfill('0') << std::setw(2) - << static_cast( c ); + << static_cast( static_cast(c) ); } else os << c; diff --git a/projects/SelfTest/Baselines/compact.sw.approved.txt b/projects/SelfTest/Baselines/compact.sw.approved.txt index b02a9e0e..433c3087 100644 --- a/projects/SelfTest/Baselines/compact.sw.approved.txt +++ b/projects/SelfTest/Baselines/compact.sw.approved.txt @@ -905,6 +905,8 @@ Xml.tests.cpp:: passed: encode( 
stringWithQuotes, Catch::XmlEncode: "don't "quote" me on that" Xml.tests.cpp:: passed: encode( "[/x01]" ) == "[//x01]" for: "[/x01]" == "[/x01]" Xml.tests.cpp:: passed: encode( "[/x7F]" ) == "[//x7F]" for: "[/x7F]" == "[/x7F]" +Xml.tests.cpp:: passed: encode( "[/x80]" ) == "[//x80]" for: "[/x80]" == "[/x80]" +Xml.tests.cpp:: passed: encode( "[/xFF]" ) == "[//xFF]" for: "[/xFF]" == "[/xFF]" ToStringVector.tests.cpp:: passed: Catch::Detail::stringify( empty ) == "{ }" for: "{ }" == "{ }" ToStringVector.tests.cpp:: passed: Catch::Detail::stringify( oneValue ) == "{ 42 }" for: "{ 42 }" == "{ 42 }" ToStringVector.tests.cpp:: passed: Catch::Detail::stringify( twoValues ) == "{ 42, 250 }" for: "{ 42, 250 }" == "{ 42, 250 }" diff --git a/projects/SelfTest/Baselines/console.std.approved.txt b/projects/SelfTest/Baselines/console.std.approved.txt index 6df0a222..cc6f4e64 100644 --- a/projects/SelfTest/Baselines/console.std.approved.txt +++ b/projects/SelfTest/Baselines/console.std.approved.txt @@ -1085,5 +1085,5 @@ due to unexpected exception with message: =============================================================================== test cases: 202 | 149 passed | 49 failed | 4 failed as expected -assertions: 1015 | 887 passed | 107 failed | 21 failed as expected +assertions: 1017 | 889 passed | 107 failed | 21 failed as expected diff --git a/projects/SelfTest/Baselines/console.sw.approved.txt b/projects/SelfTest/Baselines/console.sw.approved.txt index 3a611b72..66069832 100644 --- a/projects/SelfTest/Baselines/console.sw.approved.txt +++ b/projects/SelfTest/Baselines/console.sw.approved.txt @@ -7112,6 +7112,25 @@ PASSED: with expansion: "[\x7F]" == "[\x7F]" +------------------------------------------------------------------------------- +XmlEncode + string with high bit set (x80 and xFF) +------------------------------------------------------------------------------- +Xml.tests.cpp: +............................................................................... 
+ +Xml.tests.cpp:: +PASSED: + REQUIRE( encode( "[\x80]" ) == "[\\x80]" ) +with expansion: + "[\x80]" == "[\x80]" + +Xml.tests.cpp:: +PASSED: + REQUIRE( encode( "[\xFF]" ) == "[\\xFF]" ) +with expansion: + "[\xFF]" == "[\xFF]" + ------------------------------------------------------------------------------- array -> toString ------------------------------------------------------------------------------- @@ -8599,5 +8618,5 @@ PASSED: =============================================================================== test cases: 202 | 136 passed | 62 failed | 4 failed as expected -assertions: 1029 | 887 passed | 121 failed | 21 failed as expected +assertions: 1031 | 889 passed | 121 failed | 21 failed as expected diff --git a/projects/SelfTest/Baselines/junit.sw.approved.txt b/projects/SelfTest/Baselines/junit.sw.approved.txt index 3cdf5a4f..96c2e136 100644 --- a/projects/SelfTest/Baselines/junit.sw.approved.txt +++ b/projects/SelfTest/Baselines/junit.sw.approved.txt @@ -1,7 +1,7 @@ - + @@ -706,6 +706,7 @@ Exception.tests.cpp: + diff --git a/projects/SelfTest/Baselines/xml.sw.approved.txt b/projects/SelfTest/Baselines/xml.sw.approved.txt index 69d35c83..cc098d75 100644 --- a/projects/SelfTest/Baselines/xml.sw.approved.txt +++ b/projects/SelfTest/Baselines/xml.sw.approved.txt @@ -7992,6 +7992,25 @@ Message from section two +
+ + + encode( "[\x80]" ) == "[\\x80]" + + + "[\x80]" == "[\x80]" + + + + + encode( "[\xFF]" ) == "[\\xFF]" + + + "[\xFF]" == "[\xFF]" + + + +
@@ -9469,7 +9488,7 @@ loose text artifact - + - + diff --git a/projects/SelfTest/IntrospectiveTests/Xml.tests.cpp b/projects/SelfTest/IntrospectiveTests/Xml.tests.cpp index 9bbed258..d98ffcdd 100644 --- a/projects/SelfTest/IntrospectiveTests/Xml.tests.cpp +++ b/projects/SelfTest/IntrospectiveTests/Xml.tests.cpp @@ -38,4 +38,8 @@ TEST_CASE( "XmlEncode" ) { SECTION( "string with control char (x7F)" ) { REQUIRE( encode( "[\x7F]" ) == "[\\x7F]" ); } -} \ No newline at end of file + SECTION( "string with high bit set (x80 and xFF)" ) { + REQUIRE( encode( "[\x80]" ) == "[\\x80]" ); + REQUIRE( encode( "[\xFF]" ) == "[\\xFF]" ); + } +}