Modify XML encoder to hex-encode invalid UTF-8 sequences

There are still some holes, e.g. we leave surrogate pairs be
even though they are not a part of valid UTF-8, but this might
be for the better -- WTF-8 does support surrogate pairs inside
text.

Closes #1207
This commit is contained in:
Martin Hořeňovský
2018-03-25 20:44:30 +02:00
parent e11508b48a
commit 3b801c4fda
8 changed files with 923 additions and 39 deletions

View File

@@ -905,6 +905,48 @@ Xml.tests.cpp:<line number>: passed: encode( stringWithQuotes, Catch::XmlEncode:
"don't &quot;quote&quot; me on that"
Xml.tests.cpp:<line number>: passed: encode( "[/x01]" ) == "[//x01]" for: "[/x01]" == "[/x01]"
Xml.tests.cpp:<line number>: passed: encode( "[/x7F]" ) == "[//x7F]" for: "[/x7F]" == "[/x7F]"
Xml.tests.cpp:<line number>: passed: encode(u8"Here be 👾") == u8"Here be 👾" for: "Here be 👾" == "Here be 👾"
Xml.tests.cpp:<line number>: passed: encode(u8"šš") == u8"šš" for: "šš" == "šš"
Xml.tests.cpp:<line number>: passed: encode("/xDF/xBF") == "/xDF/xBF" for: "߿" == "߿"
Xml.tests.cpp:<line number>: passed: encode("/xE0/xA0/x80") == "/xE0/xA0/x80" for: "ࠀ" == "ࠀ"
Xml.tests.cpp:<line number>: passed: encode("/xED/x9F/xBF") == "/xED/x9F/xBF" for: "퟿" == "퟿"
Xml.tests.cpp:<line number>: passed: encode("/xEE/x80/x80") == "/xEE/x80/x80" for: "" == ""
Xml.tests.cpp:<line number>: passed: encode("/xEF/xBF/xBF") == "/xEF/xBF/xBF" for: "￿" == "￿"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x90/x80/x80") == "/xF0/x90/x80/x80" for: "𐀀" == "𐀀"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x8F/xBF/xBF") == "/xF4/x8F/xBF/xBF" for: "􏿿" == "􏿿"
Xml.tests.cpp:<line number>: passed: encode("Here /xFF be 👾") == u8"Here //xFF be 👾" for: "Here /xFF be 👾" == "Here /xFF be 👾"
Xml.tests.cpp:<line number>: passed: encode("/xFF") == "//xFF" for: "/xFF" == "/xFF"
Xml.tests.cpp:<line number>: passed: encode("/xC5/xC5/xA0") == u8"//xC5Š" for: "/xC5Š" == "/xC5Š"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x90/x80/x80") == u8"//xF4//x90//x80//x80" for: "/xF4/x90/x80/x80" == "/xF4/x90/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xC0/x80") == u8"//xC0//x80" for: "/xC0/x80" == "/xC0/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x80/x80/x80") == u8"//xF0//x80//x80//x80" for: "/xF0/x80/x80/x80" == "/xF0/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xC1/xBF") == u8"//xC1//xBF" for: "/xC1/xBF" == "/xC1/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xE0/x9F/xBF") == u8"//xE0//x9F//xBF" for: "/xE0/x9F/xBF" == "/xE0/x9F/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x8F/xBF/xBF") == u8"//xF0//x8F//xBF//xBF" for: "/xF0/x8F/xBF/xBF" == "/xF0/x8F/xBF/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xED/xA0/x80") == "/xED/xA0/x80" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/xED/xAF/xBF") == "/xED/xAF/xBF" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/xED/xB0/x80") == "/xED/xB0/x80" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/xED/xBF/xBF") == "/xED/xBF/xBF" for: "<22><><EFBFBD>" == "<22><><EFBFBD>"
Xml.tests.cpp:<line number>: passed: encode("/x80") == u8"//x80" for: "/x80" == "/x80"
Xml.tests.cpp:<line number>: passed: encode("/x81") == u8"//x81" for: "/x81" == "/x81"
Xml.tests.cpp:<line number>: passed: encode("/xBC") == u8"//xBC" for: "/xBC" == "/xBC"
Xml.tests.cpp:<line number>: passed: encode("/xBF") == u8"//xBF" for: "/xBF" == "/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xF5/x80/x80/x80") == u8"//xF5//x80//x80//x80" for: "/xF5/x80/x80/x80" == "/xF5/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF6/x80/x80/x80") == u8"//xF6//x80//x80//x80" for: "/xF6/x80/x80/x80" == "/xF6/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF7/x80/x80/x80") == u8"//xF7//x80//x80//x80" for: "/xF7/x80/x80/x80" == "/xF7/x80/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xDE") == u8"//xDE" for: "/xDE" == "/xDE"
Xml.tests.cpp:<line number>: passed: encode("/xDF") == u8"//xDF" for: "/xDF" == "/xDF"
Xml.tests.cpp:<line number>: passed: encode("/xE0") == u8"//xE0" for: "/xE0" == "/xE0"
Xml.tests.cpp:<line number>: passed: encode("/xEF") == u8"//xEF" for: "/xEF" == "/xEF"
Xml.tests.cpp:<line number>: passed: encode("/xF0") == u8"//xF0" for: "/xF0" == "/xF0"
Xml.tests.cpp:<line number>: passed: encode("/xF4") == u8"//xF4" for: "/xF4" == "/xF4"
Xml.tests.cpp:<line number>: passed: encode("/xE0/x80") == u8"//xE0//x80" for: "/xE0/x80" == "/xE0/x80"
Xml.tests.cpp:<line number>: passed: encode("/xE0/xBF") == u8"//xE0//xBF" for: "/xE0/xBF" == "/xE0/xBF"
Xml.tests.cpp:<line number>: passed: encode("/xE1/x80") == u8"//xE1//x80" for: "/xE1/x80" == "/xE1/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x80") == u8"//xF0//x80" for: "/xF0/x80" == "/xF0/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x80") == u8"//xF4//x80" for: "/xF4/x80" == "/xF4/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF0/x80/x80") == u8"//xF0//x80//x80" for: "/xF0/x80/x80" == "/xF0/x80/x80"
Xml.tests.cpp:<line number>: passed: encode("/xF4/x80/x80") == u8"//xF4//x80//x80" for: "/xF4/x80/x80" == "/xF4/x80/x80"
ToStringVector.tests.cpp:<line number>: passed: Catch::Detail::stringify( empty ) == "{ }" for: "{ }" == "{ }"
ToStringVector.tests.cpp:<line number>: passed: Catch::Detail::stringify( oneValue ) == "{ 42 }" for: "{ 42 }" == "{ 42 }"
ToStringVector.tests.cpp:<line number>: passed: Catch::Detail::stringify( twoValues ) == "{ 42, 250 }" for: "{ 42, 250 }" == "{ 42, 250 }"