Improve performance of writing JSON values

The old code was exceedingly simple, as it went char-by-char and
decided whether to write it to the output stream as-is, or escaped.
This caused a _lot_ of stream writes of individual characters.

The new code instead looks for characters that need escaping, and
bulk-writes the non-escaped characters in between them. This leads
to about the same performance for strings that comprise of only
escaped characters, and 3-10x improvement for strings without any
escaping needed.

In practice, we should expect the former rather than the latter,
but this is still nice improvement.
This commit is contained in:
Martin Hořeňovský
2025-08-21 23:36:22 +02:00
parent 78a9518a28
commit fb2e4fbe41
2 changed files with 66 additions and 23 deletions

View File

@@ -7,8 +7,36 @@
// SPDX-License-Identifier: BSL-1.0
#include <catch2/internal/catch_enforce.hpp>
#include <catch2/internal/catch_jsonwriter.hpp>
#include <catch2/internal/catch_unreachable.hpp>
namespace Catch {
namespace {
static bool needsEscape( char c ) {
return c == '"' || c == '\\' || c == '\b' || c == '\f' ||
c == '\n' || c == '\r' || c == '\t';
}
static Catch::StringRef makeEscapeStringRef( char c ) {
if ( c == '"' ) {
return "\\\""_sr;
} else if ( c == '\\' ) {
return "\\\\"_sr;
} else if ( c == '\b' ) {
return "\\b"_sr;
} else if ( c == '\f' ) {
return "\\f"_sr;
} else if ( c == '\n' ) {
return "\\n"_sr;
} else if ( c == '\r' ) {
return "\\r"_sr;
} else if ( c == '\t' ) {
return "\\t"_sr;
}
Catch::Detail::Unreachable();
}
} // namespace
void JsonUtils::indent( std::ostream& os, std::uint64_t level ) {
for ( std::uint64_t i = 0; i < level; ++i ) {
os << " ";
@@ -118,30 +146,19 @@ namespace Catch {
void JsonValueWriter::writeImpl( Catch::StringRef value, bool quote ) {
if ( quote ) { m_os << '"'; }
for (char c : value) {
// Escape list taken from https://www.json.org/json-en.html,
// string definition.
// Note that while forward slash _can_ be escaped, it does
// not have to be, if JSON is not further embedded somewhere
// where forward slash is meaningful.
if ( c == '"' ) {
m_os << "\\\"";
} else if ( c == '\\' ) {
m_os << "\\\\";
} else if ( c == '\b' ) {
m_os << "\\b";
} else if ( c == '\f' ) {
m_os << "\\f";
} else if ( c == '\n' ) {
m_os << "\\n";
} else if ( c == '\r' ) {
m_os << "\\r";
} else if ( c == '\t' ) {
m_os << "\\t";
} else {
m_os << c;
size_t current_start = 0;
for ( size_t i = 0; i < value.size(); ++i ) {
if ( needsEscape( value[i] ) ) {
if ( current_start < i ) {
m_os << value.substr( current_start, i - current_start );
}
m_os << makeEscapeStringRef( value[i] );
current_start = i + 1;
}
}
if ( current_start < value.size() ) {
m_os << value.substr( current_start, value.size() - current_start );
}
if ( quote ) { m_os << '"'; }
}