Handle ANSI escape sequences when performing column wrapping (#2849)

This PR adds functionality to skip around ANSI escape sequences in catch_textflow so they do not contribute to line length and line wrapping code does not split escape sequences in the middle. I've implemented this by creating a AnsiSkippingString abstraction that has a bidirectional iterator that can skip around escape sequences while iterating. Additionally I refactored Column::const_iterator to be iterator-based rather than index-based so this abstraction is a simple drop-in for std::string.

Currently only color sequences are handled, other escape sequences are left unaffected.

Motivation: Text with ANSI color sequences gets messed up when being output by Catch2 #2833.
This commit is contained in:
Jeremy Rifkin 2024-05-04 16:43:52 -05:00 committed by GitHub
parent fa5a53df17
commit 8ce2426e53
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 489 additions and 69 deletions

View File

@ -26,117 +26,228 @@ namespace {
return std::memchr( chars, c, sizeof( chars ) - 1 ) != nullptr;
}
bool isBoundary( std::string const& line, size_t at ) {
assert( at > 0 );
assert( at <= line.size() );
return at == line.size() ||
( isWhitespace( line[at] ) && !isWhitespace( line[at - 1] ) ) ||
isBreakableBefore( line[at] ) ||
isBreakableAfter( line[at - 1] );
}
} // namespace
namespace Catch {
namespace TextFlow {
void AnsiSkippingString::preprocessString() {
for ( auto it = m_string.begin(); it != m_string.end(); ) {
// try to read through an ansi sequence
while ( it != m_string.end() && *it == '\033' &&
it + 1 != m_string.end() && *( it + 1 ) == '[' ) {
auto cursor = it + 2;
while ( cursor != m_string.end() &&
( isdigit( *cursor ) || *cursor == ';' ) ) {
++cursor;
}
if ( cursor == m_string.end() || *cursor != 'm' ) {
break;
}
// 'm' -> 0xff
*cursor = AnsiSkippingString::sentinel;
// if we've read an ansi sequence, set the iterator and
// return to the top of the loop
it = cursor + 1;
}
if ( it != m_string.end() ) {
++m_size;
++it;
}
}
}
AnsiSkippingString::AnsiSkippingString( std::string const& text ):
m_string( text ) {
preprocessString();
}
AnsiSkippingString::AnsiSkippingString( std::string&& text ):
m_string( CATCH_MOVE( text ) ) {
preprocessString();
}
AnsiSkippingString::const_iterator AnsiSkippingString::begin() const {
return const_iterator( m_string );
}
AnsiSkippingString::const_iterator AnsiSkippingString::end() const {
return const_iterator( m_string, const_iterator::EndTag{} );
}
std::string AnsiSkippingString::substring( const_iterator begin,
const_iterator end ) const {
// There's one caveat here to an otherwise simple substring: when
// making a begin iterator we might have skipped ansi sequences at
// the start. If `begin` here is a begin iterator, skipped over
// initial ansi sequences, we'll use the true beginning of the
// string. Lastly: We need to transform any chars we replaced with
// 0xff back to 'm'
auto str = std::string( begin == this->begin() ? m_string.begin()
: begin.m_it,
end.m_it );
std::transform( str.begin(), str.end(), str.begin(), []( char c ) {
return c == AnsiSkippingString::sentinel ? 'm' : c;
} );
return str;
}
void AnsiSkippingString::const_iterator::tryParseAnsiEscapes() {
// check if we've landed on an ansi sequence, and if so read through
// it
while ( m_it != m_string->end() && *m_it == '\033' &&
m_it + 1 != m_string->end() && *( m_it + 1 ) == '[' ) {
auto cursor = m_it + 2;
while ( cursor != m_string->end() &&
( isdigit( *cursor ) || *cursor == ';' ) ) {
++cursor;
}
if ( cursor == m_string->end() ||
*cursor != AnsiSkippingString::sentinel ) {
break;
}
// if we've read an ansi sequence, set the iterator and
// return to the top of the loop
m_it = cursor + 1;
}
}
void AnsiSkippingString::const_iterator::advance() {
assert( m_it != m_string->end() );
m_it++;
tryParseAnsiEscapes();
}
void AnsiSkippingString::const_iterator::unadvance() {
assert( m_it != m_string->begin() );
m_it--;
// if *m_it is 0xff, scan back to the \033 and then m_it-- once more
// (and repeat check)
while ( *m_it == AnsiSkippingString::sentinel ) {
while ( *m_it != '\033' ) {
assert( m_it != m_string->begin() );
m_it--;
}
// if this happens, we must have been a begin iterator that had
// skipped over ansi sequences at the start of a string
assert( m_it != m_string->begin() );
assert( *m_it == '\033' );
m_it--;
}
}
static bool isBoundary( AnsiSkippingString const& line,
AnsiSkippingString::const_iterator it ) {
return it == line.end() ||
( isWhitespace( *it ) &&
!isWhitespace( *it.oneBefore() ) ) ||
isBreakableBefore( *it ) ||
isBreakableAfter( *it.oneBefore() );
}
void Column::const_iterator::calcLength() {
m_addHyphen = false;
m_parsedTo = m_lineStart;
AnsiSkippingString const& current_line = m_column.m_string;
std::string const& current_line = m_column.m_string;
if ( current_line[m_lineStart] == '\n' ) {
++m_parsedTo;
if ( m_parsedTo == current_line.end() ) {
m_lineEnd = m_parsedTo;
return;
}
assert( m_lineStart != current_line.end() );
if ( *m_lineStart == '\n' ) { ++m_parsedTo; }
const auto maxLineLength = m_column.m_width - indentSize();
const auto maxParseTo = std::min(current_line.size(), m_lineStart + maxLineLength);
while ( m_parsedTo < maxParseTo &&
current_line[m_parsedTo] != '\n' ) {
std::size_t lineLength = 0;
while ( m_parsedTo != current_line.end() &&
lineLength < maxLineLength && *m_parsedTo != '\n' ) {
++m_parsedTo;
++lineLength;
}
// If we encountered a newline before the column is filled,
// then we linebreak at the newline and consider this line
// finished.
if ( m_parsedTo < m_lineStart + maxLineLength ) {
m_lineLength = m_parsedTo - m_lineStart;
if ( lineLength < maxLineLength ) {
m_lineEnd = m_parsedTo;
} else {
// Look for a natural linebreak boundary in the column
// (We look from the end, so that the first found boundary is
// the right one)
size_t newLineLength = maxLineLength;
while ( newLineLength > 0 && !isBoundary( current_line, m_lineStart + newLineLength ) ) {
--newLineLength;
m_lineEnd = m_parsedTo;
while ( lineLength > 0 &&
!isBoundary( current_line, m_lineEnd ) ) {
--lineLength;
--m_lineEnd;
}
while ( newLineLength > 0 &&
isWhitespace( current_line[m_lineStart + newLineLength - 1] ) ) {
--newLineLength;
while ( lineLength > 0 &&
isWhitespace( *m_lineEnd.oneBefore() ) ) {
--lineLength;
--m_lineEnd;
}
// If we found one, then that is where we linebreak
if ( newLineLength > 0 ) {
m_lineLength = newLineLength;
} else {
// Otherwise we have to split text with a hyphen
// If we found one, then that is where we linebreak, otherwise
// we have to split text with a hyphen
if ( lineLength == 0 ) {
m_addHyphen = true;
m_lineLength = maxLineLength - 1;
m_lineEnd = m_parsedTo.oneBefore();
}
}
}
size_t Column::const_iterator::indentSize() const {
auto initial =
m_lineStart == 0 ? m_column.m_initialIndent : std::string::npos;
auto initial = m_lineStart == m_column.m_string.begin()
? m_column.m_initialIndent
: std::string::npos;
return initial == std::string::npos ? m_column.m_indent : initial;
}
std::string
Column::const_iterator::addIndentAndSuffix( size_t position,
size_t length ) const {
std::string Column::const_iterator::addIndentAndSuffix(
AnsiSkippingString::const_iterator start,
AnsiSkippingString::const_iterator end ) const {
std::string ret;
const auto desired_indent = indentSize();
ret.reserve( desired_indent + length + m_addHyphen );
// ret.reserve( desired_indent + (end - start) + m_addHyphen );
ret.append( desired_indent, ' ' );
ret.append( m_column.m_string, position, length );
if ( m_addHyphen ) {
ret.push_back( '-' );
}
// ret.append( start, end );
ret += m_column.m_string.substring( start, end );
if ( m_addHyphen ) { ret.push_back( '-' ); }
return ret;
}
Column::const_iterator::const_iterator( Column const& column ): m_column( column ) {
Column::const_iterator::const_iterator( Column const& column ):
m_column( column ),
m_lineStart( column.m_string.begin() ),
m_lineEnd( column.m_string.begin() ),
m_parsedTo( column.m_string.begin() ) {
assert( m_column.m_width > m_column.m_indent );
assert( m_column.m_initialIndent == std::string::npos ||
m_column.m_width > m_column.m_initialIndent );
calcLength();
if ( m_lineLength == 0 ) {
m_lineStart = m_column.m_string.size();
if ( m_lineStart == m_lineEnd ) {
m_lineStart = m_column.m_string.end();
}
}
std::string Column::const_iterator::operator*() const {
assert( m_lineStart <= m_parsedTo );
return addIndentAndSuffix( m_lineStart, m_lineLength );
return addIndentAndSuffix( m_lineStart, m_lineEnd );
}
Column::const_iterator& Column::const_iterator::operator++() {
m_lineStart += m_lineLength;
std::string const& current_line = m_column.m_string;
if ( m_lineStart < current_line.size() && current_line[m_lineStart] == '\n' ) {
m_lineStart += 1;
m_lineStart = m_lineEnd;
AnsiSkippingString const& current_line = m_column.m_string;
if ( m_lineStart != current_line.end() && *m_lineStart == '\n' ) {
m_lineStart++;
} else {
while ( m_lineStart < current_line.size() &&
isWhitespace( current_line[m_lineStart] ) ) {
while ( m_lineStart != current_line.end() &&
isWhitespace( *m_lineStart ) ) {
++m_lineStart;
}
}
if ( m_lineStart != current_line.size() ) {
calcLength();
}
if ( m_lineStart != current_line.end() ) { calcLength(); }
return *this;
}

View File

@ -20,6 +20,107 @@ namespace Catch {
class Columns;
/**
* Abstraction for a string with ansi escape sequences that
* automatically skips over escapes when iterating. Only graphical
* escape sequences are considered.
*
* Internal representation:
* An escape sequence looks like \033[39;49m
* We need bidirectional iteration and the unbound length of escape
* sequences poses a problem for operator-- To make this work we'll
* replace the last `m` with a 0xff (this is a codepoint that won't have
* any utf-8 meaning).
*/
class AnsiSkippingString {
std::string m_string;
std::size_t m_size = 0;
// perform 0xff replacement and calculate m_size
void preprocessString();
public:
class const_iterator;
using iterator = const_iterator;
// note: must be u-suffixed or this will cause a "truncation of
// constant value" warning on MSVC
static constexpr char sentinel = static_cast<char>( 0xffu );
explicit AnsiSkippingString( std::string const& text );
explicit AnsiSkippingString( std::string&& text );
const_iterator begin() const;
const_iterator end() const;
size_t size() const { return m_size; }
std::string substring( const_iterator begin,
const_iterator end ) const;
};
class AnsiSkippingString::const_iterator {
friend AnsiSkippingString;
struct EndTag {};
const std::string* m_string;
std::string::const_iterator m_it;
explicit const_iterator( const std::string& string, EndTag ):
m_string( &string ), m_it( string.end() ) {}
void tryParseAnsiEscapes();
void advance();
void unadvance();
public:
using difference_type = std::ptrdiff_t;
using value_type = char;
using pointer = value_type*;
using reference = value_type&;
using iterator_category = std::bidirectional_iterator_tag;
explicit const_iterator( const std::string& string ):
m_string( &string ), m_it( string.begin() ) {
tryParseAnsiEscapes();
}
char operator*() const { return *m_it; }
const_iterator& operator++() {
advance();
return *this;
}
const_iterator operator++( int ) {
iterator prev( *this );
operator++();
return prev;
}
const_iterator& operator--() {
unadvance();
return *this;
}
const_iterator operator--( int ) {
iterator prev( *this );
operator--();
return prev;
}
bool operator==( const_iterator const& other ) const {
return m_it == other.m_it;
}
bool operator!=( const_iterator const& other ) const {
return !operator==( other );
}
bool operator<=( const_iterator const& other ) const {
return m_it <= other.m_it;
}
const_iterator oneBefore() const {
auto it = *this;
return --it;
}
};
/**
* Represents a column of text with specific width and indentation
*
@ -29,10 +130,11 @@ namespace Catch {
*/
class Column {
// String to be written out
std::string m_string;
AnsiSkippingString m_string;
// Width of the column for linebreaking
size_t m_width = CATCH_CONFIG_CONSOLE_WIDTH - 1;
// Indentation of other lines (including first if initial indent is unset)
// Indentation of other lines (including first if initial indent is
// unset)
size_t m_indent = 0;
// Indentation of the first line
size_t m_initialIndent = std::string::npos;
@ -47,16 +149,19 @@ namespace Catch {
Column const& m_column;
// Where does the current line start?
size_t m_lineStart = 0;
AnsiSkippingString::const_iterator m_lineStart;
// How long should the current line be?
size_t m_lineLength = 0;
AnsiSkippingString::const_iterator m_lineEnd;
// How far have we checked the string to iterate?
size_t m_parsedTo = 0;
AnsiSkippingString::const_iterator m_parsedTo;
// Should a '-' be appended to the line?
bool m_addHyphen = false;
const_iterator( Column const& column, EndTag ):
m_column( column ), m_lineStart( m_column.m_string.size() ) {}
m_column( column ),
m_lineStart( m_column.m_string.end() ),
m_lineEnd( column.m_string.end() ),
m_parsedTo( column.m_string.end() ) {}
// Calculates the length of the current line
void calcLength();
@ -66,8 +171,9 @@ namespace Catch {
// Creates an indented and (optionally) suffixed string from
// current iterator position, indentation and length.
std::string addIndentAndSuffix( size_t position,
size_t length ) const;
std::string addIndentAndSuffix(
AnsiSkippingString::const_iterator start,
AnsiSkippingString::const_iterator end ) const;
public:
using difference_type = std::ptrdiff_t;
@ -84,7 +190,8 @@ namespace Catch {
const_iterator operator++( int );
bool operator==( const_iterator const& other ) const {
return m_lineStart == other.m_lineStart && &m_column == &other.m_column;
return m_lineStart == other.m_lineStart &&
&m_column == &other.m_column;
}
bool operator!=( const_iterator const& other ) const {
return !operator==( other );
@ -125,7 +232,9 @@ namespace Catch {
size_t width() const { return m_width; }
const_iterator begin() const { return const_iterator( *this ); }
const_iterator end() const { return { *this, const_iterator::EndTag{} }; }
const_iterator end() const {
return { *this, const_iterator::EndTag{} };
}
friend std::ostream& operator<<( std::ostream& os,
Column const& col );

View File

@ -12,6 +12,7 @@
#include <sstream>
using Catch::TextFlow::Column;
using Catch::TextFlow::AnsiSkippingString;
namespace {
static std::string as_written(Column const& c) {
@ -198,3 +199,202 @@ TEST_CASE( "#1400 - TextFlow::Column wrapping would sometimes duplicate words",
" in \n"
" convallis posuere, libero nisi ultricies orci, nec lobortis.");
}
TEST_CASE( "TextFlow::AnsiSkippingString skips ansi sequences",
"[TextFlow][ansiskippingstring][approvals]" ) {
SECTION("basic string") {
std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me";
AnsiSkippingString str(text);
SECTION( "iterates forward" ) {
auto it = str.begin();
CHECK(*it == 'a');
++it;
CHECK(*it == 'b');
++it;
CHECK(*it == 'c');
++it;
CHECK(*it == 'd');
++it;
CHECK(*it == 'e');
++it;
CHECK(it == str.end());
}
SECTION( "iterates backwards" ) {
auto it = str.end();
--it;
CHECK(*it == 'e');
--it;
CHECK(*it == 'd');
--it;
CHECK(*it == 'c');
--it;
CHECK(*it == 'b');
--it;
CHECK(*it == 'a');
CHECK(it == str.begin());
}
}
SECTION( "ansi escape sequences at the start" ) {
std::string text = "\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38mc\033[0md\033[me";
AnsiSkippingString str(text);
auto it = str.begin();
CHECK(*it == 'a');
++it;
CHECK(*it == 'b');
++it;
CHECK(*it == 'c');
++it;
CHECK(*it == 'd');
++it;
CHECK(*it == 'e');
++it;
CHECK(it == str.end());
--it;
CHECK(*it == 'e');
--it;
CHECK(*it == 'd');
--it;
CHECK(*it == 'c');
--it;
CHECK(*it == 'b');
--it;
CHECK(*it == 'a');
CHECK(it == str.begin());
}
SECTION( "ansi escape sequences at the end" ) {
std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me\033[38;2;98;174;239m";
AnsiSkippingString str(text);
auto it = str.begin();
CHECK(*it == 'a');
++it;
CHECK(*it == 'b');
++it;
CHECK(*it == 'c');
++it;
CHECK(*it == 'd');
++it;
CHECK(*it == 'e');
++it;
CHECK(it == str.end());
--it;
CHECK(*it == 'e');
--it;
CHECK(*it == 'd');
--it;
CHECK(*it == 'c');
--it;
CHECK(*it == 'b');
--it;
CHECK(*it == 'a');
CHECK(it == str.begin());
}
SECTION( "skips consecutive escapes" ) {
std::string text = "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38mc\033[0md\033[me";
AnsiSkippingString str(text);
auto it = str.begin();
CHECK(*it == 'a');
++it;
CHECK(*it == 'b');
++it;
CHECK(*it == 'c');
++it;
CHECK(*it == 'd');
++it;
CHECK(*it == 'e');
++it;
CHECK(it == str.end());
--it;
CHECK(*it == 'e');
--it;
CHECK(*it == 'd');
--it;
CHECK(*it == 'c');
--it;
CHECK(*it == 'b');
--it;
CHECK(*it == 'a');
CHECK(it == str.begin());
}
SECTION( "handles incomplete ansi sequences" ) {
std::string text = "a\033[b\033[30c\033[30;d\033[30;2e";
AnsiSkippingString str(text);
CHECK(std::string(str.begin(), str.end()) == text);
}
}
TEST_CASE( "TextFlow::AnsiSkippingString computes the size properly",
"[TextFlow][ansiskippingstring][approvals]" ) {
std::string text = "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38mc\033[0md\033[me";
AnsiSkippingString str(text);
CHECK(str.size() == 5);
}
TEST_CASE( "TextFlow::AnsiSkippingString substrings properly",
"[TextFlow][ansiskippingstring][approvals]" ) {
SECTION("basic test") {
std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me";
AnsiSkippingString str(text);
auto a = str.begin();
auto b = str.begin();
++b;
++b;
CHECK(str.substring(a, b) == "a\033[38;2;98;174;239mb\033[38m");
++a;
++b;
CHECK(str.substring(a, b) == "b\033[38mc\033[0m");
CHECK(str.substring(a, str.end()) == "b\033[38mc\033[0md\033[me");
CHECK(str.substring(str.begin(), str.end()) == text);
}
SECTION("escapes at the start") {
std::string text = "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38mc\033[0md\033[me";
AnsiSkippingString str(text);
auto a = str.begin();
auto b = str.begin();
++b;
++b;
CHECK(str.substring(a, b) == "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38m");
++a;
++b;
CHECK(str.substring(a, b) == "b\033[38m\033[38m\033[38mc\033[0m");
CHECK(str.substring(a, str.end()) == "b\033[38m\033[38m\033[38mc\033[0md\033[me");
CHECK(str.substring(str.begin(), str.end()) == text);
}
SECTION("escapes at the end") {
std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me\033[38m";
AnsiSkippingString str(text);
auto a = str.begin();
auto b = str.begin();
++b;
++b;
CHECK(str.substring(a, b) == "a\033[38;2;98;174;239mb\033[38m");
++a;
++b;
CHECK(str.substring(a, b) == "b\033[38mc\033[0m");
CHECK(str.substring(a, str.end()) == "b\033[38mc\033[0md\033[me\033[38m");
CHECK(str.substring(str.begin(), str.end()) == text);
}
}
TEST_CASE( "TextFlow::Column skips ansi escape sequences",
"[TextFlow][column][approvals]" ) {
std::string text = "\033[38;2;98;174;239m\033[38;2;198;120;221mThe quick brown \033[38;2;198;120;221mfox jumped over the lazy dog\033[0m";
Column col(text);
SECTION( "width=20" ) {
col.width( 20 );
REQUIRE( as_written( col ) == "\033[38;2;98;174;239m\033[38;2;198;120;221mThe quick brown \033[38;2;198;120;221mfox\n"
"jumped over the lazy\n"
"dog\033[0m" );
}
SECTION( "width=80" ) {
col.width( 80 );
REQUIRE( as_written( col ) == text );
}
}