diff --git a/src/catch2/internal/catch_textflow.cpp b/src/catch2/internal/catch_textflow.cpp index 857fd2b9..1c21d20e 100644 --- a/src/catch2/internal/catch_textflow.cpp +++ b/src/catch2/internal/catch_textflow.cpp @@ -26,117 +26,228 @@ namespace { return std::memchr( chars, c, sizeof( chars ) - 1 ) != nullptr; } - bool isBoundary( std::string const& line, size_t at ) { - assert( at > 0 ); - assert( at <= line.size() ); - - return at == line.size() || - ( isWhitespace( line[at] ) && !isWhitespace( line[at - 1] ) ) || - isBreakableBefore( line[at] ) || - isBreakableAfter( line[at - 1] ); - } - } // namespace namespace Catch { namespace TextFlow { + void AnsiSkippingString::preprocessString() { + for ( auto it = m_string.begin(); it != m_string.end(); ) { + // try to read through an ansi sequence + while ( it != m_string.end() && *it == '\033' && + it + 1 != m_string.end() && *( it + 1 ) == '[' ) { + auto cursor = it + 2; + while ( cursor != m_string.end() && + ( isdigit( *cursor ) || *cursor == ';' ) ) { + ++cursor; + } + if ( cursor == m_string.end() || *cursor != 'm' ) { + break; + } + // 'm' -> 0xff + *cursor = AnsiSkippingString::sentinel; + // if we've read an ansi sequence, set the iterator and + // return to the top of the loop + it = cursor + 1; + } + if ( it != m_string.end() ) { + ++m_size; + ++it; + } + } + } + + AnsiSkippingString::AnsiSkippingString( std::string const& text ): + m_string( text ) { + preprocessString(); + } + + AnsiSkippingString::AnsiSkippingString( std::string&& text ): + m_string( CATCH_MOVE( text ) ) { + preprocessString(); + } + + AnsiSkippingString::const_iterator AnsiSkippingString::begin() const { + return const_iterator( m_string ); + } + + AnsiSkippingString::const_iterator AnsiSkippingString::end() const { + return const_iterator( m_string, const_iterator::EndTag{} ); + } + + std::string AnsiSkippingString::substring( const_iterator begin, + const_iterator end ) const { + // There's one caveat here to an otherwise simple substring: when + // making a begin iterator we might have skipped ansi sequences at + // the start. If `begin` here is a begin iterator, skipped over + // initial ansi sequences, we'll use the true beginning of the + // string. Lastly: We need to transform any chars we replaced with + // 0xff back to 'm' + auto str = std::string( begin == this->begin() ? m_string.begin() + : begin.m_it, + end.m_it ); + std::transform( str.begin(), str.end(), str.begin(), []( char c ) { + return c == AnsiSkippingString::sentinel ? 'm' : c; + } ); + return str; + } + + void AnsiSkippingString::const_iterator::tryParseAnsiEscapes() { + // check if we've landed on an ansi sequence, and if so read through + // it + while ( m_it != m_string->end() && *m_it == '\033' && + m_it + 1 != m_string->end() && *( m_it + 1 ) == '[' ) { + auto cursor = m_it + 2; + while ( cursor != m_string->end() && + ( isdigit( *cursor ) || *cursor == ';' ) ) { + ++cursor; + } + if ( cursor == m_string->end() || + *cursor != AnsiSkippingString::sentinel ) { + break; + } + // if we've read an ansi sequence, set the iterator and + // return to the top of the loop + m_it = cursor + 1; + } + } + + void AnsiSkippingString::const_iterator::advance() { + assert( m_it != m_string->end() ); + m_it++; + tryParseAnsiEscapes(); + } + + void AnsiSkippingString::const_iterator::unadvance() { + assert( m_it != m_string->begin() ); + m_it--; + // if *m_it is 0xff, scan back to the \033 and then m_it-- once more + // (and repeat check) + while ( *m_it == AnsiSkippingString::sentinel ) { + while ( *m_it != '\033' ) { + assert( m_it != m_string->begin() ); + m_it--; + } + // if this happens, we must have been a begin iterator that had + // skipped over ansi sequences at the start of a string + assert( m_it != m_string->begin() ); + assert( *m_it == '\033' ); + m_it--; + } + } + + static bool isBoundary( AnsiSkippingString const& line, + AnsiSkippingString::const_iterator it ) { + return it == line.end() || + ( isWhitespace( *it ) && + !isWhitespace( *it.oneBefore() ) ) || + isBreakableBefore( *it ) || + isBreakableAfter( *it.oneBefore() ); + } void Column::const_iterator::calcLength() { m_addHyphen = false; m_parsedTo = m_lineStart; + AnsiSkippingString const& current_line = m_column.m_string; - std::string const& current_line = m_column.m_string; - if ( current_line[m_lineStart] == '\n' ) { - ++m_parsedTo; + if ( m_parsedTo == current_line.end() ) { + m_lineEnd = m_parsedTo; + return; } + assert( m_lineStart != current_line.end() ); + if ( *m_lineStart == '\n' ) { ++m_parsedTo; } + const auto maxLineLength = m_column.m_width - indentSize(); - const auto maxParseTo = std::min(current_line.size(), m_lineStart + maxLineLength); - while ( m_parsedTo < maxParseTo && - current_line[m_parsedTo] != '\n' ) { + std::size_t lineLength = 0; + while ( m_parsedTo != current_line.end() && + lineLength < maxLineLength && *m_parsedTo != '\n' ) { ++m_parsedTo; + ++lineLength; } // If we encountered a newline before the column is filled, // then we linebreak at the newline and consider this line // finished. - if ( m_parsedTo < m_lineStart + maxLineLength ) { - m_lineLength = m_parsedTo - m_lineStart; + if ( lineLength < maxLineLength ) { + m_lineEnd = m_parsedTo; } else { // Look for a natural linebreak boundary in the column // (We look from the end, so that the first found boundary is // the right one) - size_t newLineLength = maxLineLength; - while ( newLineLength > 0 && !isBoundary( current_line, m_lineStart + newLineLength ) ) { - --newLineLength; + m_lineEnd = m_parsedTo; + while ( lineLength > 0 && + !isBoundary( current_line, m_lineEnd ) ) { + --lineLength; + --m_lineEnd; } - while ( newLineLength > 0 && - isWhitespace( current_line[m_lineStart + newLineLength - 1] ) ) { - --newLineLength; + while ( lineLength > 0 && + isWhitespace( *m_lineEnd.oneBefore() ) ) { + --lineLength; + --m_lineEnd; } - // If we found one, then that is where we linebreak - if ( newLineLength > 0 ) { - m_lineLength = newLineLength; - } else { - // Otherwise we have to split text with a hyphen + // If we found one, then that is where we linebreak, otherwise + // we have to split text with a hyphen + if ( lineLength == 0 ) { m_addHyphen = true; - m_lineLength = maxLineLength - 1; + m_lineEnd = m_parsedTo.oneBefore(); } } } size_t Column::const_iterator::indentSize() const { - auto initial = - m_lineStart == 0 ? m_column.m_initialIndent : std::string::npos; + auto initial = m_lineStart == m_column.m_string.begin() + ? m_column.m_initialIndent + : std::string::npos; return initial == std::string::npos ? m_column.m_indent : initial; } - std::string - Column::const_iterator::addIndentAndSuffix( size_t position, - size_t length ) const { + std::string Column::const_iterator::addIndentAndSuffix( + AnsiSkippingString::const_iterator start, + AnsiSkippingString::const_iterator end ) const { std::string ret; const auto desired_indent = indentSize(); - ret.reserve( desired_indent + length + m_addHyphen ); + // ret.reserve( desired_indent + (end - start) + m_addHyphen ); ret.append( desired_indent, ' ' ); - ret.append( m_column.m_string, position, length ); - if ( m_addHyphen ) { - ret.push_back( '-' ); - } + // ret.append( start, end ); + ret += m_column.m_string.substring( start, end ); + if ( m_addHyphen ) { ret.push_back( '-' ); } return ret; } - Column::const_iterator::const_iterator( Column const& column ): m_column( column ) { + Column::const_iterator::const_iterator( Column const& column ): + m_column( column ), + m_lineStart( column.m_string.begin() ), + m_lineEnd( column.m_string.begin() ), + m_parsedTo( column.m_string.begin() ) { assert( m_column.m_width > m_column.m_indent ); assert( m_column.m_initialIndent == std::string::npos || m_column.m_width > m_column.m_initialIndent ); calcLength(); - if ( m_lineLength == 0 ) { - m_lineStart = m_column.m_string.size(); + if ( m_lineStart == m_lineEnd ) { + m_lineStart = m_column.m_string.end(); } } std::string Column::const_iterator::operator*() const { assert( m_lineStart <= m_parsedTo ); - return addIndentAndSuffix( m_lineStart, m_lineLength ); + return addIndentAndSuffix( m_lineStart, m_lineEnd ); } Column::const_iterator& Column::const_iterator::operator++() { - m_lineStart += m_lineLength; - std::string const& current_line = m_column.m_string; - if ( m_lineStart < current_line.size() && current_line[m_lineStart] == '\n' ) { - m_lineStart += 1; + m_lineStart = m_lineEnd; + AnsiSkippingString const& current_line = m_column.m_string; + if ( m_lineStart != current_line.end() && *m_lineStart == '\n' ) { + m_lineStart++; } else { - while ( m_lineStart < current_line.size() && - isWhitespace( current_line[m_lineStart] ) ) { + while ( m_lineStart != current_line.end() && + isWhitespace( *m_lineStart ) ) { ++m_lineStart; } } - if ( m_lineStart != current_line.size() ) { - calcLength(); - } + if ( m_lineStart != current_line.end() ) { calcLength(); } return *this; } @@ -233,25 +344,25 @@ namespace Catch { return os; } - Columns operator+(Column const& lhs, Column const& rhs) { + Columns operator+( Column const& lhs, Column const& rhs ) { Columns cols; cols += lhs; cols += rhs; return cols; } - Columns operator+(Column&& lhs, Column&& rhs) { + Columns operator+( Column&& lhs, Column&& rhs ) { Columns cols; cols += CATCH_MOVE( lhs ); cols += CATCH_MOVE( rhs ); return cols; } - Columns& operator+=(Columns& lhs, Column const& rhs) { + Columns& operator+=( Columns& lhs, Column const& rhs ) { lhs.m_columns.push_back( rhs ); return lhs; } - Columns& operator+=(Columns& lhs, Column&& rhs) { - lhs.m_columns.push_back( CATCH_MOVE(rhs) ); + Columns& operator+=( Columns& lhs, Column&& rhs ) { + lhs.m_columns.push_back( CATCH_MOVE( rhs ) ); return lhs; } Columns operator+( Columns const& lhs, Column const& rhs ) { diff --git a/src/catch2/internal/catch_textflow.hpp b/src/catch2/internal/catch_textflow.hpp index a78451d5..2d9d78a5 100644 --- a/src/catch2/internal/catch_textflow.hpp +++ b/src/catch2/internal/catch_textflow.hpp @@ -20,6 +20,107 @@ namespace Catch { class Columns; + /** + * Abstraction for a string with ansi escape sequences that + * automatically skips over escapes when iterating. Only graphical + * escape sequences are considered. + * + * Internal representation: + * An escape sequence looks like \033[39;49m + * We need bidirectional iteration and the unbound length of escape + * sequences poses a problem for operator-- To make this work we'll + * replace the last `m` with a 0xff (this is a codepoint that won't have + * any utf-8 meaning). + */ + class AnsiSkippingString { + std::string m_string; + std::size_t m_size = 0; + + // perform 0xff replacement and calculate m_size + void preprocessString(); + + public: + class const_iterator; + using iterator = const_iterator; + // note: must be u-suffixed or this will cause a "truncation of + // constant value" warning on MSVC + static constexpr char sentinel = static_cast( 0xffu ); + + explicit AnsiSkippingString( std::string const& text ); + explicit AnsiSkippingString( std::string&& text ); + + const_iterator begin() const; + const_iterator end() const; + + size_t size() const { return m_size; } + + std::string substring( const_iterator begin, + const_iterator end ) const; + }; + + class AnsiSkippingString::const_iterator { + friend AnsiSkippingString; + struct EndTag {}; + + const std::string* m_string; + std::string::const_iterator m_it; + + explicit const_iterator( const std::string& string, EndTag ): + m_string( &string ), m_it( string.end() ) {} + + void tryParseAnsiEscapes(); + void advance(); + void unadvance(); + + public: + using difference_type = std::ptrdiff_t; + using value_type = char; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::bidirectional_iterator_tag; + + explicit const_iterator( const std::string& string ): + m_string( &string ), m_it( string.begin() ) { + tryParseAnsiEscapes(); + } + + char operator*() const { return *m_it; } + + const_iterator& operator++() { + advance(); + return *this; + } + const_iterator operator++( int ) { + iterator prev( *this ); + operator++(); + return prev; + } + const_iterator& operator--() { + unadvance(); + return *this; + } + const_iterator operator--( int ) { + iterator prev( *this ); + operator--(); + return prev; + } + + bool operator==( const_iterator const& other ) const { + return m_it == other.m_it; + } + bool operator!=( const_iterator const& other ) const { + return !operator==( other ); + } + bool operator<=( const_iterator const& other ) const { + return m_it <= other.m_it; + } + + const_iterator oneBefore() const { + auto it = *this; + return --it; + } + }; + /** * Represents a column of text with specific width and indentation * @@ -29,10 +130,11 @@ namespace Catch { */ class Column { // String to be written out - std::string m_string; + AnsiSkippingString m_string; // Width of the column for linebreaking size_t m_width = CATCH_CONFIG_CONSOLE_WIDTH - 1; - // Indentation of other lines (including first if initial indent is unset) + // Indentation of other lines (including first if initial indent is + // unset) size_t m_indent = 0; // Indentation of the first line size_t m_initialIndent = std::string::npos; @@ -47,16 +149,19 @@ namespace Catch { Column const& m_column; // Where does the current line start? - size_t m_lineStart = 0; + AnsiSkippingString::const_iterator m_lineStart; // How long should the current line be? - size_t m_lineLength = 0; + AnsiSkippingString::const_iterator m_lineEnd; // How far have we checked the string to iterate? - size_t m_parsedTo = 0; + AnsiSkippingString::const_iterator m_parsedTo; // Should a '-' be appended to the line? bool m_addHyphen = false; const_iterator( Column const& column, EndTag ): - m_column( column ), m_lineStart( m_column.m_string.size() ) {} + m_column( column ), + m_lineStart( m_column.m_string.end() ), + m_lineEnd( column.m_string.end() ), + m_parsedTo( column.m_string.end() ) {} // Calculates the length of the current line void calcLength(); @@ -66,8 +171,9 @@ namespace Catch { // Creates an indented and (optionally) suffixed string from // current iterator position, indentation and length. - std::string addIndentAndSuffix( size_t position, - size_t length ) const; + std::string addIndentAndSuffix( + AnsiSkippingString::const_iterator start, + AnsiSkippingString::const_iterator end ) const; public: using difference_type = std::ptrdiff_t; @@ -84,7 +190,8 @@ namespace Catch { const_iterator operator++( int ); bool operator==( const_iterator const& other ) const { - return m_lineStart == other.m_lineStart && &m_column == &other.m_column; + return m_lineStart == other.m_lineStart && + &m_column == &other.m_column; } bool operator!=( const_iterator const& other ) const { return !operator==( other ); @@ -94,7 +201,7 @@ namespace Catch { explicit Column( std::string const& text ): m_string( text ) {} explicit Column( std::string&& text ): - m_string( CATCH_MOVE(text)) {} + m_string( CATCH_MOVE( text ) ) {} Column& width( size_t newWidth ) & { assert( newWidth > 0 ); @@ -125,7 +232,9 @@ namespace Catch { size_t width() const { return m_width; } const_iterator begin() const { return const_iterator( *this ); } - const_iterator end() const { return { *this, const_iterator::EndTag{} }; } + const_iterator end() const { + return { *this, const_iterator::EndTag{} }; + } friend std::ostream& operator<<( std::ostream& os, Column const& col ); diff --git a/tests/SelfTest/IntrospectiveTests/TextFlow.tests.cpp b/tests/SelfTest/IntrospectiveTests/TextFlow.tests.cpp index 653f65ba..de03ed09 100644 --- a/tests/SelfTest/IntrospectiveTests/TextFlow.tests.cpp +++ b/tests/SelfTest/IntrospectiveTests/TextFlow.tests.cpp @@ -12,6 +12,7 @@ #include using Catch::TextFlow::Column; +using Catch::TextFlow::AnsiSkippingString; namespace { static std::string as_written(Column const& c) { @@ -198,3 +199,202 @@ TEST_CASE( "#1400 - TextFlow::Column wrapping would sometimes duplicate words", " in \n" " convallis posuere, libero nisi ultricies orci, nec lobortis."); } + +TEST_CASE( "TextFlow::AnsiSkippingString skips ansi sequences", + "[TextFlow][ansiskippingstring][approvals]" ) { + + SECTION("basic string") { + std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me"; + AnsiSkippingString str(text); + + SECTION( "iterates forward" ) { + auto it = str.begin(); + CHECK(*it == 'a'); + ++it; + CHECK(*it == 'b'); + ++it; + CHECK(*it == 'c'); + ++it; + CHECK(*it == 'd'); + ++it; + CHECK(*it == 'e'); + ++it; + CHECK(it == str.end()); + } + SECTION( "iterates backwards" ) { + auto it = str.end(); + --it; + CHECK(*it == 'e'); + --it; + CHECK(*it == 'd'); + --it; + CHECK(*it == 'c'); + --it; + CHECK(*it == 'b'); + --it; + CHECK(*it == 'a'); + CHECK(it == str.begin()); + } + } + + SECTION( "ansi escape sequences at the start" ) { + std::string text = "\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38mc\033[0md\033[me"; + AnsiSkippingString str(text); + auto it = str.begin(); + CHECK(*it == 'a'); + ++it; + CHECK(*it == 'b'); + ++it; + CHECK(*it == 'c'); + ++it; + CHECK(*it == 'd'); + ++it; + CHECK(*it == 'e'); + ++it; + CHECK(it == str.end()); + --it; + CHECK(*it == 'e'); + --it; + CHECK(*it == 'd'); + --it; + CHECK(*it == 'c'); + --it; + CHECK(*it == 'b'); + --it; + CHECK(*it == 'a'); + CHECK(it == str.begin()); + } + + SECTION( "ansi escape sequences at the end" ) { + std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me\033[38;2;98;174;239m"; + AnsiSkippingString str(text); + auto it = str.begin(); + CHECK(*it == 'a'); + ++it; + CHECK(*it == 'b'); + ++it; + CHECK(*it == 'c'); + ++it; + CHECK(*it == 'd'); + ++it; + CHECK(*it == 'e'); + ++it; + CHECK(it == str.end()); + --it; + CHECK(*it == 'e'); + --it; + CHECK(*it == 'd'); + --it; + CHECK(*it == 'c'); + --it; + CHECK(*it == 'b'); + --it; + CHECK(*it == 'a'); + CHECK(it == str.begin()); + } + + SECTION( "skips consecutive escapes" ) { + std::string text = "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38mc\033[0md\033[me"; + AnsiSkippingString str(text); + auto it = str.begin(); + CHECK(*it == 'a'); + ++it; + CHECK(*it == 'b'); + ++it; + CHECK(*it == 'c'); + ++it; + CHECK(*it == 'd'); + ++it; + CHECK(*it == 'e'); + ++it; + CHECK(it == str.end()); + --it; + CHECK(*it == 'e'); + --it; + CHECK(*it == 'd'); + --it; + CHECK(*it == 'c'); + --it; + CHECK(*it == 'b'); + --it; + CHECK(*it == 'a'); + CHECK(it == str.begin()); + } + + SECTION( "handles incomplete ansi sequences" ) { + std::string text = "a\033[b\033[30c\033[30;d\033[30;2e"; + AnsiSkippingString str(text); + CHECK(std::string(str.begin(), str.end()) == text); + } +} + +TEST_CASE( "TextFlow::AnsiSkippingString computes the size properly", + "[TextFlow][ansiskippingstring][approvals]" ) { + std::string text = "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38mc\033[0md\033[me"; + AnsiSkippingString str(text); + CHECK(str.size() == 5); +} + +TEST_CASE( "TextFlow::AnsiSkippingString substrings properly", + "[TextFlow][ansiskippingstring][approvals]" ) { + SECTION("basic test") { + std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me"; + AnsiSkippingString str(text); + auto a = str.begin(); + auto b = str.begin(); + ++b; + ++b; + CHECK(str.substring(a, b) == "a\033[38;2;98;174;239mb\033[38m"); + ++a; + ++b; + CHECK(str.substring(a, b) == "b\033[38mc\033[0m"); + CHECK(str.substring(a, str.end()) == "b\033[38mc\033[0md\033[me"); + CHECK(str.substring(str.begin(), str.end()) == text); + } + SECTION("escapes at the start") { + std::string text = "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38mc\033[0md\033[me"; + AnsiSkippingString str(text); + auto a = str.begin(); + auto b = str.begin(); + ++b; + ++b; + CHECK(str.substring(a, b) == "\033[38;2;98;174;239m\033[38;2;98;174;239ma\033[38;2;98;174;239mb\033[38m\033[38m\033[38m"); + ++a; + ++b; + CHECK(str.substring(a, b) == "b\033[38m\033[38m\033[38mc\033[0m"); + CHECK(str.substring(a, str.end()) == "b\033[38m\033[38m\033[38mc\033[0md\033[me"); + CHECK(str.substring(str.begin(), str.end()) == text); + } + SECTION("escapes at the end") { + std::string text = "a\033[38;2;98;174;239mb\033[38mc\033[0md\033[me\033[38m"; + AnsiSkippingString str(text); + auto a = str.begin(); + auto b = str.begin(); + ++b; + ++b; + CHECK(str.substring(a, b) == "a\033[38;2;98;174;239mb\033[38m"); + ++a; + ++b; + CHECK(str.substring(a, b) == "b\033[38mc\033[0m"); + CHECK(str.substring(a, str.end()) == "b\033[38mc\033[0md\033[me\033[38m"); + CHECK(str.substring(str.begin(), str.end()) == text); + } +} + +TEST_CASE( "TextFlow::Column skips ansi escape sequences", + "[TextFlow][column][approvals]" ) { + std::string text = "\033[38;2;98;174;239m\033[38;2;198;120;221mThe quick brown \033[38;2;198;120;221mfox jumped over the lazy dog\033[0m"; + Column col(text); + + SECTION( "width=20" ) { + col.width( 20 ); + REQUIRE( as_written( col ) == "\033[38;2;98;174;239m\033[38;2;198;120;221mThe quick brown \033[38;2;198;120;221mfox\n" + "jumped over the lazy\n" + "dog\033[0m" ); + } + + SECTION( "width=80" ) { + col.width( 80 ); + REQUIRE( as_written( col ) == text ); + } +}