Allow test sharding for e.g. Bazel test sharding feature

This greatly simplifies running the Catch2 tests of a single binary in parallel from external test runners. Instead of having to shard the tests by tags or test names, an external test runner can now just ask for test shard 2 (out of X) and execute it in a single process, without having to know which tests are actually in the shard. Note that sharding also applies to test listing, and happens after the tests have been ordered according to the `--order` feature.
2025-12-16 07:02:12 +01:00 · 2021-07-11 12:46:05 -07:00
parent 6456ee8b01
commit 3087e19cc7
21 changed files with 415 additions and 6 deletions
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -87,6 +87,7 @@ set(TEST_SOURCES
        ${SELF_TEST_DIR}/IntrospectiveTests/RandomNumberGeneration.tests.cpp
        ${SELF_TEST_DIR}/IntrospectiveTests/Reporters.tests.cpp
        ${SELF_TEST_DIR}/IntrospectiveTests/Tag.tests.cpp
+        ${SELF_TEST_DIR}/IntrospectiveTests/Sharding.tests.cpp
        ${SELF_TEST_DIR}/IntrospectiveTests/String.tests.cpp
        ${SELF_TEST_DIR}/IntrospectiveTests/StringManip.tests.cpp
        ${SELF_TEST_DIR}/IntrospectiveTests/Xml.tests.cpp
@@ -310,6 +311,7 @@ set_tests_properties(TagAlias PROPERTIES
 add_test(NAME RandomTestOrdering COMMAND ${PYTHON_EXECUTABLE}
  ${CATCH_DIR}/tests/TestScripts/testRandomOrder.py $<TARGET_FILE:SelfTest>)

+
 add_test(NAME CheckConvenienceHeaders
  COMMAND
    ${PYTHON_EXECUTABLE} ${CATCH_DIR}/tools/scripts/checkConvenienceHeaders.py
--- a/tests/ExtraTests/CMakeLists.txt
+++ b/tests/ExtraTests/CMakeLists.txt
@@ -8,6 +8,22 @@ project( Catch2ExtraTests LANGUAGES CXX )

 message( STATUS "Extra tests included" )

+# Runs testSharding.py against the SelfTest binary (listed vs. executed tests of a shard).
+add_test(
+  NAME TestShardingIntegration
+  COMMAND ${PYTHON_EXECUTABLE} ${CATCH_DIR}/tests/TestScripts/testSharding.py $<TARGET_FILE:SelfTest>
+)
+# Shard index equal to the shard count: the test passes when the error text below is printed.
+add_test(
+  NAME TestSharding::OverlyLargeShardIndex
+  COMMAND $<TARGET_FILE:SelfTest>  --shard-index 5 --shard-count 5
+)
+set_tests_properties(
+    TestSharding::OverlyLargeShardIndex
+  PROPERTIES
+    PASS_REGULAR_EXPRESSION "The shard count \\(5\\) must be greater than the shard index \\(5\\)"
+)
+
 # The MinDuration reporting tests do not need separate compilation, but
 # they have non-trivial execution time, so they are categorized as
 # extra tests, so that they are run less.
--- a/tests/SelfTest/Baselines/compact.sw.approved.txt
+++ b/tests/SelfTest/Baselines/compact.sw.approved.txt
@@ -1273,6 +1273,12 @@ CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--benchmark-no-ana
 CmdLine.tests.cpp:<line number>: passed: config.benchmarkNoAnalysis for: true
 CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--benchmark-warmup-time=10" }) for: {?}
 CmdLine.tests.cpp:<line number>: passed: config.benchmarkWarmupTime == 10 for: 10 == 10
+CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--shard-count=8"}) for: {?}
+CmdLine.tests.cpp:<line number>: passed: config.shardCount == 8 for: 8 == 8
+CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--shard-index=2"}) for: {?}
+CmdLine.tests.cpp:<line number>: passed: config.shardIndex == 2 for: 2 == 2
+CmdLine.tests.cpp:<line number>: passed: !result for: true
+CmdLine.tests.cpp:<line number>: passed: result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) for: "The shard count must be greater than 0" contains: "The shard count must be greater than 0"
 Misc.tests.cpp:<line number>: passed: std::tuple_size<TestType>::value >= 1 for: 3 >= 1
 Misc.tests.cpp:<line number>: passed: std::tuple_size<TestType>::value >= 1 for: 2 >= 1
 Misc.tests.cpp:<line number>: passed: std::tuple_size<TestType>::value >= 1 for: 1 >= 1
--- a/tests/SelfTest/Baselines/console.std.approved.txt
+++ b/tests/SelfTest/Baselines/console.std.approved.txt
@@ -1427,5 +1427,5 @@ due to unexpected exception with message:

 ===============================================================================
 test cases:  373 |  296 passed |  70 failed |  7 failed as expected
-assertions: 2115 | 1959 passed | 129 failed | 27 failed as expected
+assertions: 2121 | 1965 passed | 129 failed | 27 failed as expected

--- a/tests/SelfTest/Baselines/console.sw.approved.txt
+++ b/tests/SelfTest/Baselines/console.sw.approved.txt
@@ -9390,6 +9390,61 @@ CmdLine.tests.cpp:<line number>: PASSED:
 with expansion:
  10 == 10

+-------------------------------------------------------------------------------
+Process can be configured on command line
+  Sharding options
+  shard-count
+-------------------------------------------------------------------------------
+CmdLine.tests.cpp:<line number>
+...............................................................................
+
+CmdLine.tests.cpp:<line number>: PASSED:
+  CHECK( cli.parse({ "test", "--shard-count=8"}) )
+with expansion:
+  {?}
+
+CmdLine.tests.cpp:<line number>: PASSED:
+  REQUIRE( config.shardCount == 8 )
+with expansion:
+  8 == 8
+
+-------------------------------------------------------------------------------
+Process can be configured on command line
+  Sharding options
+  shard-index
+-------------------------------------------------------------------------------
+CmdLine.tests.cpp:<line number>
+...............................................................................
+
+CmdLine.tests.cpp:<line number>: PASSED:
+  CHECK( cli.parse({ "test", "--shard-index=2"}) )
+with expansion:
+  {?}
+
+CmdLine.tests.cpp:<line number>: PASSED:
+  REQUIRE( config.shardIndex == 2 )
+with expansion:
+  2 == 2
+
+-------------------------------------------------------------------------------
+Process can be configured on command line
+  Sharding options
+  Zero shard-count
+-------------------------------------------------------------------------------
+CmdLine.tests.cpp:<line number>
+...............................................................................
+
+CmdLine.tests.cpp:<line number>: PASSED:
+  CHECK( !result )
+with expansion:
+  true
+
+CmdLine.tests.cpp:<line number>: PASSED:
+  CHECK_THAT( result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) )
+with expansion:
+  "The shard count must be greater than 0" contains: "The shard count must be
+  greater than 0"
+
 -------------------------------------------------------------------------------
 Product with differing arities - std::tuple<int, double, float>
 -------------------------------------------------------------------------------
@@ -17040,5 +17095,5 @@ Misc.tests.cpp:<line number>: PASSED:

 ===============================================================================
 test cases:  373 |  280 passed |  86 failed |  7 failed as expected
-assertions: 2132 | 1959 passed | 146 failed | 27 failed as expected
+assertions: 2138 | 1965 passed | 146 failed | 27 failed as expected

--- a/tests/SelfTest/Baselines/junit.sw.approved.txt
+++ b/tests/SelfTest/Baselines/junit.sw.approved.txt
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <testsuitesloose text artifact
 >
-  <testsuite name="<exe-name>" errors="17" failures="129" tests="2132" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
+  <testsuite name="<exe-name>" errors="17" failures="129" tests="2138" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
    <properties>
      <property name="random-seed" value="1"/>
      <property name="filters" value="~[!nonportable]~[!benchmark]~[approvals] *"/>
@@ -1096,6 +1096,9 @@ Message.tests.cpp:<line number>
    <testcase classname="<exe-name>.global" name="Process can be configured on command line/Benchmark options/confidence-interval" time="{duration}" status="run"/>
    <testcase classname="<exe-name>.global" name="Process can be configured on command line/Benchmark options/no-analysis" time="{duration}" status="run"/>
    <testcase classname="<exe-name>.global" name="Process can be configured on command line/Benchmark options/warmup-time" time="{duration}" status="run"/>
+    <testcase classname="<exe-name>.global" name="Process can be configured on command line/Sharding options/shard-count" time="{duration}" status="run"/>
+    <testcase classname="<exe-name>.global" name="Process can be configured on command line/Sharding options/shard-index" time="{duration}" status="run"/>
+    <testcase classname="<exe-name>.global" name="Process can be configured on command line/Sharding options/Zero shard-count" time="{duration}" status="run"/>
    <testcase classname="<exe-name>.global" name="Product with differing arities - std::tuple&lt;int, double, float>" time="{duration}" status="run"/>
    <testcase classname="<exe-name>.global" name="Product with differing arities - std::tuple&lt;int, double>" time="{duration}" status="run"/>
    <testcase classname="<exe-name>.global" name="Product with differing arities - std::tuple&lt;int>" time="{duration}" status="run"/>
--- a/tests/SelfTest/Baselines/sonarqube.sw.approved.txt
+++ b/tests/SelfTest/Baselines/sonarqube.sw.approved.txt
@@ -75,6 +75,9 @@
    <testCase name="Process can be configured on command line/Benchmark options/confidence-interval" duration="{duration}"/>
    <testCase name="Process can be configured on command line/Benchmark options/no-analysis" duration="{duration}"/>
    <testCase name="Process can be configured on command line/Benchmark options/warmup-time" duration="{duration}"/>
+    <testCase name="Process can be configured on command line/Sharding options/shard-count" duration="{duration}"/>
+    <testCase name="Process can be configured on command line/Sharding options/shard-index" duration="{duration}"/>
+    <testCase name="Process can be configured on command line/Sharding options/Zero shard-count" duration="{duration}"/>
    <testCase name="Test with special, characters &quot;in name" duration="{duration}"/>
  </file>
  <file path="tests/<exe-name>/IntrospectiveTests/FloatingPoint.tests.cpp">
--- a/tests/SelfTest/Baselines/tap.sw.approved.txt
+++ b/tests/SelfTest/Baselines/tap.sw.approved.txt
@@ -2468,6 +2468,18 @@ ok {test-number} - config.benchmarkNoAnalysis for: true
 ok {test-number} - cli.parse({ "test", "--benchmark-warmup-time=10" }) for: {?}
 # Process can be configured on command line
 ok {test-number} - config.benchmarkWarmupTime == 10 for: 10 == 10
+# Process can be configured on command line
+ok {test-number} - cli.parse({ "test", "--shard-count=8"}) for: {?}
+# Process can be configured on command line
+ok {test-number} - config.shardCount == 8 for: 8 == 8
+# Process can be configured on command line
+ok {test-number} - cli.parse({ "test", "--shard-index=2"}) for: {?}
+# Process can be configured on command line
+ok {test-number} - config.shardIndex == 2 for: 2 == 2
+# Process can be configured on command line
+ok {test-number} - !result for: true
+# Process can be configured on command line
+ok {test-number} - result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) for: "The shard count must be greater than 0" contains: "The shard count must be greater than 0"
 # Product with differing arities - std::tuple<int, double, float>
 ok {test-number} - std::tuple_size<TestType>::value >= 1 for: 3 >= 1
 # Product with differing arities - std::tuple<int, double>
@@ -4266,5 +4278,5 @@ ok {test-number} - q3 == 23. for: 23.0 == 23.0
 ok {test-number} -
 # xmlentitycheck
 ok {test-number} -
-1..2132
+1..2138

--- a/tests/SelfTest/Baselines/xml.sw.approved.txt
+++ b/tests/SelfTest/Baselines/xml.sw.approved.txt
@@ -11469,6 +11469,72 @@ Nor would this
      </Section>
      <OverallResults successes="2" failures="0" expectedFailures="0"/>
    </Section>
+    <Section name="Sharding options" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+      <Section name="shard-count" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+        <Expression success="true" type="CHECK" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+          <Original>
+            cli.parse({ "test", "--shard-count=8"})
+          </Original>
+          <Expanded>
+            {?}
+          </Expanded>
+        </Expression>
+        <Expression success="true" type="REQUIRE" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+          <Original>
+            config.shardCount == 8
+          </Original>
+          <Expanded>
+            8 == 8
+          </Expanded>
+        </Expression>
+        <OverallResults successes="2" failures="0" expectedFailures="0"/>
+      </Section>
+      <OverallResults successes="2" failures="0" expectedFailures="0"/>
+    </Section>
+    <Section name="Sharding options" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+      <Section name="shard-index" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+        <Expression success="true" type="CHECK" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+          <Original>
+            cli.parse({ "test", "--shard-index=2"})
+          </Original>
+          <Expanded>
+            {?}
+          </Expanded>
+        </Expression>
+        <Expression success="true" type="REQUIRE" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+          <Original>
+            config.shardIndex == 2
+          </Original>
+          <Expanded>
+            2 == 2
+          </Expanded>
+        </Expression>
+        <OverallResults successes="2" failures="0" expectedFailures="0"/>
+      </Section>
+      <OverallResults successes="2" failures="0" expectedFailures="0"/>
+    </Section>
+    <Section name="Sharding options" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+      <Section name="Zero shard-count" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+        <Expression success="true" type="CHECK" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+          <Original>
+            !result
+          </Original>
+          <Expanded>
+            true
+          </Expanded>
+        </Expression>
+        <Expression success="true" type="CHECK_THAT" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
+          <Original>
+            result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" )
+          </Original>
+          <Expanded>
+            "The shard count must be greater than 0" contains: "The shard count must be greater than 0"
+          </Expanded>
+        </Expression>
+        <OverallResults successes="2" failures="0" expectedFailures="0"/>
+      </Section>
+      <OverallResults successes="2" failures="0" expectedFailures="0"/>
+    </Section>
    <OverallResult success="true"/>
  </TestCase>
  <TestCase name="Product with differing arities - std::tuple&lt;int, double, float>" tags="[product][template]" filename="tests/<exe-name>/UsageTests/Misc.tests.cpp" >
@@ -20030,6 +20096,6 @@ loose text artifact
    </Section>
    <OverallResult success="true"/>
  </TestCase>
-  <OverallResults successes="1959" failures="146" expectedFailures="27"/>
+  <OverallResults successes="1965" failures="146" expectedFailures="27"/>
  <OverallResultsCases successes="280" failures="86" expectedFailures="7"/>
 </Catch2TestRun>
--- a/tests/SelfTest/IntrospectiveTests/CmdLine.tests.cpp
+++ b/tests/SelfTest/IntrospectiveTests/CmdLine.tests.cpp
@@ -569,6 +569,27 @@ TEST_CASE( "Process can be configured on command line", "[config][command-line]"
            REQUIRE(config.benchmarkWarmupTime == 10);
        }
    }
+
+    SECTION("Sharding options") {
+        SECTION("shard-count") {
+            CHECK(cli.parse({ "test", "--shard-count=8"}));
+            // The parsed shard count must be reflected in the config.
+            REQUIRE(config.shardCount == 8);
+        }
+
+        SECTION("shard-index") {
+            CHECK(cli.parse({ "test", "--shard-index=2"}));
+            // The parsed shard index must be reflected in the config.
+            REQUIRE(config.shardIndex == 2);
+        }
+
+        SECTION("Zero shard-count") {
+            auto result = cli.parse({ "test", "--shard-count=0"});
+            // A shard count of zero must fail to parse and report why.
+            CHECK( !result );
+            CHECK_THAT( result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) );
+        }
+    }
 }

 TEST_CASE("Test with special, characters \"in name", "[cli][regression]") {
--- a/tests/SelfTest/IntrospectiveTests/Sharding.tests.cpp
+++ b/tests/SelfTest/IntrospectiveTests/Sharding.tests.cpp
@@ -0,0 +1,41 @@
+/*
+ *  Distributed under the Boost Software License, Version 1.0. (See accompanying
+ *  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ */
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/generators/catch_generators_all.hpp>
+
+#include <catch2/internal/catch_sharding.hpp>
+
+#include <unordered_map>
+#include <vector>
+
+TEST_CASE("Sharding Function", "[approvals]") {
+    std::vector<int> testContainer = { 0, 1, 2, 3, 4, 5, 6 };
+    std::unordered_map<int, std::vector<std::size_t>> expectedShardSizes = {
+        {1, {7}},
+        {2, {4, 3}},
+        {3, {3, 2, 2}},
+        {4, {2, 2, 2, 1}},
+        {5, {2, 2, 1, 1, 1}},
+        {6, {2, 1, 1, 1, 1, 1}},
+        {7, {1, 1, 1, 1, 1, 1, 1}},
+    };
+    // `range` is half-open: end at 8 / 7 so that 7 shards and shard index 6 are covered too.
+    auto shardCount = GENERATE(range(1, 8));
+    auto shardIndex = GENERATE_COPY(filter([=](int i) { return i < shardCount; }, range(0, 7)));
+
+    std::vector<int> result = Catch::createShard(testContainer, shardCount, shardIndex);
+    // The shard must have the size listed in the table for this count/index pair.
+    auto& sizes = expectedShardSizes[shardCount];
+    REQUIRE(result.size() == sizes[shardIndex]);
+    // Expected offset of this shard = total size of all shards before it.
+    std::size_t startIndex = 0;
+    for(int i = 0; i < shardIndex; i++) {
+        startIndex += sizes[i];
+    }
+    // The shard must be the contiguous slice of the input starting at that offset.
+    for(std::size_t i = 0; i < sizes[shardIndex]; i++) {
+        CHECK(result[i] == testContainer[i + startIndex]);
+    }
+}
--- a/tests/TestScripts/testSharding.py
+++ b/tests/TestScripts/testSharding.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+"""
+This test script verifies that sharding is applied consistently when listing
+and when running tests. The binary is run twice with the same shard settings
+(shard index 2 out of 5 shards) and the same test filter: once with
+`--list-tests` to list the tests in that shard, and once to execute them.
+The names of the listed and the executed tests are then compared as sets to
+ensure both runs selected the same tests. Ordering and duplicates are not
+checked, and only this single shard/filter combination is exercised.
+"""
+
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+
+from collections import namedtuple
+
+def make_base_commandline(self_test_exe):
+    """Build the argument list shared by the listing and the execution run."""
+    # XML reporter, shard index 2 out of 5 shards.
+    args = [self_test_exe, '--reporter', 'xml']
+    args += ["--shard-count", "5", "--shard-index", "2"]
+    # Test spec handed to the binary unchanged.
+    args.append("[generators]~[benchmarks]~[.]")
+    return args
+
+def list_tests(self_test_exe):
+    """Return the test names the binary lists for the configured shard.
+
+    Raises RuntimeError if the binary writes to stderr or lists fewer than 2 tests.
+    """
+    cmd = make_base_commandline(self_test_exe) + ['--list-tests']
+    process = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    if stderr:
+        # Report the captured bytes; `process.stderr` is the pipe object, not its content.
+        raise RuntimeError("Unexpected error output:\n" + stderr.decode(errors='replace'))
+    root = ET.fromstring(stdout)
+    result = [elem.text for elem in root.findall('./TestCase/Name')]
+    if len(result) < 2:
+        raise RuntimeError("Unexpectedly few tests listed (got {})".format(len(result)))
+    return result
+
+
+def execute_tests(self_test_exe):
+    """Run the configured shard and return the names of the executed test cases.
+
+    Raises RuntimeError if the binary writes to stderr or runs fewer than 2 tests.
+    """
+    cmd = make_base_commandline(self_test_exe)
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    if stderr:
+        # Report the captured bytes; `process.stderr` is the pipe object, not its content.
+        raise RuntimeError("Unexpected error output:\n" + stderr.decode(errors='replace'))
+    root = ET.fromstring(stdout)
+    result = [elem.attrib["name"] for elem in root.findall('./TestCase')]
+    if len(result) < 2:
+        raise RuntimeError("Unexpectedly few tests executed (got {})".format(len(result)))
+    return result
+
+
+def check_listed_and_executed_tests_match(listed_tests, executed_tests):
+    """Assert that the listing and the execution covered the same set of test names."""
+    listed, executed = set(listed_tests), set(executed_tests)
+    # Only set membership is compared; order and duplicates are ignored.
+    listed_text = "\n".join(listed)
+    executed_text = "\n".join(executed)
+    message = "Executed tests do not match the listed tests:\nExecuted:\n{}\n\nListed:\n{}".format(
+        executed_text, listed_text)
+
+    assert listed == executed, message
+
+
+def test_sharding(self_test_exe):
+    """Check that the tests listed for the shard are the ones it executes."""
+    # The listing run happens first, then the execution run.
+    listed = list_tests(self_test_exe)
+    check_listed_and_executed_tests_match(listed, execute_tests(self_test_exe))
+
+
+def main():
+    """Entry point: expects exactly one argument, the path of the test executable."""
+    (self_test_exe,) = sys.argv[1:]  # unpacking raises ValueError unless exactly one argument
+    test_sharding(self_test_exe)
+
+if __name__ == '__main__':
+    sys.exit(main())  # main() returns None, so the exit status is 0 unless it raises