Allow test sharding for e.g. Bazel test sharding feature

This greatly simplifies running Catch2 tests in a single binary
in parallel from external test runners. Instead of having to
shard the tests by tags/test names, an external test runner
can now just ask for test shard 2 (out of X), and execute that
in a single process, without having to know what tests are actually
in the shard.

Note that sharding also applies to test listing, and happens after
tests were ordered according to the `--order` feature.
This commit is contained in:
Ben Dunkin
2021-07-11 12:46:05 -07:00
committed by Martin Hořeňovský
parent 6456ee8b01
commit 3087e19cc7
21 changed files with 415 additions and 6 deletions

View File

@@ -87,6 +87,7 @@ set(TEST_SOURCES
${SELF_TEST_DIR}/IntrospectiveTests/RandomNumberGeneration.tests.cpp
${SELF_TEST_DIR}/IntrospectiveTests/Reporters.tests.cpp
${SELF_TEST_DIR}/IntrospectiveTests/Tag.tests.cpp
${SELF_TEST_DIR}/IntrospectiveTests/Sharding.tests.cpp
${SELF_TEST_DIR}/IntrospectiveTests/String.tests.cpp
${SELF_TEST_DIR}/IntrospectiveTests/StringManip.tests.cpp
${SELF_TEST_DIR}/IntrospectiveTests/Xml.tests.cpp
@@ -310,6 +311,7 @@ set_tests_properties(TagAlias PROPERTIES
add_test(NAME RandomTestOrdering COMMAND ${PYTHON_EXECUTABLE}
${CATCH_DIR}/tests/TestScripts/testRandomOrder.py $<TARGET_FILE:SelfTest>)
add_test(NAME CheckConvenienceHeaders
COMMAND
${PYTHON_EXECUTABLE} ${CATCH_DIR}/tools/scripts/checkConvenienceHeaders.py

View File

@@ -8,6 +8,22 @@ project( Catch2ExtraTests LANGUAGES CXX )
message( STATUS "Extra tests included" )
# Integration test: runs the testSharding.py script, passing it the path of
# the SelfTest binary as its only argument.
add_test(
NAME TestShardingIntegration
COMMAND ${PYTHON_EXECUTABLE} ${CATCH_DIR}/tests/TestScripts/testSharding.py $<TARGET_FILE:SelfTest>
)
# Runs SelfTest with a shard index equal to the shard count (5 and 5), which
# is expected to be rejected with an error message.
add_test(
NAME TestSharding::OverlyLargeShardIndex
COMMAND $<TARGET_FILE:SelfTest> --shard-index 5 --shard-count 5
)
# The test is judged by its output: it passes when the output matches the
# expected error message below (parentheses are escaped because the value is
# a regular expression).
set_tests_properties(
TestSharding::OverlyLargeShardIndex
PROPERTIES
PASS_REGULAR_EXPRESSION "The shard count \\(5\\) must be greater than the shard index \\(5\\)"
)
# The MinDuration reporting tests do not need separate compilation, but
# they have non-trivial execution time, so they are categorized as
# extra tests, so that they are run less.

View File

@@ -1273,6 +1273,12 @@ CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--benchmark-no-ana
CmdLine.tests.cpp:<line number>: passed: config.benchmarkNoAnalysis for: true
CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--benchmark-warmup-time=10" }) for: {?}
CmdLine.tests.cpp:<line number>: passed: config.benchmarkWarmupTime == 10 for: 10 == 10
CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--shard-count=8"}) for: {?}
CmdLine.tests.cpp:<line number>: passed: config.shardCount == 8 for: 8 == 8
CmdLine.tests.cpp:<line number>: passed: cli.parse({ "test", "--shard-index=2"}) for: {?}
CmdLine.tests.cpp:<line number>: passed: config.shardIndex == 2 for: 2 == 2
CmdLine.tests.cpp:<line number>: passed: !result for: true
CmdLine.tests.cpp:<line number>: passed: result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) for: "The shard count must be greater than 0" contains: "The shard count must be greater than 0"
Misc.tests.cpp:<line number>: passed: std::tuple_size<TestType>::value >= 1 for: 3 >= 1
Misc.tests.cpp:<line number>: passed: std::tuple_size<TestType>::value >= 1 for: 2 >= 1
Misc.tests.cpp:<line number>: passed: std::tuple_size<TestType>::value >= 1 for: 1 >= 1

View File

@@ -1427,5 +1427,5 @@ due to unexpected exception with message:
===============================================================================
test cases: 373 | 296 passed | 70 failed | 7 failed as expected
assertions: 2115 | 1959 passed | 129 failed | 27 failed as expected
assertions: 2121 | 1965 passed | 129 failed | 27 failed as expected

View File

@@ -9390,6 +9390,61 @@ CmdLine.tests.cpp:<line number>: PASSED:
with expansion:
10 == 10
-------------------------------------------------------------------------------
Process can be configured on command line
Sharding options
shard-count
-------------------------------------------------------------------------------
CmdLine.tests.cpp:<line number>
...............................................................................
CmdLine.tests.cpp:<line number>: PASSED:
CHECK( cli.parse({ "test", "--shard-count=8"}) )
with expansion:
{?}
CmdLine.tests.cpp:<line number>: PASSED:
REQUIRE( config.shardCount == 8 )
with expansion:
8 == 8
-------------------------------------------------------------------------------
Process can be configured on command line
Sharding options
shard-index
-------------------------------------------------------------------------------
CmdLine.tests.cpp:<line number>
...............................................................................
CmdLine.tests.cpp:<line number>: PASSED:
CHECK( cli.parse({ "test", "--shard-index=2"}) )
with expansion:
{?}
CmdLine.tests.cpp:<line number>: PASSED:
REQUIRE( config.shardIndex == 2 )
with expansion:
2 == 2
-------------------------------------------------------------------------------
Process can be configured on command line
Sharding options
Zero shard-count
-------------------------------------------------------------------------------
CmdLine.tests.cpp:<line number>
...............................................................................
CmdLine.tests.cpp:<line number>: PASSED:
CHECK( !result )
with expansion:
true
CmdLine.tests.cpp:<line number>: PASSED:
CHECK_THAT( result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) )
with expansion:
"The shard count must be greater than 0" contains: "The shard count must be
greater than 0"
-------------------------------------------------------------------------------
Product with differing arities - std::tuple<int, double, float>
-------------------------------------------------------------------------------
@@ -17040,5 +17095,5 @@ Misc.tests.cpp:<line number>: PASSED:
===============================================================================
test cases: 373 | 280 passed | 86 failed | 7 failed as expected
assertions: 2132 | 1959 passed | 146 failed | 27 failed as expected
assertions: 2138 | 1965 passed | 146 failed | 27 failed as expected

View File

@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuitesloose text artifact
>
<testsuite name="<exe-name>" errors="17" failures="129" tests="2132" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
<testsuite name="<exe-name>" errors="17" failures="129" tests="2138" hostname="tbd" time="{duration}" timestamp="{iso8601-timestamp}">
<properties>
<property name="random-seed" value="1"/>
<property name="filters" value="~[!nonportable]~[!benchmark]~[approvals] *"/>
@@ -1096,6 +1096,9 @@ Message.tests.cpp:<line number>
<testcase classname="<exe-name>.global" name="Process can be configured on command line/Benchmark options/confidence-interval" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Process can be configured on command line/Benchmark options/no-analysis" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Process can be configured on command line/Benchmark options/warmup-time" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Process can be configured on command line/Sharding options/shard-count" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Process can be configured on command line/Sharding options/shard-index" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Process can be configured on command line/Sharding options/Zero shard-count" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Product with differing arities - std::tuple&lt;int, double, float>" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Product with differing arities - std::tuple&lt;int, double>" time="{duration}" status="run"/>
<testcase classname="<exe-name>.global" name="Product with differing arities - std::tuple&lt;int>" time="{duration}" status="run"/>

View File

@@ -75,6 +75,9 @@
<testCase name="Process can be configured on command line/Benchmark options/confidence-interval" duration="{duration}"/>
<testCase name="Process can be configured on command line/Benchmark options/no-analysis" duration="{duration}"/>
<testCase name="Process can be configured on command line/Benchmark options/warmup-time" duration="{duration}"/>
<testCase name="Process can be configured on command line/Sharding options/shard-count" duration="{duration}"/>
<testCase name="Process can be configured on command line/Sharding options/shard-index" duration="{duration}"/>
<testCase name="Process can be configured on command line/Sharding options/Zero shard-count" duration="{duration}"/>
<testCase name="Test with special, characters &quot;in name" duration="{duration}"/>
</file>
<file path="tests/<exe-name>/IntrospectiveTests/FloatingPoint.tests.cpp">

View File

@@ -2468,6 +2468,18 @@ ok {test-number} - config.benchmarkNoAnalysis for: true
ok {test-number} - cli.parse({ "test", "--benchmark-warmup-time=10" }) for: {?}
# Process can be configured on command line
ok {test-number} - config.benchmarkWarmupTime == 10 for: 10 == 10
# Process can be configured on command line
ok {test-number} - cli.parse({ "test", "--shard-count=8"}) for: {?}
# Process can be configured on command line
ok {test-number} - config.shardCount == 8 for: 8 == 8
# Process can be configured on command line
ok {test-number} - cli.parse({ "test", "--shard-index=2"}) for: {?}
# Process can be configured on command line
ok {test-number} - config.shardIndex == 2 for: 2 == 2
# Process can be configured on command line
ok {test-number} - !result for: true
# Process can be configured on command line
ok {test-number} - result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) for: "The shard count must be greater than 0" contains: "The shard count must be greater than 0"
# Product with differing arities - std::tuple<int, double, float>
ok {test-number} - std::tuple_size<TestType>::value >= 1 for: 3 >= 1
# Product with differing arities - std::tuple<int, double>
@@ -4266,5 +4278,5 @@ ok {test-number} - q3 == 23. for: 23.0 == 23.0
ok {test-number} -
# xmlentitycheck
ok {test-number} -
1..2132
1..2138

View File

@@ -11469,6 +11469,72 @@ Nor would this
</Section>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<Section name="Sharding options" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Section name="shard-count" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Expression success="true" type="CHECK" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Original>
cli.parse({ "test", "--shard-count=8"})
</Original>
<Expanded>
{?}
</Expanded>
</Expression>
<Expression success="true" type="REQUIRE" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Original>
config.shardCount == 8
</Original>
<Expanded>
8 == 8
</Expanded>
</Expression>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<Section name="Sharding options" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Section name="shard-index" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Expression success="true" type="CHECK" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Original>
cli.parse({ "test", "--shard-index=2"})
</Original>
<Expanded>
{?}
</Expanded>
</Expression>
<Expression success="true" type="REQUIRE" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Original>
config.shardIndex == 2
</Original>
<Expanded>
2 == 2
</Expanded>
</Expression>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<Section name="Sharding options" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Section name="Zero shard-count" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Expression success="true" type="CHECK" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Original>
!result
</Original>
<Expanded>
true
</Expanded>
</Expression>
<Expression success="true" type="CHECK_THAT" filename="tests/<exe-name>/IntrospectiveTests/CmdLine.tests.cpp" >
<Original>
result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" )
</Original>
<Expanded>
"The shard count must be greater than 0" contains: "The shard count must be greater than 0"
</Expanded>
</Expression>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<OverallResults successes="2" failures="0" expectedFailures="0"/>
</Section>
<OverallResult success="true"/>
</TestCase>
<TestCase name="Product with differing arities - std::tuple&lt;int, double, float>" tags="[product][template]" filename="tests/<exe-name>/UsageTests/Misc.tests.cpp" >
@@ -20030,6 +20096,6 @@ loose text artifact
</Section>
<OverallResult success="true"/>
</TestCase>
<OverallResults successes="1959" failures="146" expectedFailures="27"/>
<OverallResults successes="1965" failures="146" expectedFailures="27"/>
<OverallResultsCases successes="280" failures="86" expectedFailures="7"/>
</Catch2TestRun>

View File

@@ -569,6 +569,27 @@ TEST_CASE( "Process can be configured on command line", "[config][command-line]"
REQUIRE(config.benchmarkWarmupTime == 10);
}
}
SECTION("Sharding options") {
SECTION("shard-count") {
CHECK(cli.parse({ "test", "--shard-count=8"}));
REQUIRE(config.shardCount == 8);
}
SECTION("shard-index") {
CHECK(cli.parse({ "test", "--shard-index=2"}));
REQUIRE(config.shardIndex == 2);
}
SECTION("Zero shard-count") {
auto result = cli.parse({ "test", "--shard-count=0"});
CHECK( !result );
CHECK_THAT( result.errorMessage(), ContainsSubstring( "The shard count must be greater than 0" ) );
}
}
}
TEST_CASE("Test with special, characters \"in name", "[cli][regression]") {

View File

@@ -0,0 +1,41 @@
/*
* Distributed under the Boost Software License, Version 1.0. (See accompanying
* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_all.hpp>
#include <catch2/internal/catch_sharding.hpp>
#include <unordered_map>
#include <vector>
// Checks that Catch::createShard splits a 7-element container into shards of
// the expected sizes, and that each shard holds the matching consecutive
// slice of the input, for every shard count 1..7 and every valid shard index.
TEST_CASE("Sharding Function", "[approvals]") {
    std::vector<int> testContainer = { 0, 1, 2, 3, 4, 5, 6 };

    // Expected size of every shard, keyed by the total number of shards.
    const std::unordered_map<int, std::vector<std::size_t>> expectedShardSizes = {
        {1, {7}},
        {2, {4, 3}},
        {3, {3, 2, 2}},
        {4, {2, 2, 2, 1}},
        {5, {2, 2, 1, 1, 1}},
        {6, {2, 1, 1, 1, 1, 1}},
        {7, {1, 1, 1, 1, 1, 1, 1}},
    };

    // range(start, end) is half-open, so the end bounds are one past the
    // largest value wanted: shard counts 1..7 and shard indices 0..6.
    // With the previous bounds (7 and 6) the 7-shard row was never exercised.
    auto shardCount = GENERATE(range(1, 8));
    auto shardIndex = GENERATE_COPY(filter([=](int i) { return i < shardCount; }, range(0, 7)));

    std::vector<int> result = Catch::createShard(testContainer, shardCount, shardIndex);

    // at() rather than operator[]: a missing table row should fail loudly
    // instead of silently yielding an empty vector that is then indexed.
    const auto& sizes = expectedShardSizes.at(shardCount);
    const auto index = static_cast<std::size_t>(shardIndex);
    REQUIRE(result.size() == sizes[index]);

    // The shard is expected to start right after all preceding shards.
    std::size_t startIndex = 0;
    for (std::size_t i = 0; i < index; ++i) {
        startIndex += sizes[i];
    }

    for (std::size_t i = 0; i < sizes[index]; ++i) {
        CHECK(result[i] == testContainer[i + startIndex]);
    }
}

View File

@@ -0,0 +1,90 @@
#!/usr/bin/env python3
"""
This test script verifies that sharding tests does change which tests are run.
This is done by running the binary multiple times, once to list all the tests,
once per shard to list the tests for that shard, and once again per shard to
execute the tests. The sharded lists are compared to the full list to ensure
none are skipped, duplicated, and that the order remains the same. This process
is repeated for multiple command line argument combinations to ensure sharding
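works with different filters and test orderings.

NOTE: as currently written, the script exercises a single fixed configuration
(shard index 2 of 5 with one test filter) and compares the listed and executed
test names as unordered sets.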
"""
import subprocess
import sys
import xml.etree.ElementTree as ET
from collections import namedtuple
def make_base_commandline(self_test_exe):
    """Build the command line shared by the listing and the execution run.

    Both runs use the XML reporter, shard index 2 out of 5 shards and the
    same test spec, so their results can be compared directly.
    """
    return [
        self_test_exe,
        '--reporter', 'xml',
        "--shard-count", "5",
        "--shard-index", "2",
        "[generators]~[benchmarks]~[.]"
    ]


def _run_and_parse_xml(cmd):
    """Run `cmd` and return the root element of the XML it wrote to stdout.

    Raises:
        RuntimeError: if the process wrote anything to stderr.
        xml.etree.ElementTree.ParseError: if stdout is not well-formed XML.
    """
    process = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if stderr:
        # Use the captured bytes: `process.stderr` is the pipe object and
        # cannot be concatenated to a str (that raised TypeError instead of
        # the intended RuntimeError).
        raise RuntimeError("Unexpected error output:\n"
                           + stderr.decode(errors="replace"))
    return ET.fromstring(stdout)


def list_tests(self_test_exe):
    """Return the names of the tests the binary lists for the shard.

    Runs the base command line with `--list-tests` and reads the text of
    every `TestCase/Name` element below the XML root.

    Raises:
        RuntimeError: on stderr output, or if fewer than two tests are found.
    """
    root = _run_and_parse_xml(
        make_base_commandline(self_test_exe) + ['--list-tests'])
    result = [elem.text for elem in root.findall('./TestCase/Name')]

    if len(result) < 2:
        raise RuntimeError("Unexpectedly few tests listed (got {})".format(
            len(result)))
    return result


def execute_tests(self_test_exe):
    """Return the names of the tests the binary actually runs for the shard.

    Runs the base command line and reads the `name` attribute of every
    `TestCase` element directly below the XML root.

    Raises:
        RuntimeError: on stderr output, or if fewer than two tests are found.
    """
    root = _run_and_parse_xml(make_base_commandline(self_test_exe))
    result = [elem.attrib["name"] for elem in root.findall('./TestCase')]

    if len(result) < 2:
        raise RuntimeError("Unexpectedly few tests listed (got {})".format(
            len(result)))
    return result
def check_listed_and_executed_tests_match(listed_tests, executed_tests):
    """Check that the listed and the executed tests are the same set of names.

    Only set membership is compared; order and duplicates are ignored.

    Raises:
        AssertionError: if the two sets differ. The message contains both
            sets, sorted so that the output is stable between runs.
    """
    listed_names = set(listed_tests)
    executed_names = set(executed_tests)
    if listed_names == executed_names:
        return

    listed_string = "\n".join(sorted(listed_names))
    executed_string = "\n".join(sorted(executed_names))

    # Raised explicitly (not via `assert`) so the check is not stripped
    # when the script is run with `python -O`.
    raise AssertionError(
        "Executed tests do not match the listed tests:\nExecuted:\n{}\n\nListed:\n{}".format(executed_string, listed_string)
    )
def test_sharding(self_test_exe):
    """List the shard's tests, execute the shard, and check both agree."""
    # Arguments are evaluated left to right: the listing run happens first,
    # then the execution run.
    check_listed_and_executed_tests_match(
        list_tests(self_test_exe),
        execute_tests(self_test_exe),
    )
def main():
    """Script entry point; expects exactly one argument, the test executable.

    Unpacking raises ValueError when the argument count is not exactly one.
    """
    [self_test_exe] = sys.argv[1:]
    test_sharding(self_test_exe)


if __name__ == '__main__':
    sys.exit(main())