mirror of
https://github.com/catchorg/Catch2.git
synced 2024-11-22 13:26:10 +01:00
Cleanup the shard integration test script
This commit is contained in:
parent
5ac1ffe9ee
commit
342ef5ca7e
@ -5,86 +5,154 @@ This test script verifies that sharding tests does change which tests are run.
|
||||
This is done by running the binary multiple times, once to list all the tests,
|
||||
once per shard to list the tests for that shard, and once again per shard to
|
||||
execute the tests. The sharded lists are compared to the full list to ensure
|
||||
none are skipped, duplicated, and that the order remains the same. This process
|
||||
is repeated for multiple command line argument combinations to ensure sharding
|
||||
works with different filters and test orderings.
|
||||
none are skipped, duplicated, and that the order remains the same.
|
||||
"""
|
||||
|
||||
import random
|
||||
import subprocess
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from collections import namedtuple
|
||||
|
||||
from typing import List, Dict
|
||||
|
||||
seed = random.randint(0, 2 ** 32 - 1)
|
||||
number_of_shards = 5
|
||||
|
||||
def make_base_commandline(self_test_exe):
|
||||
return [
|
||||
self_test_exe,
|
||||
'--reporter', 'xml',
|
||||
"--shard-count", "5",
|
||||
"--shard-index", "2",
|
||||
'--order', 'rand',
|
||||
'--rng-seed', str(seed),
|
||||
"[generators]~[benchmarks]~[.]"
|
||||
]
|
||||
|
||||
def list_tests(self_test_exe):
|
||||
|
||||
def list_tests(self_test_exe: str, extra_args: List[str] = None):
|
||||
cmd = make_base_commandline(self_test_exe) + ['--list-tests']
|
||||
if extra_args:
|
||||
cmd.extend(extra_args)
|
||||
|
||||
process = subprocess.Popen(
|
||||
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout, stderr = process.communicate()
|
||||
if stderr:
|
||||
raise RuntimeError("Unexpected error output:\n" + process.stderr)
|
||||
try:
|
||||
ret = subprocess.run(cmd,
|
||||
stdout = subprocess.PIPE,
|
||||
stderr = subprocess.PIPE,
|
||||
timeout = 10,
|
||||
check = True,
|
||||
universal_newlines = True)
|
||||
except subprocess.CalledProcessError as ex:
|
||||
print('Could not list tests:\n{}'.format(ex.stderr))
|
||||
|
||||
root = ET.fromstring(stdout)
|
||||
if ret.stderr:
|
||||
raise RuntimeError("Unexpected error output:\n" + ret.stderr)
|
||||
|
||||
root = ET.fromstring(ret.stdout)
|
||||
result = [elem.text for elem in root.findall('./TestCase/Name')]
|
||||
|
||||
if len(result) < 2:
|
||||
raise RuntimeError("Unexpectedly few tests listed (got {})".format(
|
||||
len(result)))
|
||||
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def execute_tests(self_test_exe):
|
||||
def execute_tests(self_test_exe: str, extra_args: List[str] = None):
|
||||
cmd = make_base_commandline(self_test_exe)
|
||||
if extra_args:
|
||||
cmd.extend(extra_args)
|
||||
|
||||
process = subprocess.Popen(
|
||||
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout, stderr = process.communicate()
|
||||
if stderr:
|
||||
try:
|
||||
ret = subprocess.run(cmd,
|
||||
stdout = subprocess.PIPE,
|
||||
stderr = subprocess.PIPE,
|
||||
timeout = 10,
|
||||
check = True,
|
||||
universal_newlines = True)
|
||||
except subprocess.CalledProcessError as ex:
|
||||
print('Could not list tests:\n{}'.format(ex.stderr))
|
||||
|
||||
if ret.stderr:
|
||||
raise RuntimeError("Unexpected error output:\n" + process.stderr)
|
||||
|
||||
root = ET.fromstring(stdout)
|
||||
root = ET.fromstring(ret.stdout)
|
||||
result = [elem.attrib["name"] for elem in root.findall('./TestCase')]
|
||||
|
||||
if len(result) < 2:
|
||||
raise RuntimeError("Unexpectedly few tests listed (got {})".format(
|
||||
len(result)))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def check_listed_and_executed_tests_match(listed_tests, executed_tests):
|
||||
listed_names = set(listed_tests)
|
||||
executed_names = set(executed_tests)
|
||||
def test_sharded_listing(self_test_exe: str) -> Dict[int, List[str]]:
|
||||
"""
|
||||
Asks the test binary for list of all tests, and also for lists of
|
||||
tests from shards.
|
||||
|
||||
listed_string = "\n".join(listed_names)
|
||||
exeucted_string = "\n".join(executed_names)
|
||||
The combination of shards is then checked whether it corresponds to
|
||||
the full list of all tests.
|
||||
|
||||
assert listed_names == executed_names, (
|
||||
"Executed tests do not match the listed tests:\nExecuted:\n{}\n\nListed:\n{}".format(exeucted_string, listed_string)
|
||||
Returns the dictionary of shard-index => listed tests for later use.
|
||||
"""
|
||||
all_tests = list_tests(self_test_exe)
|
||||
big_shard_tests = list_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
|
||||
|
||||
assert all_tests == big_shard_tests, (
|
||||
"No-sharding test list does not match the listing of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
|
||||
'\n'.join(all_tests),
|
||||
'\n'.join(big_shard_tests)
|
||||
)
|
||||
)
|
||||
|
||||
shard_listings = dict()
|
||||
for shard_idx in range(number_of_shards):
|
||||
shard_listings[shard_idx] = list_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
|
||||
|
||||
def test_sharding(self_test_exe):
|
||||
listed_tests = list_tests(self_test_exe)
|
||||
executed_tests = execute_tests(self_test_exe)
|
||||
shard_sizes = [len(v) for v in shard_listings.values()]
|
||||
assert len(all_tests) == sum(shard_sizes)
|
||||
|
||||
check_listed_and_executed_tests_match(listed_tests, executed_tests)
|
||||
# Check that the shards have roughly the right sizes (e.g. we don't
|
||||
# have all tests in single shard and the others are empty)
|
||||
differences = [abs(x1 - x2) for x1, x2 in zip(shard_sizes, shard_sizes[1:])]
|
||||
assert all(diff <= 1 for diff in differences), "A shard has weird size: {}".format(shard_sizes)
|
||||
|
||||
combined_shards = [inner for outer in shard_listings.values() for inner in outer]
|
||||
assert all_tests == combined_shards, (
|
||||
"All tests and combined shards disagree.\nNo shard:\n{}\n\nCombined:\n{}\n\n".format(
|
||||
'\n'.join(all_tests),
|
||||
'\n'.join(combined_shards)
|
||||
)
|
||||
)
|
||||
shard_listings[-1] = all_tests
|
||||
|
||||
return shard_listings
|
||||
|
||||
|
||||
def test_sharded_execution(self_test_exe: str, listings: Dict[int, List[str]]):
|
||||
"""
|
||||
Runs the test binary and checks that the executed tests match the
|
||||
previously listed tests.
|
||||
|
||||
Also does this for various shard indices, and that the combination
|
||||
of all shards matches the full run/listing.
|
||||
"""
|
||||
all_tests = execute_tests(self_test_exe)
|
||||
big_shard_tests = execute_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
|
||||
assert all_tests == big_shard_tests
|
||||
|
||||
assert listings[-1] == all_tests
|
||||
|
||||
for shard_idx in range(number_of_shards):
|
||||
assert listings[shard_idx] == execute_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
|
||||
|
||||
|
||||
def main():
|
||||
self_test_exe, = sys.argv[1:]
|
||||
|
||||
test_sharding(self_test_exe)
|
||||
listings = test_sharded_listing(self_test_exe)
|
||||
test_sharded_execution(self_test_exe, listings)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
Loading…
Reference in New Issue
Block a user